//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
// Relocation formats used when emitting 32-bit immediates / displacements
// that refer to relocatable addresses (see emit_d32_reloc below).
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

// Conventional shorthand so MacroAssembler calls read as `__ op(...)`.
#define __ _masm.
// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
//
// Rounds 'adr' down to a 16-byte boundary and stores the two given jlong
// halves there, returning the aligned address.  The caller must guarantee
// that 16 bytes starting at the rounded-down address lie inside the backing
// buffer (fp_signmask_pool reserves one extra 128-bit slot for this).
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool entry is a 16-byte-aligned pair of identical 64-bit patterns:
// sign masks (clear sign bit -> Abs) and sign flips (toggle sign bit -> Neg).
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of extra bytes emitted immediately before a call when the method
// needs FPU-mode or AVX cleanup: 6 bytes for an fldcw in 24-bit FP mode,
// plus 3 bytes for vzeroupper when wide vectors are in use with UseAVX <= 2.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    if(UseAVX <= 2) {
      size += 3; // vzeroupper
    }
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

// Size in bytes of the FFree_Float_Stack_All stub sequence; recorded when it
// is first emitted (-1 until then) and consumed by ret_addr_offset below.
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M byte: mode (2 bits), reg/opcode (3 bits), r/m (3 bits).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte composed of a base opcode OR'd with a condition code.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
// Registers a relocation at insts_mark()+offset, then emits the opcode byte.
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Embedded oops must not be scavengable unless the code cache is scanned
  // as a root set; Universe::non_oop_word() is the patching placeholder.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + [ESP+disp] addressing, choosing the 8- or 32-bit
// displacement form.  ESP as base always requires a SIB byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModR/M (and SIB/displacement as needed) bytes for a reg,[mem]
// operand pair.  index == 0x4 means "no index"; base == -1 means an
// absolute 32-bit address; disp_reloc != none forces the 32-bit
// displacement form so the field can be relocated.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Emit a 32-bit reg-to-reg MOV (0x8B /r); a copy onto itself emits nothing.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// After a comiss/ucomiss, rewrite the EFLAGS for NaN operands (PF set) so
// that the unordered result reads as 'less than' (CF set, ZF/PF cleared).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result into dst:
// -1 for less-than or unordered (parity), 0 for equal, 1 for greater.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
// On x86_32 the constant table is reached by absolute addressing, so the
// base node needs no register and emits no code.
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog; mirrors the code emitted by MacroAssembler::verified_entry
// (see MachPrologNode::emit below).
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    // Bang path pushes EBP explicitly, so take another word off the SUB amount.
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    // No bang: allocate the frame first, then store EBP into its slot.
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


// Emit the method prolog (frame setup is delegated to verified_entry) and
// record the frame-complete offset plus the constant-table base offset.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog; mirrors MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW  standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL   EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST   PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

// Emit the method epilog: optional vzeroupper / FPU control word restore,
// frame teardown, EBP pop, and the return-point safepoint poll.
// MachEpilogNode::size below must account for every byte emitted here.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    MacroAssembler masm(&cbuf);
    masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize (32-bit immediate)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize (8-bit immediate)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (do_polling() && C->is_method_compilation()) {
    // TEST [polling page], EAX -- faults at a safepoint; the relocation
    // marks this as the return-point poll.
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

// Exact byte size of the epilog emitted above; the per-piece constants
// (6 fldcw, 3 vzeroupper, 6 poll, 1 pop, 6/3 add) must stay in sync with
// MachEpilogNode::emit.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Register classes used by the spill-copy code below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
// Classify an OptoReg as invalid, stack slot, integer, x87 float, or XMM.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Spill-copy helper for a single reg<->[ESP+offset] move.  With a cbuf it
// emits opcode + RegMem encoding; with a stream (and !do_size) it prints the
// instruction; in all cases it returns the accumulated size in bytes
// (3 bytes of opcode/ModRM/SIB plus 0/1/4 displacement bytes).
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Emit/print/size an XMM<->stack spill move (MOVSS/MOVSD family).  A pair of
// adjacent registers (reg_lo+1 == reg_hi) denotes a 64-bit double move.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    // EVEX can compress a displacement that is a multiple of the operand
    // size down to a single byte (disp8*N).
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// Emit/print/size an XMM-to-XMM register-to-register move.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Emit/print/size a GPR-to-XMM move (MOVD).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// Emit/print/size an XMM-to-GPR move (MOVD).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Emit/print/size a GPR-to-GPR move (MOV r32,r/m32: opcode 0x8B).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Emit/print/size a store from an x87 register to [ESP + offset].  When the
// value is not already in FPR1 it is first pushed with FLD, then stored with
// the pop form (FSTP); otherwise the no-pop form (FST) is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // EBX_num/EDX_num are used only for their encodings (3 resp. 2), which
  // select the FSTP (store & pop) resp. FST (store, no pop) ModRM reg field.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
954 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 955 int src_hi, int dst_hi, uint ireg, outputStream* st); 956 957 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 958 int stack_offset, int reg, uint ireg, outputStream* st); 959 960 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 961 int dst_offset, uint ireg, outputStream* st) { 962 int calc_size = 0; 963 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 964 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 965 switch (ireg) { 966 case Op_VecS: 967 calc_size = 3+src_offset_size + 3+dst_offset_size; 968 break; 969 case Op_VecD: 970 calc_size = 3+src_offset_size + 3+dst_offset_size; 971 src_offset += 4; 972 dst_offset += 4; 973 src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 974 dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 975 calc_size += 3+src_offset_size + 3+dst_offset_size; 976 break; 977 case Op_VecX: 978 case Op_VecY: 979 case Op_VecZ: 980 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 981 break; 982 default: 983 ShouldNotReachHere(); 984 } 985 if (cbuf) { 986 MacroAssembler _masm(cbuf); 987 int offset = __ offset(); 988 switch (ireg) { 989 case Op_VecS: 990 __ pushl(Address(rsp, src_offset)); 991 __ popl (Address(rsp, dst_offset)); 992 break; 993 case Op_VecD: 994 __ pushl(Address(rsp, src_offset)); 995 __ popl (Address(rsp, dst_offset)); 996 __ pushl(Address(rsp, src_offset+4)); 997 __ popl (Address(rsp, dst_offset+4)); 998 break; 999 case Op_VecX: 1000 __ movdqu(Address(rsp, -16), xmm0); 1001 __ movdqu(xmm0, Address(rsp, src_offset)); 1002 __ movdqu(Address(rsp, dst_offset), xmm0); 1003 __ movdqu(xmm0, Address(rsp, -16)); 1004 break; 1005 case Op_VecY: 1006 __ vmovdqu(Address(rsp, -32), xmm0); 1007 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1008 __ vmovdqu(Address(rsp, dst_offset), xmm0); 
1009 __ vmovdqu(xmm0, Address(rsp, -32)); 1010 case Op_VecZ: 1011 __ evmovdqul(Address(rsp, -64), xmm0, 2); 1012 __ evmovdqul(xmm0, Address(rsp, src_offset), 2); 1013 __ evmovdqul(Address(rsp, dst_offset), xmm0, 2); 1014 __ evmovdqul(xmm0, Address(rsp, -64), 2); 1015 break; 1016 default: 1017 ShouldNotReachHere(); 1018 } 1019 int size = __ offset() - offset; 1020 assert(size == calc_size, "incorrect size calculattion"); 1021 return size; 1022 #ifndef PRODUCT 1023 } else if (!do_size) { 1024 switch (ireg) { 1025 case Op_VecS: 1026 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1027 "popl [rsp + #%d]", 1028 src_offset, dst_offset); 1029 break; 1030 case Op_VecD: 1031 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1032 "popq [rsp + #%d]\n\t" 1033 "pushl [rsp + #%d]\n\t" 1034 "popq [rsp + #%d]", 1035 src_offset, dst_offset, src_offset+4, dst_offset+4); 1036 break; 1037 case Op_VecX: 1038 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1039 "movdqu xmm0, [rsp + #%d]\n\t" 1040 "movdqu [rsp + #%d], xmm0\n\t" 1041 "movdqu xmm0, [rsp - #16]", 1042 src_offset, dst_offset); 1043 break; 1044 case Op_VecY: 1045 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1046 "vmovdqu xmm0, [rsp + #%d]\n\t" 1047 "vmovdqu [rsp + #%d], xmm0\n\t" 1048 "vmovdqu xmm0, [rsp - #32]", 1049 src_offset, dst_offset); 1050 case Op_VecZ: 1051 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1052 "vmovdqu xmm0, [rsp + #%d]\n\t" 1053 "vmovdqu [rsp + #%d], xmm0\n\t" 1054 "vmovdqu xmm0, [rsp - #64]", 1055 src_offset, dst_offset); 1056 break; 1057 default: 1058 ShouldNotReachHere(); 1059 } 1060 #endif 1061 } 1062 return calc_size; 1063 } 1064 1065 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1066 // Get registers to move 1067 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1068 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1069 OptoReg::Name 
                dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  // Classify each half of source and destination.
  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  // Vector spill copies are delegated entirely to the shared vec_* helpers.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move. push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high half first so the low-half copy does not clobber it.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
        emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
        emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode (*cbuf, 0xDD ); // FST ST(i)
        emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP is two 2-byte instructions; FST alone is one.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
      emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Store the x87 value into the temp slot just allocated at [ESP].
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
// Print the spill copy (cbuf == NULL selects the print path).
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

// Emit the spill copy into the code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

// Compute the encoded size of the spill copy without emitting it.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg,
            0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// Size of the LEA emitted above: 7 bytes with a 32-bit displacement,
// 4 bytes with an 8-bit displacement.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// CMP (3) + JNE (6) + 2 or 3 NOPs; must match the emit above exactly.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}

// This is UltraSparc specific, true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
// this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 1; }

// No CMOVF/CMOVD with SSE/SSE2
const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis(); // not applicable on 32-bit x86 (no compressed oops)
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis(); // not applicable on 32-bit x86 (no compressed klass)
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Rewrite the memory operand of an implicit-null-check candidate into its
// "win95-safe" variant where required; DIRECT/INDOFFSET32X and the
// load-long forms need no transformation.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds(); // Virtual call for number of operands
  uint skipped = node->oper_input_base(); // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1; // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++; // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere(); // no divmodL on 32-bit x86
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere(); // no divmodL on 32-bit x86
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    // AndL with a constant whose high word is zero forces the result's
    // high word to zero as well.
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams. Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword. There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
// operand to generate a function which returns its register number when
// queried. CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried. COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding. They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode. Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block. Emit functions will live in the
  // main source block for now. In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Load a 32-bit zero into a register.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32 ( cbuf, 0x0 ); // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    // normal case special case
    //
    // input : rax,: dividend min_int
    // reg: divisor -1
    //
    // output: rax,: quotient (= rax, idiv reg) min_int
    // rdx: remainder (= rax, irem reg) 0
    //
    // Code sequence:
    //
    // 81 F8 00 00 00 80 cmp rax,80000000h
    // 0F 85 0B 00 00 00 jne normal_case
    // 33 D2 xor rdx,edx
    // 83 F9 FF cmp rcx,0FFh
    // 0F 84 03 00 00 00 je done
    // normal_case:
    // 99 cdq
    // F7 F9 idiv rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
    // normal_case:
    emit_opcode(cbuf,0x99); // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and
set sign-extend bit 1714 // Check for 8-bit immediate, and set sign extend bit in opcode 1715 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1716 emit_opcode(cbuf, $primary | 0x02); } 1717 else { // If 32-bit immediate 1718 emit_opcode(cbuf, $primary); 1719 } 1720 // Emit r/m byte with secondary opcode, after primary opcode. 1721 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1722 %} 1723 1724 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits 1725 // Check for 8-bit immediate, and set sign extend bit in opcode 1726 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { 1727 $$$emit8$imm$$constant; 1728 } 1729 else { // If 32-bit immediate 1730 // Output immediate 1731 $$$emit32$imm$$constant; 1732 } 1733 %} 1734 1735 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ 1736 // Emit primary opcode and set sign-extend bit 1737 // Check for 8-bit immediate, and set sign extend bit in opcode 1738 int con = (int)$imm$$constant; // Throw away top bits 1739 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1740 // Emit r/m byte with secondary opcode, after primary opcode. 1741 emit_rm(cbuf, 0x3, $secondary, $dst$$reg); 1742 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1743 else emit_d32(cbuf,con); 1744 %} 1745 1746 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ 1747 // Emit primary opcode and set sign-extend bit 1748 // Check for 8-bit immediate, and set sign extend bit in opcode 1749 int con = (int)($imm$$constant >> 32); // Throw away bottom bits 1750 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); 1751 // Emit r/m byte with tertiary opcode, after primary opcode. 
1752 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg)); 1753 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con); 1754 else emit_d32(cbuf,con); 1755 %} 1756 1757 enc_class OpcSReg (rRegI dst) %{ // BSWAP 1758 emit_cc(cbuf, $secondary, $dst$$reg ); 1759 %} 1760 1761 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP 1762 int destlo = $dst$$reg; 1763 int desthi = HIGH_FROM_LOW(destlo); 1764 // bswap lo 1765 emit_opcode(cbuf, 0x0F); 1766 emit_cc(cbuf, 0xC8, destlo); 1767 // bswap hi 1768 emit_opcode(cbuf, 0x0F); 1769 emit_cc(cbuf, 0xC8, desthi); 1770 // xchg lo and hi 1771 emit_opcode(cbuf, 0x87); 1772 emit_rm(cbuf, 0x3, destlo, desthi); 1773 %} 1774 1775 enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... 1776 emit_rm(cbuf, 0x3, $secondary, $div$$reg ); 1777 %} 1778 1779 enc_class enc_cmov(cmpOp cop ) %{ // CMOV 1780 $$$emit8$primary; 1781 emit_cc(cbuf, $secondary, $cop$$cmpcode); 1782 %} 1783 1784 enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV 1785 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); 1786 emit_d8(cbuf, op >> 8 ); 1787 emit_d8(cbuf, op & 255); 1788 %} 1789 1790 // emulate a CMOV with a conditional branch around a MOV 1791 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV 1792 // Invert sense of branch from sense of CMOV 1793 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) ); 1794 emit_d8( cbuf, $brOffs$$constant ); 1795 %} 1796 1797 enc_class enc_PartialSubtypeCheck( ) %{ 1798 Register Redi = as_Register(EDI_enc); // result register 1799 Register Reax = as_Register(EAX_enc); // super class 1800 Register Recx = as_Register(ECX_enc); // killed 1801 Register Resi = as_Register(ESI_enc); // sub class 1802 Label miss; 1803 1804 MacroAssembler _masm(&cbuf); 1805 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, 1806 NULL, &miss, 1807 /*set_cond_codes:*/ true); 1808 if ($primary) { 1809 __ xorptr(Redi, Redi); 1810 } 1811 __ bind(miss); 1812 %} 1813 1814 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All 1815 
MacroAssembler masm(&cbuf); 1816 int start = masm.offset(); 1817 if (UseSSE >= 2) { 1818 if (VerifyFPU) { 1819 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1820 } 1821 } else { 1822 // External c_calling_convention expects the FPU stack to be 'clean'. 1823 // Compiled code leaves it dirty. Do cleanup now. 1824 masm.empty_FPU_stack(); 1825 } 1826 if (sizeof_FFree_Float_Stack_All == -1) { 1827 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1828 } else { 1829 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1830 } 1831 %} 1832 1833 enc_class Verify_FPU_For_Leaf %{ 1834 if( VerifyFPU ) { 1835 MacroAssembler masm(&cbuf); 1836 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1837 } 1838 %} 1839 1840 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1841 // This is the instruction starting address for relocation info. 1842 cbuf.set_insts_mark(); 1843 $$$emit8$primary; 1844 // CALL directly to the runtime 1845 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1846 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1847 1848 if (UseSSE >= 2) { 1849 MacroAssembler _masm(&cbuf); 1850 BasicType rt = tf()->return_type(); 1851 1852 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1853 // A C runtime call where the return value is unused. In SSE2+ 1854 // mode the result needs to be removed from the FPU stack. It's 1855 // likely that this function call could be removed by the 1856 // optimizer if the C function is a pure function. 
1857 __ ffree(0); 1858 } else if (rt == T_FLOAT) { 1859 __ lea(rsp, Address(rsp, -4)); 1860 __ fstp_s(Address(rsp, 0)); 1861 __ movflt(xmm0, Address(rsp, 0)); 1862 __ lea(rsp, Address(rsp, 4)); 1863 } else if (rt == T_DOUBLE) { 1864 __ lea(rsp, Address(rsp, -8)); 1865 __ fstp_d(Address(rsp, 0)); 1866 __ movdbl(xmm0, Address(rsp, 0)); 1867 __ lea(rsp, Address(rsp, 8)); 1868 } 1869 } 1870 %} 1871 1872 1873 enc_class pre_call_resets %{ 1874 // If method sets FPU control word restore it here 1875 debug_only(int off0 = cbuf.insts_size()); 1876 if (ra_->C->in_24_bit_fp_mode()) { 1877 MacroAssembler _masm(&cbuf); 1878 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1879 } 1880 if (ra_->C->max_vector_size() > 16) { 1881 // Clear upper bits of YMM registers when current compiled code uses 1882 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1883 MacroAssembler _masm(&cbuf); 1884 __ vzeroupper(); 1885 } 1886 debug_only(int off1 = cbuf.insts_size()); 1887 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1888 %} 1889 1890 enc_class post_call_FPU %{ 1891 // If method sets FPU control word do it here also 1892 if (Compile::current()->in_24_bit_fp_mode()) { 1893 MacroAssembler masm(&cbuf); 1894 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1895 } 1896 %} 1897 1898 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1899 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1900 // who we intended to call. 
1901 cbuf.set_insts_mark(); 1902 $$$emit8$primary; 1903 if (!_method) { 1904 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1905 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1906 } else if (_optimized_virtual) { 1907 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1908 opt_virtual_call_Relocation::spec(), RELOC_IMM32 ); 1909 } else { 1910 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1911 static_call_Relocation::spec(), RELOC_IMM32 ); 1912 } 1913 if (_method) { // Emit stub for static call. 1914 address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); 1915 if (stub == NULL) { 1916 ciEnv::current()->record_failure("CodeCache is full"); 1917 return; 1918 } 1919 } 1920 %} 1921 1922 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1923 MacroAssembler _masm(&cbuf); 1924 __ ic_call((address)$meth$$method); 1925 %} 1926 1927 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1928 int disp = in_bytes(Method::from_compiled_offset()); 1929 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1930 1931 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1932 cbuf.set_insts_mark(); 1933 $$$emit8$primary; 1934 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1935 emit_d8(cbuf, disp); // Displacement 1936 1937 %} 1938 1939 // Following encoding is no longer used, but may be restored if calling 1940 // convention changes significantly. 
  // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  // enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //   // int ic_reg     = Matcher::inline_cache_reg();
  //   // int ic_encode  = Matcher::_regEncode[ic_reg];
  //   // int imo_reg    = Matcher::interpreter_method_oop_reg();
  //   // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //   // // Interpreter expects method_oop in EBX, currently a callee-saved register,
  //   // // so we load it immediately before the call
  //   // emit_opcode(cbuf, 0x8B);  // MOV    imo_reg,ic_reg  # method_oop
  //   // emit_rm(cbuf, 0x03, imo_encode, ic_encode );  // R/M byte
  //
  //   // xor rbp,ebp
  //   emit_opcode(cbuf, 0x33);
  //   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //   // CALL to interpreter.
  //   cbuf.set_insts_mark();
  //   $$$emit8$primary;
  //   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //               runtime_call_Relocation::spec(), RELOC_IMM32 );
  // %}

  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the low half of a long immediate; a zero half is materialized
  // with the shorter XOR idiom instead of MOV imm32.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the high half of a long immediate into the paired register.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // Emit the LOCK prefix (0xF0) only on multiprocessor systems.
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize the not-equal condition flag as a 0/1 integer result.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();     // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Shift a register pair by a 1..31 constant: SHLD/SHRD moves bits across
  // the halves, then the remaining half is shifted in place.  $tertiary
  // selects direction (0xA4 == SHLD).
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a register pair by a 32..63 constant.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    // Fill the high half with the sign: shift by 31.
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical shift of a register pair by a 32..63 constant; the vacated
  // half is cleared with XOR.  $secondary selects direction (0x5 == SHR).
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free conditional add: p += (p < q) ? y : 0, via SBB mask.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Variable (CL) shift of a register pair; shift counts >= 32 are handled
  // by moving one half into the other first.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Move two XMM doubles onto the x87 stack through a stack temp.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Move two XMM floats onto the x87 stack through a stack temp.
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 TOS double into an XMM register and release the stack temp.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Pop the x87 TOS float into an XMM register; d8 is the temp size to release.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP       // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  // fnstsw_ax();
  // sahf();
  // movl(dst, nan_result);
  // jcc(Assembler::parity, exit);
  // movl(dst, less_result);
  // jcc(Assembler::below, exit);
  // movl(dst, equal_result);
  // jcc(Assembler::equal, exit);
  // movl(dst, greater_result);

  // less_result     = 1;
  // greater_result  = -1;
  // equal_result    = 0;
  // nan_result      = -1;

  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
2707 emit_d8 ( cbuf, 0x0C ); 2708 // movl(dst, equal_result); 2709 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2710 emit_d32( cbuf, 0 ); 2711 // jcc(Assembler::equal, exit); 2712 emit_opcode( cbuf, 0x74 ); 2713 emit_d8 ( cbuf, 0x05 ); 2714 // movl(dst, greater_result); 2715 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2716 emit_d32( cbuf, 1 ); 2717 %} 2718 2719 2720 // Compare the longs and set flags 2721 // BROKEN! Do Not use as-is 2722 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2723 // CMP $src1.hi,$src2.hi 2724 emit_opcode( cbuf, 0x3B ); 2725 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2726 // JNE,s done 2727 emit_opcode(cbuf,0x75); 2728 emit_d8(cbuf, 2 ); 2729 // CMP $src1.lo,$src2.lo 2730 emit_opcode( cbuf, 0x3B ); 2731 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2732 // done: 2733 %} 2734 2735 enc_class convert_int_long( regL dst, rRegI src ) %{ 2736 // mov $dst.lo,$src 2737 int dst_encoding = $dst$$reg; 2738 int src_encoding = $src$$reg; 2739 encode_Copy( cbuf, dst_encoding , src_encoding ); 2740 // mov $dst.hi,$src 2741 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2742 // sar $dst.hi,31 2743 emit_opcode( cbuf, 0xC1 ); 2744 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2745 emit_d8(cbuf, 0x1F ); 2746 %} 2747 2748 enc_class convert_long_double( eRegL src ) %{ 2749 // push $src.hi 2750 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2751 // push $src.lo 2752 emit_opcode(cbuf, 0x50+$src$$reg ); 2753 // fild 64-bits at [SP] 2754 emit_opcode(cbuf,0xdf); 2755 emit_d8(cbuf, 0x6C); 2756 emit_d8(cbuf, 0x24); 2757 emit_d8(cbuf, 0x00); 2758 // pop stack 2759 emit_opcode(cbuf, 0x83); // add SP, #8 2760 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2761 emit_d8(cbuf, 0x8); 2762 %} 2763 2764 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2765 // IMUL EDX:EAX,$src1 2766 emit_opcode( cbuf, 0xF7 ); 2767 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2768 // SAR 
EDX,$cnt-32 2769 int shift_count = ((int)$cnt$$constant) - 32; 2770 if (shift_count > 0) { 2771 emit_opcode(cbuf, 0xC1); 2772 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2773 emit_d8(cbuf, shift_count); 2774 } 2775 %} 2776 2777 // this version doesn't have add sp, 8 2778 enc_class convert_long_double2( eRegL src ) %{ 2779 // push $src.hi 2780 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2781 // push $src.lo 2782 emit_opcode(cbuf, 0x50+$src$$reg ); 2783 // fild 64-bits at [SP] 2784 emit_opcode(cbuf,0xdf); 2785 emit_d8(cbuf, 0x6C); 2786 emit_d8(cbuf, 0x24); 2787 emit_d8(cbuf, 0x00); 2788 %} 2789 2790 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2791 // Basic idea: long = (long)int * (long)int 2792 // IMUL EDX:EAX, src 2793 emit_opcode( cbuf, 0xF7 ); 2794 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2795 %} 2796 2797 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2798 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2799 // MUL EDX:EAX, src 2800 emit_opcode( cbuf, 0xF7 ); 2801 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2802 %} 2803 2804 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2805 // Basic idea: lo(result) = lo(x_lo * y_lo) 2806 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2807 // MOV $tmp,$src.lo 2808 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2809 // IMUL $tmp,EDX 2810 emit_opcode( cbuf, 0x0F ); 2811 emit_opcode( cbuf, 0xAF ); 2812 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2813 // MOV EDX,$src.hi 2814 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2815 // IMUL EDX,EAX 2816 emit_opcode( cbuf, 0x0F ); 2817 emit_opcode( cbuf, 0xAF ); 2818 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2819 // ADD $tmp,EDX 2820 emit_opcode( cbuf, 0x03 ); 2821 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2822 // MUL EDX:EAX,$src.lo 2823 emit_opcode( cbuf, 0xF7 ); 2824 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2825 // ADD EDX,ESI 2826 emit_opcode( 
cbuf, 0x03 ); 2827 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2828 %} 2829 2830 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2831 // Basic idea: lo(result) = lo(src * y_lo) 2832 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2833 // IMUL $tmp,EDX,$src 2834 emit_opcode( cbuf, 0x6B ); 2835 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2836 emit_d8( cbuf, (int)$src$$constant ); 2837 // MOV EDX,$src 2838 emit_opcode(cbuf, 0xB8 + EDX_enc); 2839 emit_d32( cbuf, (int)$src$$constant ); 2840 // MUL EDX:EAX,EDX 2841 emit_opcode( cbuf, 0xF7 ); 2842 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2843 // ADD EDX,ESI 2844 emit_opcode( cbuf, 0x03 ); 2845 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2846 %} 2847 2848 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2849 // PUSH src1.hi 2850 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2851 // PUSH src1.lo 2852 emit_opcode(cbuf, 0x50+$src1$$reg ); 2853 // PUSH src2.hi 2854 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2855 // PUSH src2.lo 2856 emit_opcode(cbuf, 0x50+$src2$$reg ); 2857 // CALL directly to the runtime 2858 cbuf.set_insts_mark(); 2859 emit_opcode(cbuf,0xE8); // Call into runtime 2860 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2861 // Restore stack 2862 emit_opcode(cbuf, 0x83); // add SP, #framesize 2863 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2864 emit_d8(cbuf, 4*4); 2865 %} 2866 2867 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2868 // PUSH src1.hi 2869 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2870 // PUSH src1.lo 2871 emit_opcode(cbuf, 0x50+$src1$$reg ); 2872 // PUSH src2.hi 2873 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2874 // PUSH src2.lo 2875 emit_opcode(cbuf, 0x50+$src2$$reg ); 2876 // CALL directly to the runtime 2877 cbuf.set_insts_mark(); 2878 emit_opcode(cbuf,0xE8); // Call into runtime 2879 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2880 // Restore stack 2881 emit_opcode(cbuf, 0x83); // add SP, #framesize 2882 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2883 emit_d8(cbuf, 4*4); 2884 %} 2885 2886 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2887 // MOV $tmp,$src.lo 2888 emit_opcode(cbuf, 0x8B); 2889 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2890 // OR $tmp,$src.hi 2891 emit_opcode(cbuf, 0x0B); 2892 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2893 %} 2894 2895 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2896 // CMP $src1.lo,$src2.lo 2897 emit_opcode( cbuf, 0x3B ); 2898 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2899 // JNE,s skip 2900 emit_cc(cbuf, 0x70, 0x5); 2901 emit_d8(cbuf,2); 2902 // CMP $src1.hi,$src2.hi 2903 emit_opcode( cbuf, 0x3B ); 2904 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2905 %} 2906 2907 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2908 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2909 emit_opcode( cbuf, 0x3B ); 2910 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2911 // MOV $tmp,$src1.hi 2912 emit_opcode( cbuf, 0x8B ); 2913 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2914 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2915 emit_opcode( cbuf, 0x1B ); 2916 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2917 %} 2918 2919 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2920 // XOR $tmp,$tmp 2921 emit_opcode(cbuf,0x33); // XOR 2922 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2923 // CMP $tmp,$src.lo 2924 emit_opcode( cbuf, 0x3B ); 2925 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2926 // SBB $tmp,$src.hi 2927 emit_opcode( cbuf, 0x1B ); 2928 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2929 %} 2930 2931 // Sniff, sniff... 
  // smells like Gnu Superoptimizer
  // Two's-complement negate of a long register pair without branches:
  // NEG hi; NEG lo; SBB hi,0 folds the borrow from the low half back in.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}

  // POP EDX
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}

  // Tail-jump to the rethrow stub (relocated runtime call target).
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}


  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // thrown an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    // 0x80000000 is the hardware's "invalid" indefinite-integer result;
    // only then take the slow path through the runtime wrapper.
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  // Same scheme as DPR2I_encoding, but converts to a 64-bit long:
  // allocate 8 bytes, FISTP m64, and test EDX:EAX against the
  // long indefinite value 0x8000000000000000 before the slow path.
  enc_class DPR2L_encoding( regDPR src ) %{
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW  std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x5A);       // POP EDX
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);     // Size of slow_call
    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}

  enc_class FMul_ST_reg( eRegFPR src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}

  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADDP  ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}

  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}

  // Fused SUB-then-DIV against the value currently at stack top.
  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
    // Operand has been loaded into fp ST (stack top)
      // FSUB   ST,$src1
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xE0 + $src1$$reg);

      // FDIV
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}

  // ADD then MUL against the value currently at stack top.
  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}


  // ADD at stack top, then FMULP which pops the stack into src2.
  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}

  // Atomically load the volatile long
  // FILD m64 from $mem (a single 64-bit read), then FISTP into the
  // destination stack slot.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}

  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  // Safepoint Poll.  This polls the safepoint page, and causes an
  // exception if it is not readable. Unfortunately, it kills the condition code
  // in the process
  // We current use TESTL [spp],EDI
  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0

  enc_class Safepoint_Poll() %{
    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm (cbuf, 0x0, 0x7, 0x5);
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  %}
%}


//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             (to get allocators register number
//  G   Owned by |        |    v    add OptoReg::stack0())
//  r   CALLER   |        |
//  o   |        +--------+      pad to even-align allocators stack-slot
//  w   V        |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h   ^        |   in   |  5
//      |        |  args  |  4   Holes in incoming args owned by SELF
//      |        |        |  3
//      |        +--------+
//  V   |        | old out|      Empty on Intel, window on Sparc
//      |    old |preserve|      Must be even aligned.
//      |     SP-+--------+----> Matcher::_old_SP, even aligned
//      |        |   in   |  3   area for Intel ret address
//   Owned by    |preserve|      Empty on Sparc.
//     SELF      +--------+
//      |        |  pad2  |  2   pad to align old SP
//      |        +--------+  1
//      |        | locks  |  0
//      |        +--------+----> OptoReg::stack0(), even aligned
//      |        |  pad1  | 11   pad to align new SP
//      |        +--------+
//      |        |        | 10
//      |        | spills |  9   spills
//      V        |        |  8   (pad0 slot for callee)
//    -----------+--------+----> Matcher::_out_arg_limit, unaligned
//      ^        |  out   |  7
//      |        |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by    +--------+
//     CALLEE    | new out|  6   Empty on Intel, window on Sparc
//      |    new |preserve|      Must be even-aligned.
//      |     SP-+--------+----> Matcher::_new_SP, even aligned
//      |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    // NOTE: Java code uses XMM0 for float results with UseSSE>=1 (not >=2
    // as in c_return_value above); that asymmetry is intentional here.
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Signed 8-bit immediate (fits the imm8 instruction forms)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Signed 16-bit immediate (fits the imm16 instruction forms)
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in [1,31] (low half of a long shift)
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in [32,63] (high half of a long shift)
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value representable as a sign-extended 32-bit int
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 path, only when not using SSE2 for doubles)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 path)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 path)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE path)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0 (bit-pattern compare)
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register (the byte-addressable EAX/EBX/ECX/EDX)
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand
eCXRegI(xRegI reg) %{ 3721 constraint(ALLOC_IN_RC(ecx_reg)); 3722 match(reg); 3723 match(rRegI); 3724 3725 format %{ "ECX" %} 3726 interface(REG_INTER); 3727 %} 3728 3729 operand eDXRegI(xRegI reg) %{ 3730 constraint(ALLOC_IN_RC(edx_reg)); 3731 match(reg); 3732 match(rRegI); 3733 3734 format %{ "EDX" %} 3735 interface(REG_INTER); 3736 %} 3737 3738 operand eDIRegI(xRegI reg) %{ 3739 constraint(ALLOC_IN_RC(edi_reg)); 3740 match(reg); 3741 match(rRegI); 3742 3743 format %{ "EDI" %} 3744 interface(REG_INTER); 3745 %} 3746 3747 operand naxRegI() %{ 3748 constraint(ALLOC_IN_RC(nax_reg)); 3749 match(RegI); 3750 match(eCXRegI); 3751 match(eDXRegI); 3752 match(eSIRegI); 3753 match(eDIRegI); 3754 3755 format %{ %} 3756 interface(REG_INTER); 3757 %} 3758 3759 operand nadxRegI() %{ 3760 constraint(ALLOC_IN_RC(nadx_reg)); 3761 match(RegI); 3762 match(eBXRegI); 3763 match(eCXRegI); 3764 match(eSIRegI); 3765 match(eDIRegI); 3766 3767 format %{ %} 3768 interface(REG_INTER); 3769 %} 3770 3771 operand ncxRegI() %{ 3772 constraint(ALLOC_IN_RC(ncx_reg)); 3773 match(RegI); 3774 match(eAXRegI); 3775 match(eDXRegI); 3776 match(eSIRegI); 3777 match(eDIRegI); 3778 3779 format %{ %} 3780 interface(REG_INTER); 3781 %} 3782 3783 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3784 // // 3785 operand eSIRegI(xRegI reg) %{ 3786 constraint(ALLOC_IN_RC(esi_reg)); 3787 match(reg); 3788 match(rRegI); 3789 3790 format %{ "ESI" %} 3791 interface(REG_INTER); 3792 %} 3793 3794 // Pointer Register 3795 operand anyRegP() %{ 3796 constraint(ALLOC_IN_RC(any_reg)); 3797 match(RegP); 3798 match(eAXRegP); 3799 match(eBXRegP); 3800 match(eCXRegP); 3801 match(eDIRegP); 3802 match(eRegP); 3803 3804 format %{ %} 3805 interface(REG_INTER); 3806 %} 3807 3808 operand eRegP() %{ 3809 constraint(ALLOC_IN_RC(int_reg)); 3810 match(RegP); 3811 match(eAXRegP); 3812 match(eBXRegP); 3813 match(eCXRegP); 3814 match(eDIRegP); 3815 3816 format %{ %} 3817 interface(REG_INTER); 3818 %} 3819 3820 // 
On windows95, EBP is not safe to use for implicit null tests. 3821 operand eRegP_no_EBP() %{ 3822 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3823 match(RegP); 3824 match(eAXRegP); 3825 match(eBXRegP); 3826 match(eCXRegP); 3827 match(eDIRegP); 3828 3829 op_cost(100); 3830 format %{ %} 3831 interface(REG_INTER); 3832 %} 3833 3834 operand naxRegP() %{ 3835 constraint(ALLOC_IN_RC(nax_reg)); 3836 match(RegP); 3837 match(eBXRegP); 3838 match(eDXRegP); 3839 match(eCXRegP); 3840 match(eSIRegP); 3841 match(eDIRegP); 3842 3843 format %{ %} 3844 interface(REG_INTER); 3845 %} 3846 3847 operand nabxRegP() %{ 3848 constraint(ALLOC_IN_RC(nabx_reg)); 3849 match(RegP); 3850 match(eCXRegP); 3851 match(eDXRegP); 3852 match(eSIRegP); 3853 match(eDIRegP); 3854 3855 format %{ %} 3856 interface(REG_INTER); 3857 %} 3858 3859 operand pRegP() %{ 3860 constraint(ALLOC_IN_RC(p_reg)); 3861 match(RegP); 3862 match(eBXRegP); 3863 match(eDXRegP); 3864 match(eSIRegP); 3865 match(eDIRegP); 3866 3867 format %{ %} 3868 interface(REG_INTER); 3869 %} 3870 3871 // Special Registers 3872 // Return a pointer value 3873 operand eAXRegP(eRegP reg) %{ 3874 constraint(ALLOC_IN_RC(eax_reg)); 3875 match(reg); 3876 format %{ "EAX" %} 3877 interface(REG_INTER); 3878 %} 3879 3880 // Used in AtomicAdd 3881 operand eBXRegP(eRegP reg) %{ 3882 constraint(ALLOC_IN_RC(ebx_reg)); 3883 match(reg); 3884 format %{ "EBX" %} 3885 interface(REG_INTER); 3886 %} 3887 3888 // Tail-call (interprocedural jump) to interpreter 3889 operand eCXRegP(eRegP reg) %{ 3890 constraint(ALLOC_IN_RC(ecx_reg)); 3891 match(reg); 3892 format %{ "ECX" %} 3893 interface(REG_INTER); 3894 %} 3895 3896 operand eSIRegP(eRegP reg) %{ 3897 constraint(ALLOC_IN_RC(esi_reg)); 3898 match(reg); 3899 format %{ "ESI" %} 3900 interface(REG_INTER); 3901 %} 3902 3903 // Used in rep stosw 3904 operand eDIRegP(eRegP reg) %{ 3905 constraint(ALLOC_IN_RC(edi_reg)); 3906 match(reg); 3907 format %{ "EDI" %} 3908 interface(REG_INTER); 3909 %} 3910 3911 operand eRegL() %{ 
3912 constraint(ALLOC_IN_RC(long_reg)); 3913 match(RegL); 3914 match(eADXRegL); 3915 3916 format %{ %} 3917 interface(REG_INTER); 3918 %} 3919 3920 operand eADXRegL( eRegL reg ) %{ 3921 constraint(ALLOC_IN_RC(eadx_reg)); 3922 match(reg); 3923 3924 format %{ "EDX:EAX" %} 3925 interface(REG_INTER); 3926 %} 3927 3928 operand eBCXRegL( eRegL reg ) %{ 3929 constraint(ALLOC_IN_RC(ebcx_reg)); 3930 match(reg); 3931 3932 format %{ "EBX:ECX" %} 3933 interface(REG_INTER); 3934 %} 3935 3936 // Special case for integer high multiply 3937 operand eADXRegL_low_only() %{ 3938 constraint(ALLOC_IN_RC(eadx_reg)); 3939 match(RegL); 3940 3941 format %{ "EAX" %} 3942 interface(REG_INTER); 3943 %} 3944 3945 // Flags register, used as output of compare instructions 3946 operand eFlagsReg() %{ 3947 constraint(ALLOC_IN_RC(int_flags)); 3948 match(RegFlags); 3949 3950 format %{ "EFLAGS" %} 3951 interface(REG_INTER); 3952 %} 3953 3954 // Flags register, used as output of FLOATING POINT compare instructions 3955 operand eFlagsRegU() %{ 3956 constraint(ALLOC_IN_RC(int_flags)); 3957 match(RegFlags); 3958 3959 format %{ "EFLAGS_U" %} 3960 interface(REG_INTER); 3961 %} 3962 3963 operand eFlagsRegUCF() %{ 3964 constraint(ALLOC_IN_RC(int_flags)); 3965 match(RegFlags); 3966 predicate(false); 3967 3968 format %{ "EFLAGS_U_CF" %} 3969 interface(REG_INTER); 3970 %} 3971 3972 // Condition Code Register used by long compare 3973 operand flagsReg_long_LTGE() %{ 3974 constraint(ALLOC_IN_RC(int_flags)); 3975 match(RegFlags); 3976 format %{ "FLAGS_LTGE" %} 3977 interface(REG_INTER); 3978 %} 3979 operand flagsReg_long_EQNE() %{ 3980 constraint(ALLOC_IN_RC(int_flags)); 3981 match(RegFlags); 3982 format %{ "FLAGS_EQNE" %} 3983 interface(REG_INTER); 3984 %} 3985 operand flagsReg_long_LEGT() %{ 3986 constraint(ALLOC_IN_RC(int_flags)); 3987 match(RegFlags); 3988 format %{ "FLAGS_LEGT" %} 3989 interface(REG_INTER); 3990 %} 3991 3992 // Float register operands 3993 operand regDPR() %{ 3994 predicate( UseSSE < 2 ); 
3995 constraint(ALLOC_IN_RC(fp_dbl_reg)); 3996 match(RegD); 3997 match(regDPR1); 3998 match(regDPR2); 3999 format %{ %} 4000 interface(REG_INTER); 4001 %} 4002 4003 operand regDPR1(regDPR reg) %{ 4004 predicate( UseSSE < 2 ); 4005 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4006 match(reg); 4007 format %{ "FPR1" %} 4008 interface(REG_INTER); 4009 %} 4010 4011 operand regDPR2(regDPR reg) %{ 4012 predicate( UseSSE < 2 ); 4013 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4014 match(reg); 4015 format %{ "FPR2" %} 4016 interface(REG_INTER); 4017 %} 4018 4019 operand regnotDPR1(regDPR reg) %{ 4020 predicate( UseSSE < 2 ); 4021 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4022 match(reg); 4023 format %{ %} 4024 interface(REG_INTER); 4025 %} 4026 4027 // Float register operands 4028 operand regFPR() %{ 4029 predicate( UseSSE < 2 ); 4030 constraint(ALLOC_IN_RC(fp_flt_reg)); 4031 match(RegF); 4032 match(regFPR1); 4033 format %{ %} 4034 interface(REG_INTER); 4035 %} 4036 4037 // Float register operands 4038 operand regFPR1(regFPR reg) %{ 4039 predicate( UseSSE < 2 ); 4040 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4041 match(reg); 4042 format %{ "FPR1" %} 4043 interface(REG_INTER); 4044 %} 4045 4046 // XMM Float register operands 4047 operand regF() %{ 4048 predicate( UseSSE>=1 ); 4049 constraint(ALLOC_IN_RC(float_reg_legacy)); 4050 match(RegF); 4051 format %{ %} 4052 interface(REG_INTER); 4053 %} 4054 4055 // XMM Double register operands 4056 operand regD() %{ 4057 predicate( UseSSE>=2 ); 4058 constraint(ALLOC_IN_RC(double_reg_legacy)); 4059 match(RegD); 4060 format %{ %} 4061 interface(REG_INTER); 4062 %} 4063 4064 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM) 4065 // runtime code generation via reg_class_dynamic. 
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);  // no base register: absolute address
    index(0x4);        // 0x4 encodes "no index register" (cf. stackSlot operands)
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// Here the pointer constant is the displacement and the integer register
// supplies the variable part (note the AddP operand order: off first).
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// Stack slot holding a pointer value.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding an int value.
operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a float value.
operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a double value.
operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a long value.
operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// These mirror the plain memory operands above but use eRegP_no_EBP as the
// base, because EBP is not safe to use for implicit null tests on Windows 95
// (see eRegP_no_EBP).  The higher op_cost steers the matcher toward the
// ordinary forms when either would do.

// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}
//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed); encodings are the x86 condition-code nibbles.
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case;
// restricted by the predicate to the ordering tests lt/ge/le/gt.
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
// (eq/ne only, per the predicate).
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move; encodings here are full FCMOVcc
// opcode bytes rather than condition-code nibbles.
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares: each ordering test carries the
// reversed encoding (less -> "g", greater -> "l", etc.) for the commuted form.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
4586 pipeline %{ 4587 4588 //----------ATTRIBUTES--------------------------------------------------------- 4589 attributes %{ 4590 variable_size_instructions; // Fixed size instructions 4591 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle 4592 instruction_unit_size = 1; // An instruction is 1 bytes long 4593 instruction_fetch_unit_size = 16; // The processor fetches one line 4594 instruction_fetch_units = 1; // of 16 bytes 4595 4596 // List of nop instructions 4597 nops( MachNop ); 4598 %} 4599 4600 //----------RESOURCES---------------------------------------------------------- 4601 // Resources are the functional units available to the machine 4602 4603 // Generic P2/P3 pipeline 4604 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of 4605 // 3 instructions decoded per cycle. 4606 // 2 load/store ops per cycle, 1 branch, 1 FPU, 4607 // 2 ALU op, only ALU0 handles mul/div instructions. 4608 resources( D0, D1, D2, DECODE = D0 | D1 | D2, 4609 MS0, MS1, MEM = MS0 | MS1, 4610 BR, FPU, 4611 ALU0, ALU1, ALU = ALU0 | ALU1 ); 4612 4613 //----------PIPELINE DESCRIPTION----------------------------------------------- 4614 // Pipeline Description specifies the stages in the machine's pipeline 4615 4616 // Generic P2/P3 pipeline 4617 pipe_desc(S0, S1, S2, S3, S4, S5); 4618 4619 //----------PIPELINE CLASSES--------------------------------------------------- 4620 // Pipeline Classes describe the stages in which input and output are 4621 // referenced by the hardware pipeline. 4622 4623 // Naming convention: ialu or fpu 4624 // Then: _reg 4625 // Then: _reg if there is a 2nd register 4626 // Then: _long if it's a pair of instructions implementing a long 4627 // Then: _fat if it requires the big decoder 4628 // Or: _mem if it requires the big decoder and a memory unit. 
4629 4630 // Integer ALU reg operation 4631 pipe_class ialu_reg(rRegI dst) %{ 4632 single_instruction; 4633 dst : S4(write); 4634 dst : S3(read); 4635 DECODE : S0; // any decoder 4636 ALU : S3; // any alu 4637 %} 4638 4639 // Long ALU reg operation 4640 pipe_class ialu_reg_long(eRegL dst) %{ 4641 instruction_count(2); 4642 dst : S4(write); 4643 dst : S3(read); 4644 DECODE : S0(2); // any 2 decoders 4645 ALU : S3(2); // both alus 4646 %} 4647 4648 // Integer ALU reg operation using big decoder 4649 pipe_class ialu_reg_fat(rRegI dst) %{ 4650 single_instruction; 4651 dst : S4(write); 4652 dst : S3(read); 4653 D0 : S0; // big decoder only 4654 ALU : S3; // any alu 4655 %} 4656 4657 // Long ALU reg operation using big decoder 4658 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4659 instruction_count(2); 4660 dst : S4(write); 4661 dst : S3(read); 4662 D0 : S0(2); // big decoder only; twice 4663 ALU : S3(2); // any 2 alus 4664 %} 4665 4666 // Integer ALU reg-reg operation 4667 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4668 single_instruction; 4669 dst : S4(write); 4670 src : S3(read); 4671 DECODE : S0; // any decoder 4672 ALU : S3; // any alu 4673 %} 4674 4675 // Long ALU reg-reg operation 4676 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4677 instruction_count(2); 4678 dst : S4(write); 4679 src : S3(read); 4680 DECODE : S0(2); // any 2 decoders 4681 ALU : S3(2); // both alus 4682 %} 4683 4684 // Integer ALU reg-reg operation 4685 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4686 single_instruction; 4687 dst : S4(write); 4688 src : S3(read); 4689 D0 : S0; // big decoder only 4690 ALU : S3; // any alu 4691 %} 4692 4693 // Long ALU reg-reg operation 4694 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4695 instruction_count(2); 4696 dst : S4(write); 4697 src : S3(read); 4698 D0 : S0(2); // big decoder only; twice 4699 ALU : S3(2); // both alus 4700 %} 4701 4702 // Integer ALU reg-mem operation 4703 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4704 single_instruction; 4705 dst : S5(write); 4706 mem : S3(read); 4707 D0 : S0; // big decoder only 4708 ALU : S4; // any alu 4709 MEM : S3; // any mem 4710 %} 4711 4712 // Long ALU reg-mem operation 4713 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4714 instruction_count(2); 4715 dst : S5(write); 4716 mem : S3(read); 4717 D0 : S0(2); // big decoder only; twice 4718 ALU : S4(2); // any 2 alus 4719 MEM : S3(2); // both mems 4720 %} 4721 4722 // Integer mem operation (prefetch) 4723 pipe_class ialu_mem(memory mem) 4724 %{ 4725 single_instruction; 4726 mem : S3(read); 4727 D0 : S0; // big decoder only 4728 MEM : S3; // any mem 4729 %} 4730 4731 // Integer Store to Memory 4732 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4733 single_instruction; 4734 mem : S3(read); 4735 src : S5(read); 4736 D0 : S0; // big decoder only 4737 ALU : S4; // any alu 4738 MEM : S3; 4739 %} 4740 4741 // Long Store to Memory 4742 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4743 instruction_count(2); 4744 mem : S3(read); 4745 src : S5(read); 4746 D0 : S0(2); // big decoder only; twice 4747 ALU : S4(2); // any 2 alus 4748 MEM : S3(2); // Both mems 4749 %} 4750 4751 // Integer Store to Memory 4752 pipe_class ialu_mem_imm(memory mem) %{ 4753 single_instruction; 4754 mem : S3(read); 4755 D0 : S0; // big decoder only 4756 ALU : S4; // any alu 4757 MEM : S3; 4758 %} 4759 4760 // Integer ALU0 reg-reg operation 4761 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4762 single_instruction; 4763 dst : S4(write); 4764 src : S3(read); 4765 D0 : S0; // Big decoder only 4766 ALU0 : S3; // only alu0 4767 %} 4768 4769 // Integer ALU0 reg-mem operation 4770 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4771 single_instruction; 4772 dst : S5(write); 4773 mem : S3(read); 4774 D0 : S0; // big decoder only 4775 ALU0 : S4; // ALU0 only 4776 MEM : S3; // any mem 4777 %} 4778 4779 // Integer ALU reg-reg operation 4780 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR
dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-mem-reg-reg operation (one memory source operand)
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-reg operation (store plus two register sources)
pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-mem operation (two memory operands)
pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float mem-mem operation (memory-to-memory move)
pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

// Float mem-mem-mem operation (three memory operands)
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

// Float mem-reg-constant operation
pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR     : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr     : S1(read);
    BR     : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE : S0(3);
    D0     : S2;
    MEM    : S3;
    ALU    : S3(2);
    dst    : S5(write);
    BR     : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0     : S0(2);
    MEM    : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided. These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.
// The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
// Reverse the byte order of an int, in place (dst is both input and output).
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse the byte order of a long: swap bytes within each half, then
// exchange the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

// Reverse the two low bytes of an int; result is zero-extended (SHR).
instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

// Reverse the two low bytes of an int; result is sign-extended (SAR).
instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when LZCNT is unavailable: BSR gives the index of the highest
// set bit (undefined result when src == 0, hence the JNZ/-1 patch-up);
// 31 - index is then computed via NEG + ADD.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);              // src == 0: make BSR's result defined (-1 -> dst = 31 + 32... patched below)
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);  // dst = 31 - bit index
  %}
  ins_pipe(ialu_reg);
%}

// Long variant: LZCNT the high word; its carry flag is set when the input
// was zero, in which case we count the low word and add 32.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);   // CF set <=> high word was zero
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long BSR fallback: scan the high word first, fall back to the low word,
// and map "both zero" to 64 via the -1 marker and final NEG/ADD 63.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);   // dst = 63 - bit index
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when TZCNT is unavailable: BSF finds the lowest set bit
// (undefined when src == 0, hence the JNZ/32 patch-up).
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long variant: TZCNT the low word; carry set means it was zero, so count
// the high word and add 32.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);   // CF set <=> low word was zero
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Long BSF fallback: scan the low word, then the high word; "both zero"
// yields 32 + 32 = 64 via the MOV 32 + ADD 32 path.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Count bits in each 32-bit half separately and sum the two counts.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Build the two word addresses by hand: same base/index/scale, disp and disp+4.
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));  // high word of an unsigned value is zero
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 8 mask bits matter after the zero-extending byte load.
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    // (x << 24) >> 24 of a loaded short is just a sign-extending byte load.
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    // Mask is 0xFF, so a zero-extending byte load does load+mask in one step.
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Only the low 16 mask bits matter after the zero-extending word load.
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);   // replicate the sign bit into the high word
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Mask fits in 31 bits, so the masked result is non-negative; zeroed
    // high word is the correct sign extension.
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long. Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads from disp and disp+4; not atomic (see predicate).
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long. Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
5746 instruct loadL_volatile(stackSlotL dst, memory mem) %{ 5747 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 5748 match(Set dst (LoadL mem)); 5749 5750 ins_cost(200); 5751 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 5752 "FISTp $dst" %} 5753 ins_encode(enc_loadL_volatile(mem,dst)); 5754 ins_pipe( fpu_reg_mem ); 5755 %} 5756 5757 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 5758 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5759 match(Set dst (LoadL mem)); 5760 effect(TEMP tmp); 5761 ins_cost(180); 5762 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5763 "MOVSD $dst,$tmp" %} 5764 ins_encode %{ 5765 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5766 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 5767 %} 5768 ins_pipe( pipe_slow ); 5769 %} 5770 5771 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 5772 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5773 match(Set dst (LoadL mem)); 5774 effect(TEMP tmp); 5775 ins_cost(160); 5776 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5777 "MOVD $dst.lo,$tmp\n\t" 5778 "PSRLQ $tmp,32\n\t" 5779 "MOVD $dst.hi,$tmp" %} 5780 ins_encode %{ 5781 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5782 __ movdl($dst$$Register, $tmp$$XMMRegister); 5783 __ psrlq($tmp$$XMMRegister, 32); 5784 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 5785 %} 5786 ins_pipe( pipe_slow ); 5787 %} 5788 5789 // Load Range 5790 instruct loadRange(rRegI dst, memory mem) %{ 5791 match(Set dst (LoadRange mem)); 5792 5793 ins_cost(125); 5794 format %{ "MOV $dst,$mem" %} 5795 opcode(0x8B); 5796 ins_encode( OpcP, RegMem(dst,mem)); 5797 ins_pipe( ialu_reg_mem ); 5798 %} 5799 5800 5801 // Load Pointer 5802 instruct loadP(eRegP dst, memory mem) %{ 5803 match(Set dst (LoadP mem)); 5804 5805 ins_cost(125); 5806 format %{ "MOV $dst,$mem" %} 5807 opcode(0x8B); 5808 ins_encode( OpcP, RegMem(dst,mem)); 5809 ins_pipe( 
ialu_reg_mem ); 5810 %} 5811 5812 // Load Klass Pointer 5813 instruct loadKlass(eRegP dst, memory mem) %{ 5814 match(Set dst (LoadKlass mem)); 5815 5816 ins_cost(125); 5817 format %{ "MOV $dst,$mem" %} 5818 opcode(0x8B); 5819 ins_encode( OpcP, RegMem(dst,mem)); 5820 ins_pipe( ialu_reg_mem ); 5821 %} 5822 5823 // Load Double 5824 instruct loadDPR(regDPR dst, memory mem) %{ 5825 predicate(UseSSE<=1); 5826 match(Set dst (LoadD mem)); 5827 5828 ins_cost(150); 5829 format %{ "FLD_D ST,$mem\n\t" 5830 "FSTP $dst" %} 5831 opcode(0xDD); /* DD /0 */ 5832 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5833 Pop_Reg_DPR(dst) ); 5834 ins_pipe( fpu_reg_mem ); 5835 %} 5836 5837 // Load Double to XMM 5838 instruct loadD(regD dst, memory mem) %{ 5839 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 5840 match(Set dst (LoadD mem)); 5841 ins_cost(145); 5842 format %{ "MOVSD $dst,$mem" %} 5843 ins_encode %{ 5844 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5845 %} 5846 ins_pipe( pipe_slow ); 5847 %} 5848 5849 instruct loadD_partial(regD dst, memory mem) %{ 5850 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 5851 match(Set dst (LoadD mem)); 5852 ins_cost(145); 5853 format %{ "MOVLPD $dst,$mem" %} 5854 ins_encode %{ 5855 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5856 %} 5857 ins_pipe( pipe_slow ); 5858 %} 5859 5860 // Load to XMM register (single-precision floating point) 5861 // MOVSS instruction 5862 instruct loadF(regF dst, memory mem) %{ 5863 predicate(UseSSE>=1); 5864 match(Set dst (LoadF mem)); 5865 ins_cost(145); 5866 format %{ "MOVSS $dst,$mem" %} 5867 ins_encode %{ 5868 __ movflt ($dst$$XMMRegister, $mem$$Address); 5869 %} 5870 ins_pipe( pipe_slow ); 5871 %} 5872 5873 // Load Float 5874 instruct loadFPR(regFPR dst, memory mem) %{ 5875 predicate(UseSSE==0); 5876 match(Set dst (LoadF mem)); 5877 5878 ins_cost(150); 5879 format %{ "FLD_S ST,$mem\n\t" 5880 "FSTP $dst" %} 5881 opcode(0xD9); /* D9 /0 */ 5882 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5883 Pop_Reg_FPR(dst) ); 5884 
ins_pipe( fpu_reg_mem ); 5885 %} 5886 5887 // Load Effective Address 5888 instruct leaP8(eRegP dst, indOffset8 mem) %{ 5889 match(Set dst mem); 5890 5891 ins_cost(110); 5892 format %{ "LEA $dst,$mem" %} 5893 opcode(0x8D); 5894 ins_encode( OpcP, RegMem(dst,mem)); 5895 ins_pipe( ialu_reg_reg_fat ); 5896 %} 5897 5898 instruct leaP32(eRegP dst, indOffset32 mem) %{ 5899 match(Set dst mem); 5900 5901 ins_cost(110); 5902 format %{ "LEA $dst,$mem" %} 5903 opcode(0x8D); 5904 ins_encode( OpcP, RegMem(dst,mem)); 5905 ins_pipe( ialu_reg_reg_fat ); 5906 %} 5907 5908 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ 5909 match(Set dst mem); 5910 5911 ins_cost(110); 5912 format %{ "LEA $dst,$mem" %} 5913 opcode(0x8D); 5914 ins_encode( OpcP, RegMem(dst,mem)); 5915 ins_pipe( ialu_reg_reg_fat ); 5916 %} 5917 5918 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ 5919 match(Set dst mem); 5920 5921 ins_cost(110); 5922 format %{ "LEA $dst,$mem" %} 5923 opcode(0x8D); 5924 ins_encode( OpcP, RegMem(dst,mem)); 5925 ins_pipe( ialu_reg_reg_fat ); 5926 %} 5927 5928 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ 5929 match(Set dst mem); 5930 5931 ins_cost(110); 5932 format %{ "LEA $dst,$mem" %} 5933 opcode(0x8D); 5934 ins_encode( OpcP, RegMem(dst,mem)); 5935 ins_pipe( ialu_reg_reg_fat ); 5936 %} 5937 5938 // Load Constant 5939 instruct loadConI(rRegI dst, immI src) %{ 5940 match(Set dst src); 5941 5942 format %{ "MOV $dst,$src" %} 5943 ins_encode( LdImmI(dst, src) ); 5944 ins_pipe( ialu_reg_fat ); 5945 %} 5946 5947 // Load Constant zero 5948 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{ 5949 match(Set dst src); 5950 effect(KILL cr); 5951 5952 ins_cost(50); 5953 format %{ "XOR $dst,$dst" %} 5954 opcode(0x33); /* + rd */ 5955 ins_encode( OpcP, RegReg( dst, dst ) ); 5956 ins_pipe( ialu_reg ); 5957 %} 5958 5959 instruct loadConP(eRegP dst, immP src) %{ 5960 match(Set dst src); 5961 5962 format %{ "MOV $dst,$src" %} 5963 opcode(0xB8); /* + rd */ 5964 
ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
"FSTP $dst" %} 6168 opcode(0xDD); /* DD /0, FLD m64real */ 6169 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6170 Pop_Reg_DPR(dst) ); 6171 ins_pipe( fpu_reg_mem ); 6172 %} 6173 6174 // Prefetch instructions for allocation. 6175 // Must be safe to execute with invalid address (cannot fault). 6176 6177 instruct prefetchAlloc0( memory mem ) %{ 6178 predicate(UseSSE==0 && AllocatePrefetchInstr!=3); 6179 match(PrefetchAllocation mem); 6180 ins_cost(0); 6181 size(0); 6182 format %{ "Prefetch allocation (non-SSE is empty encoding)" %} 6183 ins_encode(); 6184 ins_pipe(empty); 6185 %} 6186 6187 instruct prefetchAlloc( memory mem ) %{ 6188 predicate(AllocatePrefetchInstr==3); 6189 match( PrefetchAllocation mem ); 6190 ins_cost(100); 6191 6192 format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %} 6193 ins_encode %{ 6194 __ prefetchw($mem$$Address); 6195 %} 6196 ins_pipe(ialu_mem); 6197 %} 6198 6199 instruct prefetchAllocNTA( memory mem ) %{ 6200 predicate(UseSSE>=1 && AllocatePrefetchInstr==0); 6201 match(PrefetchAllocation mem); 6202 ins_cost(100); 6203 6204 format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %} 6205 ins_encode %{ 6206 __ prefetchnta($mem$$Address); 6207 %} 6208 ins_pipe(ialu_mem); 6209 %} 6210 6211 instruct prefetchAllocT0( memory mem ) %{ 6212 predicate(UseSSE>=1 && AllocatePrefetchInstr==1); 6213 match(PrefetchAllocation mem); 6214 ins_cost(100); 6215 6216 format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %} 6217 ins_encode %{ 6218 __ prefetcht0($mem$$Address); 6219 %} 6220 ins_pipe(ialu_mem); 6221 %} 6222 6223 instruct prefetchAllocT2( memory mem ) %{ 6224 predicate(UseSSE>=1 && AllocatePrefetchInstr==2); 6225 match(PrefetchAllocation mem); 6226 ins_cost(100); 6227 6228 format %{ "PREFETCHT2 $mem\t! 
Prefetch allocation into L2 cache for write" %} 6229 ins_encode %{ 6230 __ prefetcht2($mem$$Address); 6231 %} 6232 ins_pipe(ialu_mem); 6233 %} 6234 6235 //----------Store Instructions------------------------------------------------- 6236 6237 // Store Byte 6238 instruct storeB(memory mem, xRegI src) %{ 6239 match(Set mem (StoreB mem src)); 6240 6241 ins_cost(125); 6242 format %{ "MOV8 $mem,$src" %} 6243 opcode(0x88); 6244 ins_encode( OpcP, RegMem( src, mem ) ); 6245 ins_pipe( ialu_mem_reg ); 6246 %} 6247 6248 // Store Char/Short 6249 instruct storeC(memory mem, rRegI src) %{ 6250 match(Set mem (StoreC mem src)); 6251 6252 ins_cost(125); 6253 format %{ "MOV16 $mem,$src" %} 6254 opcode(0x89, 0x66); 6255 ins_encode( OpcS, OpcP, RegMem( src, mem ) ); 6256 ins_pipe( ialu_mem_reg ); 6257 %} 6258 6259 // Store Integer 6260 instruct storeI(memory mem, rRegI src) %{ 6261 match(Set mem (StoreI mem src)); 6262 6263 ins_cost(125); 6264 format %{ "MOV $mem,$src" %} 6265 opcode(0x89); 6266 ins_encode( OpcP, RegMem( src, mem ) ); 6267 ins_pipe( ialu_mem_reg ); 6268 %} 6269 6270 // Store Long 6271 instruct storeL(long_memory mem, eRegL src) %{ 6272 predicate(!((StoreLNode*)n)->require_atomic_access()); 6273 match(Set mem (StoreL mem src)); 6274 6275 ins_cost(200); 6276 format %{ "MOV $mem,$src.lo\n\t" 6277 "MOV $mem+4,$src.hi" %} 6278 opcode(0x89, 0x89); 6279 ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) ); 6280 ins_pipe( ialu_mem_long_reg ); 6281 %} 6282 6283 // Store Long to Integer 6284 instruct storeL2I(memory mem, eRegL src) %{ 6285 match(Set mem (StoreI mem (ConvL2I src))); 6286 6287 format %{ "MOV $mem,$src.lo\t# long -> int" %} 6288 ins_encode %{ 6289 __ movl($mem$$Address, $src$$Register); 6290 %} 6291 ins_pipe(ialu_mem_reg); 6292 %} 6293 6294 // Volatile Store Long. Must be atomic, so move it into 6295 // the FP TOS and then do a 64-bit FIST. 
// Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7);               /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3);         /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

instruct cmovL_reg(cmpOp cop,
eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00);         /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40);               /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D);               /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D);               /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48);               /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00);          /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src"
%} 7096 opcode(0x03); 7097 ins_encode( OpcP, RegMem( dst, src) ); 7098 ins_pipe( ialu_reg_mem ); 7099 %} 7100 7101 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7102 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7103 effect(KILL cr); 7104 7105 ins_cost(150); 7106 format %{ "ADD $dst,$src" %} 7107 opcode(0x01); /* Opcode 01 /r */ 7108 ins_encode( OpcP, RegMem( src, dst ) ); 7109 ins_pipe( ialu_mem_reg ); 7110 %} 7111 7112 // Add Memory with Immediate 7113 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7114 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7115 effect(KILL cr); 7116 7117 ins_cost(125); 7118 format %{ "ADD $dst,$src" %} 7119 opcode(0x81); /* Opcode 81 /0 id */ 7120 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7121 ins_pipe( ialu_mem_imm ); 7122 %} 7123 7124 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7125 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7126 effect(KILL cr); 7127 7128 ins_cost(125); 7129 format %{ "INC $dst" %} 7130 opcode(0xFF); /* Opcode FF /0 */ 7131 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7132 ins_pipe( ialu_mem_imm ); 7133 %} 7134 7135 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7136 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7137 effect(KILL cr); 7138 7139 ins_cost(125); 7140 format %{ "DEC $dst" %} 7141 opcode(0xFF); /* Opcode FF /1 */ 7142 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7143 ins_pipe( ialu_mem_imm ); 7144 %} 7145 7146 7147 instruct checkCastPP( eRegP dst ) %{ 7148 match(Set dst (CheckCastPP dst)); 7149 7150 size(0); 7151 format %{ "#checkcastPP of $dst" %} 7152 ins_encode( /*empty encoding*/ ); 7153 ins_pipe( empty ); 7154 %} 7155 7156 instruct castPP( eRegP dst ) %{ 7157 match(Set dst (CastPP dst)); 7158 format %{ "#castPP of $dst" %} 7159 ins_encode( /*empty encoding*/ ); 7160 ins_pipe( empty ); 7161 %} 7162 7163 instruct castII( rRegI dst ) %{ 7164 match(Set dst (CastII dst)); 7165 format %{ "#castII of $dst" %} 
7166 ins_encode( /*empty encoding*/ ); 7167 ins_cost(0); 7168 ins_pipe( empty ); 7169 %} 7170 7171 7172 // Load-locked - same as a regular pointer load when used with compare-swap 7173 instruct loadPLocked(eRegP dst, memory mem) %{ 7174 match(Set dst (LoadPLocked mem)); 7175 7176 ins_cost(125); 7177 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7178 opcode(0x8B); 7179 ins_encode( OpcP, RegMem(dst,mem)); 7180 ins_pipe( ialu_reg_mem ); 7181 %} 7182 7183 // Conditional-store of the updated heap-top. 7184 // Used during allocation of the shared heap. 7185 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7186 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7187 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7188 // EAX is killed if there is contention, but then it's also unused. 7189 // In the common case of no contention, EAX holds the new oop address. 7190 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7191 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7192 ins_pipe( pipe_cmpxchg ); 7193 %} 7194 7195 // Conditional-store of an int value. 7196 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 7197 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ 7198 match(Set cr (StoreIConditional mem (Binary oldval newval))); 7199 effect(KILL oldval); 7200 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} 7201 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); 7202 ins_pipe( pipe_cmpxchg ); 7203 %} 7204 7205 // Conditional-store of a long value. 7206 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS; requires CMPXCHG8B support (supports_cx8).
// $res is set to 1 on success, 0 on failure (see the JNE/MOV sequence).
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS; $res is the success boolean derived from ZF.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Integer CAS; same success-boolean protocol as compareAndSwapP.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI whose fetched result is unused: a locked ADD suffices,
// no XADD needed (see result_not_used predicate).
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add via locked XADD; old value lands in $newval.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange of int; XCHG with a memory operand is implicitly locked.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange of pointer.
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract Immediate from Register
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract Memory from Register
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Subtract Register from Memory (read-modify-write)
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negate: matches 0 - dst.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low word of EDX:EAX only; used as
// the constant operand of the high-multiply patterns below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
// The predicate walks the matched subtree to verify the long constant
// fits in a 32-bit signed range.
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned int-to-long multiply (both operands zero-extended by mask).
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// Special-cases min_jint / -1 (which would fault in IDIV) by checking
// for 0x80000000 / -1 up front and producing EDX=0 directly.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long: done out-of-line via the runtime ldiv stub.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long: done out-of-line via the runtime lrem stub.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrough for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
// NOTE(review): pipe class is ialu_mem_imm although this is a
// register-form shift — kept as in the original; verify intent.
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: matches (src1 ^ -1) & src2, i.e. ~src1 & src2, in one instruction.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// ANDN with memory operand for src2.
instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: matches (0 - src) & src — isolate lowest set bit.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// BLSI with memory operand.
instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: matches (src + (-1)) ^ src — mask up to lowest set bit.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// BLSMSK with memory operand.
instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: matches (src + (-1)) & src — clear lowest set bit.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

// BLSR with memory operand.
instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst
(OrI dst src)); 8154 effect(KILL cr); 8155 8156 size(2); 8157 format %{ "OR $dst,$src" %} 8158 opcode(0x0B); 8159 ins_encode( OpcP, RegReg( dst, src) ); 8160 ins_pipe( ialu_reg_reg ); 8161 %} 8162 8163 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8164 match(Set dst (OrI dst (CastP2X src))); 8165 effect(KILL cr); 8166 8167 size(2); 8168 format %{ "OR $dst,$src" %} 8169 opcode(0x0B); 8170 ins_encode( OpcP, RegReg( dst, src) ); 8171 ins_pipe( ialu_reg_reg ); 8172 %} 8173 8174 8175 // Or Register with Immediate 8176 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8177 match(Set dst (OrI dst src)); 8178 effect(KILL cr); 8179 8180 format %{ "OR $dst,$src" %} 8181 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8182 // ins_encode( RegImm( dst, src) ); 8183 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8184 ins_pipe( ialu_reg ); 8185 %} 8186 8187 // Or Register with Memory 8188 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8189 match(Set dst (OrI dst (LoadI src))); 8190 effect(KILL cr); 8191 8192 ins_cost(125); 8193 format %{ "OR $dst,$src" %} 8194 opcode(0x0B); 8195 ins_encode( OpcP, RegMem( dst, src) ); 8196 ins_pipe( ialu_reg_mem ); 8197 %} 8198 8199 // Or Memory with Register 8200 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8201 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8202 effect(KILL cr); 8203 8204 ins_cost(150); 8205 format %{ "OR $dst,$src" %} 8206 opcode(0x09); /* Opcode 09 /r */ 8207 ins_encode( OpcP, RegMem( src, dst ) ); 8208 ins_pipe( ialu_mem_reg ); 8209 %} 8210 8211 // Or Memory with Immediate 8212 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8213 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8214 effect(KILL cr); 8215 8216 ins_cost(125); 8217 format %{ "OR $dst,$src" %} 8218 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8219 // ins_encode( MemImm( dst, src) ); 8220 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8221 ins_pipe( ialu_mem_imm ); 
8222 %} 8223 8224 // ROL/ROR 8225 // ROL expand 8226 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8227 effect(USE_DEF dst, USE shift, KILL cr); 8228 8229 format %{ "ROL $dst, $shift" %} 8230 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8231 ins_encode( OpcP, RegOpc( dst )); 8232 ins_pipe( ialu_reg ); 8233 %} 8234 8235 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8236 effect(USE_DEF dst, USE shift, KILL cr); 8237 8238 format %{ "ROL $dst, $shift" %} 8239 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8240 ins_encode( RegOpcImm(dst, shift) ); 8241 ins_pipe(ialu_reg); 8242 %} 8243 8244 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8245 effect(USE_DEF dst, USE shift, KILL cr); 8246 8247 format %{ "ROL $dst, $shift" %} 8248 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8249 ins_encode(OpcP, RegOpc(dst)); 8250 ins_pipe( ialu_reg_reg ); 8251 %} 8252 // end of ROL expand 8253 8254 // ROL 32bit by one once 8255 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8256 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8257 8258 expand %{ 8259 rolI_eReg_imm1(dst, lshift, cr); 8260 %} 8261 %} 8262 8263 // ROL 32bit var by imm8 once 8264 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8265 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8266 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8267 8268 expand %{ 8269 rolI_eReg_imm8(dst, lshift, cr); 8270 %} 8271 %} 8272 8273 // ROL 32bit var by var once 8274 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8275 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8276 8277 expand %{ 8278 rolI_eReg_CL(dst, shift, cr); 8279 %} 8280 %} 8281 8282 // ROL 32bit var by var once 8283 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8284 match(Set dst ( OrI (LShiftI dst shift) (URShiftI 
dst (SubI c32 shift)))); 8285 8286 expand %{ 8287 rolI_eReg_CL(dst, shift, cr); 8288 %} 8289 %} 8290 8291 // ROR expand 8292 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8293 effect(USE_DEF dst, USE shift, KILL cr); 8294 8295 format %{ "ROR $dst, $shift" %} 8296 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8297 ins_encode( OpcP, RegOpc( dst ) ); 8298 ins_pipe( ialu_reg ); 8299 %} 8300 8301 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8302 effect (USE_DEF dst, USE shift, KILL cr); 8303 8304 format %{ "ROR $dst, $shift" %} 8305 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8306 ins_encode( RegOpcImm(dst, shift) ); 8307 ins_pipe( ialu_reg ); 8308 %} 8309 8310 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8311 effect(USE_DEF dst, USE shift, KILL cr); 8312 8313 format %{ "ROR $dst, $shift" %} 8314 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8315 ins_encode(OpcP, RegOpc(dst)); 8316 ins_pipe( ialu_reg_reg ); 8317 %} 8318 // end of ROR expand 8319 8320 // ROR right once 8321 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8322 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8323 8324 expand %{ 8325 rorI_eReg_imm1(dst, rshift, cr); 8326 %} 8327 %} 8328 8329 // ROR 32bit by immI8 once 8330 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8331 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8332 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8333 8334 expand %{ 8335 rorI_eReg_imm8(dst, rshift, cr); 8336 %} 8337 %} 8338 8339 // ROR 32bit var by var once 8340 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8341 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8342 8343 expand %{ 8344 rorI_eReg_CL(dst, shift, cr); 8345 %} 8346 %} 8347 8348 // ROR 32bit var by var once 8349 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, 
eFlagsReg cr) %{ 8350 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8351 8352 expand %{ 8353 rorI_eReg_CL(dst, shift, cr); 8354 %} 8355 %} 8356 8357 // Xor Instructions 8358 // Xor Register with Register 8359 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8360 match(Set dst (XorI dst src)); 8361 effect(KILL cr); 8362 8363 size(2); 8364 format %{ "XOR $dst,$src" %} 8365 opcode(0x33); 8366 ins_encode( OpcP, RegReg( dst, src) ); 8367 ins_pipe( ialu_reg_reg ); 8368 %} 8369 8370 // Xor Register with Immediate -1 8371 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8372 match(Set dst (XorI dst imm)); 8373 8374 size(2); 8375 format %{ "NOT $dst" %} 8376 ins_encode %{ 8377 __ notl($dst$$Register); 8378 %} 8379 ins_pipe( ialu_reg ); 8380 %} 8381 8382 // Xor Register with Immediate 8383 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8384 match(Set dst (XorI dst src)); 8385 effect(KILL cr); 8386 8387 format %{ "XOR $dst,$src" %} 8388 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8389 // ins_encode( RegImm( dst, src) ); 8390 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8391 ins_pipe( ialu_reg ); 8392 %} 8393 8394 // Xor Register with Memory 8395 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8396 match(Set dst (XorI dst (LoadI src))); 8397 effect(KILL cr); 8398 8399 ins_cost(125); 8400 format %{ "XOR $dst,$src" %} 8401 opcode(0x33); 8402 ins_encode( OpcP, RegMem(dst, src) ); 8403 ins_pipe( ialu_reg_mem ); 8404 %} 8405 8406 // Xor Memory with Register 8407 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8408 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8409 effect(KILL cr); 8410 8411 ins_cost(150); 8412 format %{ "XOR $dst,$src" %} 8413 opcode(0x31); /* Opcode 31 /r */ 8414 ins_encode( OpcP, RegMem( src, dst ) ); 8415 ins_pipe( ialu_mem_reg ); 8416 %} 8417 8418 // Xor Memory with Immediate 8419 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8420 match(Set dst (StoreI 
dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Register-to-register copy helper; only used by the Conv2B expansions below.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG/ADC step of int->boolean: after the copy, NEG sets CF iff $dst != 0,
// then ADC $dst,$src folds the carry back in, leaving non-zero iff src != 0.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Convert int to boolean: expands to a copy plus the NEG/ADC idiom above.
instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer flavor of movI_nocopy; only used by the convP2B expansion below.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer flavor of ci2b (same NEG/ADC encoding, pointer source operand).
instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Convert pointer to boolean.
instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// CmpLTMask: branch-free $dst = (p < q) ? -1 : 0 via XOR/CMP/SETlt/NEG.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Straight-line encoding; the unused 'Label done' declared here
    // previously was never bound or jumped to, so it has been removed.
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// CmpLTMask against zero: arithmetic shift smears the sign bit, giving
// -1 when $dst is negative and 0 otherwise.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
  __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
// Fused form: p = (p - q) + (p < q ? y : 0), using one short forward branch
// instead of materializing the CmpLTMask value.
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
// Fused form: y = (p < q) ? y : 0, again via one short forward branch.
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These produce only the flags result (cr) of an overflowing int operation;
// the matcher pairs them with an overflow branch that tests OF.

// Overflow check for int add: ADD sets OF on signed overflow.
// op1 is USE_KILLed because the ADD destroys it.
instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Same as above with an immediate right operand.
instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow check for int subtract: CMP performs the subtraction for flags
// only, so neither operand is clobbered (no USE_KILL needed).
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Same as above with an immediate right operand.
instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow check for int negate (matched as 0 - op2): NEG sets OF for
// the INT_MIN case and destroys op2.
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Overflow check for int multiply: two-operand IMUL sets OF when the
// product does not fit in 32 bits; op1 is destroyed.
instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Immediate-operand variant: three-operand IMUL writes into a scratch
// register (TEMP tmp) so both inputs are preserved.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// 64-bit values live in register pairs on 32-bit x86 ($dst.lo / $dst.hi);
// adds are done as low-word ADD followed by high-word ADC to propagate carry.

// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
8695 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8696 match(Set dst (SubL dst src)); 8697 effect(KILL cr); 8698 ins_cost(200); 8699 format %{ "SUB $dst.lo,$src.lo\n\t" 8700 "SBB $dst.hi,$src.hi" %} 8701 opcode(0x2B, 0x1B); 8702 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8703 ins_pipe( ialu_reg_reg_long ); 8704 %} 8705 8706 // Subtract Long Register with Immediate 8707 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8708 match(Set dst (SubL dst src)); 8709 effect(KILL cr); 8710 format %{ "SUB $dst.lo,$src.lo\n\t" 8711 "SBB $dst.hi,$src.hi" %} 8712 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8713 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8714 ins_pipe( ialu_reg_long ); 8715 %} 8716 8717 // Subtract Long Register with Memory 8718 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8719 match(Set dst (SubL dst (LoadL mem))); 8720 effect(KILL cr); 8721 ins_cost(125); 8722 format %{ "SUB $dst.lo,$mem\n\t" 8723 "SBB $dst.hi,$mem+4" %} 8724 opcode(0x2B, 0x1B); 8725 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8726 ins_pipe( ialu_reg_long_mem ); 8727 %} 8728 8729 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8730 match(Set dst (SubL zero dst)); 8731 effect(KILL cr); 8732 ins_cost(300); 8733 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8734 ins_encode( neg_long(dst) ); 8735 ins_pipe( ialu_reg_reg_long ); 8736 %} 8737 8738 // And Long Register with Register 8739 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8740 match(Set dst (AndL dst src)); 8741 effect(KILL cr); 8742 format %{ "AND $dst.lo,$src.lo\n\t" 8743 "AND $dst.hi,$src.hi" %} 8744 opcode(0x23,0x23); 8745 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8746 ins_pipe( ialu_reg_reg_long ); 8747 %} 8748 8749 // And Long Register with Immediate 8750 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8751 match(Set dst (AndL dst src)); 8752 effect(KILL 
cr); 8753 format %{ "AND $dst.lo,$src.lo\n\t" 8754 "AND $dst.hi,$src.hi" %} 8755 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8756 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8757 ins_pipe( ialu_reg_long ); 8758 %} 8759 8760 // And Long Register with Memory 8761 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8762 match(Set dst (AndL dst (LoadL mem))); 8763 effect(KILL cr); 8764 ins_cost(125); 8765 format %{ "AND $dst.lo,$mem\n\t" 8766 "AND $dst.hi,$mem+4" %} 8767 opcode(0x23, 0x23); 8768 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8769 ins_pipe( ialu_reg_long_mem ); 8770 %} 8771 8772 // BMI1 instructions 8773 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8774 match(Set dst (AndL (XorL src1 minus_1) src2)); 8775 predicate(UseBMI1Instructions); 8776 effect(KILL cr, TEMP dst); 8777 8778 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 8779 "ANDNL $dst.hi, $src1.hi, $src2.hi" 8780 %} 8781 8782 ins_encode %{ 8783 Register Rdst = $dst$$Register; 8784 Register Rsrc1 = $src1$$Register; 8785 Register Rsrc2 = $src2$$Register; 8786 __ andnl(Rdst, Rsrc1, Rsrc2); 8787 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 8788 %} 8789 ins_pipe(ialu_reg_reg_long); 8790 %} 8791 8792 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 8793 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 8794 predicate(UseBMI1Instructions); 8795 effect(KILL cr, TEMP dst); 8796 8797 ins_cost(125); 8798 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 8799 "ANDNL $dst.hi, $src1.hi, $src2+4" 8800 %} 8801 8802 ins_encode %{ 8803 Register Rdst = $dst$$Register; 8804 Register Rsrc1 = $src1$$Register; 8805 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 8806 8807 __ andnl(Rdst, Rsrc1, $src2$$Address); 8808 __ andnl(HIGH_FROM_LOW(Rdst), 
HIGH_FROM_LOW(Rsrc1), src2_hi); 8809 %} 8810 ins_pipe(ialu_reg_mem); 8811 %} 8812 8813 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 8814 match(Set dst (AndL (SubL imm_zero src) src)); 8815 predicate(UseBMI1Instructions); 8816 effect(KILL cr, TEMP dst); 8817 8818 format %{ "MOVL $dst.hi, 0\n\t" 8819 "BLSIL $dst.lo, $src.lo\n\t" 8820 "JNZ done\n\t" 8821 "BLSIL $dst.hi, $src.hi\n" 8822 "done:" 8823 %} 8824 8825 ins_encode %{ 8826 Label done; 8827 Register Rdst = $dst$$Register; 8828 Register Rsrc = $src$$Register; 8829 __ movl(HIGH_FROM_LOW(Rdst), 0); 8830 __ blsil(Rdst, Rsrc); 8831 __ jccb(Assembler::notZero, done); 8832 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8833 __ bind(done); 8834 %} 8835 ins_pipe(ialu_reg); 8836 %} 8837 8838 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 8839 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 8840 predicate(UseBMI1Instructions); 8841 effect(KILL cr, TEMP dst); 8842 8843 ins_cost(125); 8844 format %{ "MOVL $dst.hi, 0\n\t" 8845 "BLSIL $dst.lo, $src\n\t" 8846 "JNZ done\n\t" 8847 "BLSIL $dst.hi, $src+4\n" 8848 "done:" 8849 %} 8850 8851 ins_encode %{ 8852 Label done; 8853 Register Rdst = $dst$$Register; 8854 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8855 8856 __ movl(HIGH_FROM_LOW(Rdst), 0); 8857 __ blsil(Rdst, $src$$Address); 8858 __ jccb(Assembler::notZero, done); 8859 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 8860 __ bind(done); 8861 %} 8862 ins_pipe(ialu_reg_mem); 8863 %} 8864 8865 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8866 %{ 8867 match(Set dst (XorL (AddL src minus_1) src)); 8868 predicate(UseBMI1Instructions); 8869 effect(KILL cr, TEMP dst); 8870 8871 format %{ "MOVL $dst.hi, 0\n\t" 8872 "BLSMSKL $dst.lo, $src.lo\n\t" 8873 "JNC done\n\t" 8874 "BLSMSKL $dst.hi, $src.hi\n" 8875 "done:" 8876 %} 8877 8878 ins_encode %{ 8879 Label done; 
8880 Register Rdst = $dst$$Register; 8881 Register Rsrc = $src$$Register; 8882 __ movl(HIGH_FROM_LOW(Rdst), 0); 8883 __ blsmskl(Rdst, Rsrc); 8884 __ jccb(Assembler::carryClear, done); 8885 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8886 __ bind(done); 8887 %} 8888 8889 ins_pipe(ialu_reg); 8890 %} 8891 8892 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8893 %{ 8894 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 8895 predicate(UseBMI1Instructions); 8896 effect(KILL cr, TEMP dst); 8897 8898 ins_cost(125); 8899 format %{ "MOVL $dst.hi, 0\n\t" 8900 "BLSMSKL $dst.lo, $src\n\t" 8901 "JNC done\n\t" 8902 "BLSMSKL $dst.hi, $src+4\n" 8903 "done:" 8904 %} 8905 8906 ins_encode %{ 8907 Label done; 8908 Register Rdst = $dst$$Register; 8909 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8910 8911 __ movl(HIGH_FROM_LOW(Rdst), 0); 8912 __ blsmskl(Rdst, $src$$Address); 8913 __ jccb(Assembler::carryClear, done); 8914 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 8915 __ bind(done); 8916 %} 8917 8918 ins_pipe(ialu_reg_mem); 8919 %} 8920 8921 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8922 %{ 8923 match(Set dst (AndL (AddL src minus_1) src) ); 8924 predicate(UseBMI1Instructions); 8925 effect(KILL cr, TEMP dst); 8926 8927 format %{ "MOVL $dst.hi, $src.hi\n\t" 8928 "BLSRL $dst.lo, $src.lo\n\t" 8929 "JNC done\n\t" 8930 "BLSRL $dst.hi, $src.hi\n" 8931 "done:" 8932 %} 8933 8934 ins_encode %{ 8935 Label done; 8936 Register Rdst = $dst$$Register; 8937 Register Rsrc = $src$$Register; 8938 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8939 __ blsrl(Rdst, Rsrc); 8940 __ jccb(Assembler::carryClear, done); 8941 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8942 __ bind(done); 8943 %} 8944 8945 ins_pipe(ialu_reg); 8946 %} 8947 8948 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8949 %{ 8950 match(Set dst (AndL 
(AddL (LoadL src) minus_1) (LoadL src) )); 8951 predicate(UseBMI1Instructions); 8952 effect(KILL cr, TEMP dst); 8953 8954 ins_cost(125); 8955 format %{ "MOVL $dst.hi, $src+4\n\t" 8956 "BLSRL $dst.lo, $src\n\t" 8957 "JNC done\n\t" 8958 "BLSRL $dst.hi, $src+4\n" 8959 "done:" 8960 %} 8961 8962 ins_encode %{ 8963 Label done; 8964 Register Rdst = $dst$$Register; 8965 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8966 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 8967 __ blsrl(Rdst, $src$$Address); 8968 __ jccb(Assembler::carryClear, done); 8969 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 8970 __ bind(done); 8971 %} 8972 8973 ins_pipe(ialu_reg_mem); 8974 %} 8975 8976 // Or Long Register with Register 8977 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8978 match(Set dst (OrL dst src)); 8979 effect(KILL cr); 8980 format %{ "OR $dst.lo,$src.lo\n\t" 8981 "OR $dst.hi,$src.hi" %} 8982 opcode(0x0B,0x0B); 8983 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8984 ins_pipe( ialu_reg_reg_long ); 8985 %} 8986 8987 // Or Long Register with Immediate 8988 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8989 match(Set dst (OrL dst src)); 8990 effect(KILL cr); 8991 format %{ "OR $dst.lo,$src.lo\n\t" 8992 "OR $dst.hi,$src.hi" %} 8993 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 8994 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8995 ins_pipe( ialu_reg_long ); 8996 %} 8997 8998 // Or Long Register with Memory 8999 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9000 match(Set dst (OrL dst (LoadL mem))); 9001 effect(KILL cr); 9002 ins_cost(125); 9003 format %{ "OR $dst.lo,$mem\n\t" 9004 "OR $dst.hi,$mem+4" %} 9005 opcode(0x0B,0x0B); 9006 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9007 ins_pipe( ialu_reg_long_mem ); 9008 %} 9009 9010 // Xor Long Register with Register 9011 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9012 
match(Set dst (XorL dst src)); 9013 effect(KILL cr); 9014 format %{ "XOR $dst.lo,$src.lo\n\t" 9015 "XOR $dst.hi,$src.hi" %} 9016 opcode(0x33,0x33); 9017 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9018 ins_pipe( ialu_reg_reg_long ); 9019 %} 9020 9021 // Xor Long Register with Immediate -1 9022 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9023 match(Set dst (XorL dst imm)); 9024 format %{ "NOT $dst.lo\n\t" 9025 "NOT $dst.hi" %} 9026 ins_encode %{ 9027 __ notl($dst$$Register); 9028 __ notl(HIGH_FROM_LOW($dst$$Register)); 9029 %} 9030 ins_pipe( ialu_reg_long ); 9031 %} 9032 9033 // Xor Long Register with Immediate 9034 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9035 match(Set dst (XorL dst src)); 9036 effect(KILL cr); 9037 format %{ "XOR $dst.lo,$src.lo\n\t" 9038 "XOR $dst.hi,$src.hi" %} 9039 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9040 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9041 ins_pipe( ialu_reg_long ); 9042 %} 9043 9044 // Xor Long Register with Memory 9045 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9046 match(Set dst (XorL dst (LoadL mem))); 9047 effect(KILL cr); 9048 ins_cost(125); 9049 format %{ "XOR $dst.lo,$mem\n\t" 9050 "XOR $dst.hi,$mem+4" %} 9051 opcode(0x33,0x33); 9052 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9053 ins_pipe( ialu_reg_long_mem ); 9054 %} 9055 9056 // Shift Left Long by 1 9057 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9058 predicate(UseNewLongLShift); 9059 match(Set dst (LShiftL dst cnt)); 9060 effect(KILL cr); 9061 ins_cost(100); 9062 format %{ "ADD $dst.lo,$dst.lo\n\t" 9063 "ADC $dst.hi,$dst.hi" %} 9064 ins_encode %{ 9065 __ addl($dst$$Register,$dst$$Register); 9066 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9067 %} 9068 ins_pipe( ialu_reg_long ); 9069 %} 9070 9071 // Shift Left Long by 2 9072 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9073 
predicate(UseNewLongLShift); 9074 match(Set dst (LShiftL dst cnt)); 9075 effect(KILL cr); 9076 ins_cost(100); 9077 format %{ "ADD $dst.lo,$dst.lo\n\t" 9078 "ADC $dst.hi,$dst.hi\n\t" 9079 "ADD $dst.lo,$dst.lo\n\t" 9080 "ADC $dst.hi,$dst.hi" %} 9081 ins_encode %{ 9082 __ addl($dst$$Register,$dst$$Register); 9083 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9084 __ addl($dst$$Register,$dst$$Register); 9085 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9086 %} 9087 ins_pipe( ialu_reg_long ); 9088 %} 9089 9090 // Shift Left Long by 3 9091 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9092 predicate(UseNewLongLShift); 9093 match(Set dst (LShiftL dst cnt)); 9094 effect(KILL cr); 9095 ins_cost(100); 9096 format %{ "ADD $dst.lo,$dst.lo\n\t" 9097 "ADC $dst.hi,$dst.hi\n\t" 9098 "ADD $dst.lo,$dst.lo\n\t" 9099 "ADC $dst.hi,$dst.hi\n\t" 9100 "ADD $dst.lo,$dst.lo\n\t" 9101 "ADC $dst.hi,$dst.hi" %} 9102 ins_encode %{ 9103 __ addl($dst$$Register,$dst$$Register); 9104 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9105 __ addl($dst$$Register,$dst$$Register); 9106 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9107 __ addl($dst$$Register,$dst$$Register); 9108 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9109 %} 9110 ins_pipe( ialu_reg_long ); 9111 %} 9112 9113 // Shift Left Long by 1-31 9114 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9115 match(Set dst (LShiftL dst cnt)); 9116 effect(KILL cr); 9117 ins_cost(200); 9118 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9119 "SHL $dst.lo,$cnt" %} 9120 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9121 ins_encode( move_long_small_shift(dst,cnt) ); 9122 ins_pipe( ialu_reg_long ); 9123 %} 9124 9125 // Shift Left Long by 32-63 9126 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9127 match(Set dst (LShiftL dst cnt)); 9128 effect(KILL cr); 9129 ins_cost(300); 9130 
format %{ "MOV $dst.hi,$dst.lo\n"
          "\tSHL $dst.hi,$cnt-32\n"
          "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable.  Shift count lives in ECX (x86 requires CL
// for variable shifts).  x86 masks the count to 5 bits, so a 64-bit shift
// must branch on bit 5 of the count and move lo->hi / clear lo for counts
// of 32 or more before the SHLD/SHL pair handles the low 5 bits.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right (logical) Long by 1-31: SHRD feeds high bits into the low
// word, then SHR shifts the high word, zero-filling from the top.
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right (logical) Long by 32-63: high word moves to low, shifted by
// cnt-32; high word is zeroed (logical shift fills with zeros).
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right (logical) Long by variable in ECX; same >=32 branch scheme
// as salL_eReg_CL but moving hi->lo and clearing hi.
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right (arithmetic) Long by 1-31: like the logical form, but SAR
// on the high word replicates the sign bit.
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right (arithmetic) Long by 32-63: hi moves to lo (shifted by
// cnt-32); SAR hi,31 smears the sign bit across the whole high word.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable in ECX.
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}


//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS.
// FUCOMIP sets PF on unordered (NaN); the fixup forces CF so an
// unordered result is treated as "less than".
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// P6 compare for branches that only need CF/ZF (eFlagsRegUCF):
// no NaN fixup sequence is required, so it is shorter.
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch — pre-P6 fallback: FCOMp + FNSTSW/SAHF to move the
// FPU status word into EFLAGS; unordered is forced to "less than".
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1 (CmpD3 three-way result in an int register).
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 (three-way compare of two x87 registers).
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs.
// UCOMISD sets PF on NaN; emit_cmpfp_fixup rewrites EFLAGS so the
// unordered case looks like "below" to the consuming branch.
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// CF-only consumers: plain UCOMISD, no NaN fixup needed.
instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs,
// memory operand variant.
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// CF-only, memory operand variant.
instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
9383 effect(KILL cr); 9384 ins_cost(255); 9385 format %{ "UCOMISD $src1, $src2\n\t" 9386 "MOV $dst, #-1\n\t" 9387 "JP,s done\n\t" 9388 "JB,s done\n\t" 9389 "SETNE $dst\n\t" 9390 "MOVZB $dst, $dst\n" 9391 "done:" %} 9392 ins_encode %{ 9393 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9394 emit_cmpfp3(_masm, $dst$$Register); 9395 %} 9396 ins_pipe( pipe_slow ); 9397 %} 9398 9399 // Compare into -1,0,1 in XMM and memory 9400 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9401 predicate(UseSSE>=2); 9402 match(Set dst (CmpD3 src1 (LoadD src2))); 9403 effect(KILL cr); 9404 ins_cost(275); 9405 format %{ "UCOMISD $src1, $src2\n\t" 9406 "MOV $dst, #-1\n\t" 9407 "JP,s done\n\t" 9408 "JB,s done\n\t" 9409 "SETNE $dst\n\t" 9410 "MOVZB $dst, $dst\n" 9411 "done:" %} 9412 ins_encode %{ 9413 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9414 emit_cmpfp3(_masm, $dst$$Register); 9415 %} 9416 ins_pipe( pipe_slow ); 9417 %} 9418 9419 9420 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9421 predicate (UseSSE <=1); 9422 match(Set dst (SubD dst src)); 9423 9424 format %{ "FLD $src\n\t" 9425 "DSUBp $dst,ST" %} 9426 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9427 ins_cost(150); 9428 ins_encode( Push_Reg_DPR(src), 9429 OpcP, RegOpc(dst) ); 9430 ins_pipe( fpu_reg_reg ); 9431 %} 9432 9433 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9434 predicate (UseSSE <=1); 9435 match(Set dst (RoundDouble (SubD src1 src2))); 9436 ins_cost(250); 9437 9438 format %{ "FLD $src2\n\t" 9439 "DSUB ST,$src1\n\t" 9440 "FSTP_D $dst\t# D-round" %} 9441 opcode(0xD8, 0x5); 9442 ins_encode( Push_Reg_DPR(src2), 9443 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9444 ins_pipe( fpu_mem_reg_reg ); 9445 %} 9446 9447 9448 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9449 predicate (UseSSE <=1); 9450 match(Set dst (SubD dst (LoadD src))); 9451 ins_cost(150); 9452 9453 format %{ "FLD $src\n\t" 9454 "DSUBp $dst,ST" %} 9455 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9456 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9457 OpcP, RegOpc(dst) ); 9458 ins_pipe( fpu_reg_mem ); 9459 %} 9460 9461 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9462 predicate (UseSSE<=1); 9463 match(Set dst (AbsD src)); 9464 ins_cost(100); 9465 format %{ "FABS" %} 9466 opcode(0xE1, 0xD9); 9467 ins_encode( OpcS, OpcP ); 9468 ins_pipe( fpu_reg_reg ); 9469 %} 9470 9471 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9472 predicate(UseSSE<=1); 9473 match(Set dst (NegD src)); 9474 ins_cost(100); 9475 format %{ "FCHS" %} 9476 opcode(0xE0, 0xD9); 9477 ins_encode( OpcS, OpcP ); 9478 ins_pipe( fpu_reg_reg ); 9479 %} 9480 9481 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9482 predicate(UseSSE<=1); 9483 match(Set dst (AddD dst src)); 9484 format %{ "FLD $src\n\t" 9485 "DADD $dst,ST" %} 9486 size(4); 9487 ins_cost(150); 9488 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9489 ins_encode( Push_Reg_DPR(src), 9490 OpcP, RegOpc(dst) ); 9491 ins_pipe( fpu_reg_reg ); 9492 %} 9493 9494 9495 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9496 predicate(UseSSE<=1); 9497 match(Set dst (RoundDouble (AddD src1 src2))); 9498 ins_cost(250); 9499 9500 format %{ "FLD $src2\n\t" 9501 "DADD ST,$src1\n\t" 9502 "FSTP_D $dst\t# D-round" %} 9503 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9504 ins_encode( Push_Reg_DPR(src2), 9505 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9506 ins_pipe( fpu_mem_reg_reg ); 9507 %} 9508 9509 9510 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9511 predicate(UseSSE<=1); 9512 match(Set dst (AddD dst (LoadD src))); 9513 ins_cost(150); 9514 9515 format %{ "FLD $src\n\t" 9516 "DADDp $dst,ST" %} 9517 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9518 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9519 OpcP, RegOpc(dst) ); 9520 ins_pipe( fpu_reg_mem ); 9521 %} 9522 9523 // add-to-memory 9524 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9525 predicate(UseSSE<=1); 9526 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9527 ins_cost(150); 9528 9529 format %{ "FLD_D $dst\n\t" 9530 "DADD ST,$src\n\t" 9531 "FST_D $dst" %} 9532 opcode(0xDD, 0x0); 9533 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9534 Opcode(0xD8), RegOpc(src), 9535 set_instruction_start, 9536 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9537 ins_pipe( fpu_reg_mem ); 9538 %} 9539 9540 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9541 predicate(UseSSE<=1); 9542 match(Set dst (AddD dst con)); 9543 ins_cost(125); 9544 format %{ "FLD1\n\t" 9545 "DADDp $dst,ST" %} 9546 ins_encode %{ 9547 __ fld1(); 9548 __ faddp($dst$$reg); 9549 %} 9550 ins_pipe(fpu_reg); 9551 %} 9552 9553 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9554 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9555 match(Set dst (AddD dst con)); 9556 ins_cost(200); 9557 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9558 "DADDp $dst,ST" %} 9559 ins_encode %{ 9560 __ fld_d($constantaddress($con)); 9561 __ faddp($dst$$reg); 9562 %} 9563 ins_pipe(fpu_reg_mem); 9564 %} 9565 9566 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9567 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9568 match(Set dst (RoundDouble (AddD src con))); 9569 ins_cost(200); 9570 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9571 "DADD ST,$src\n\t" 9572 "FSTP_D $dst\t# D-round" %} 9573 ins_encode %{ 9574 __ fld_d($constantaddress($con)); 9575 __ fadd($src$$reg); 9576 __ fstp_d(Address(rsp, $dst$$disp)); 9577 %} 9578 ins_pipe(fpu_mem_reg_con); 9579 %} 9580 9581 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9582 predicate(UseSSE<=1); 9583 match(Set dst (MulD dst src)); 9584 format %{ "FLD $src\n\t" 9585 "DMULp $dst,ST" %} 9586 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9587 ins_cost(150); 9588 ins_encode( Push_Reg_DPR(src), 9589 OpcP, RegOpc(dst) ); 9590 ins_pipe( 
fpu_reg_reg ); 9591 %} 9592 9593 // Strict FP instruction biases argument before multiply then 9594 // biases result to avoid double rounding of subnormals. 9595 // 9596 // scale arg1 by multiplying arg1 by 2^(-15360) 9597 // load arg2 9598 // multiply scaled arg1 by arg2 9599 // rescale product by 2^(15360) 9600 // 9601 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9602 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9603 match(Set dst (MulD dst src)); 9604 ins_cost(1); // Select this instruction for all strict FP double multiplies 9605 9606 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9607 "DMULp $dst,ST\n\t" 9608 "FLD $src\n\t" 9609 "DMULp $dst,ST\n\t" 9610 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9611 "DMULp $dst,ST\n\t" %} 9612 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9613 ins_encode( strictfp_bias1(dst), 9614 Push_Reg_DPR(src), 9615 OpcP, RegOpc(dst), 9616 strictfp_bias2(dst) ); 9617 ins_pipe( fpu_reg_reg ); 9618 %} 9619 9620 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9621 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9622 match(Set dst (MulD dst con)); 9623 ins_cost(200); 9624 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9625 "DMULp $dst,ST" %} 9626 ins_encode %{ 9627 __ fld_d($constantaddress($con)); 9628 __ fmulp($dst$$reg); 9629 %} 9630 ins_pipe(fpu_reg_mem); 9631 %} 9632 9633 9634 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9635 predicate( UseSSE<=1 ); 9636 match(Set dst (MulD dst (LoadD src))); 9637 ins_cost(200); 9638 format %{ "FLD_D $src\n\t" 9639 "DMULp $dst,ST" %} 9640 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9641 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9642 OpcP, RegOpc(dst) ); 9643 ins_pipe( fpu_reg_mem ); 9644 %} 9645 9646 // 9647 // Cisc-alternate to reg-reg multiply 9648 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9649 predicate( UseSSE<=1 ); 9650 match(Set dst (MulD src (LoadD mem))); 9651 ins_cost(250); 9652 format %{ "FLD_D $mem\n\t" 9653 "DMUL ST,$src\n\t" 9654 "FSTP_D $dst" %} 9655 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9656 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9657 OpcReg_FPR(src), 9658 Pop_Reg_DPR(dst) ); 9659 ins_pipe( fpu_reg_reg_mem ); 9660 %} 9661 9662 9663 // MACRO3 -- addDPR a mulDPR 9664 // This instruction is a '2-address' instruction in that the result goes 9665 // back to src2. This eliminates a move from the macro; possibly the 9666 // register allocator will have to add it back (and maybe not). 9667 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9668 predicate( UseSSE<=1 ); 9669 match(Set src2 (AddD (MulD src0 src1) src2)); 9670 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9671 "DMUL ST,$src1\n\t" 9672 "DADDp $src2,ST" %} 9673 ins_cost(250); 9674 opcode(0xDD); /* LoadD DD /0 */ 9675 ins_encode( Push_Reg_FPR(src0), 9676 FMul_ST_reg(src1), 9677 FAddP_reg_ST(src2) ); 9678 ins_pipe( fpu_reg_reg_reg ); 9679 %} 9680 9681 9682 // MACRO3 -- subDPR a mulDPR 9683 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9684 predicate( UseSSE<=1 ); 9685 match(Set src2 (SubD (MulD src0 src1) src2)); 9686 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9687 "DMUL ST,$src1\n\t" 9688 "DSUBRp $src2,ST" %} 9689 ins_cost(250); 9690 ins_encode( Push_Reg_FPR(src0), 9691 FMul_ST_reg(src1), 9692 Opcode(0xDE), Opc_plus(0xE0,src2)); 9693 ins_pipe( fpu_reg_reg_reg ); 9694 %} 9695 9696 9697 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9698 predicate( UseSSE<=1 ); 9699 match(Set dst (DivD dst src)); 9700 9701 format %{ "FLD $src\n\t" 9702 "FDIVp $dst,ST" %} 9703 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9704 ins_cost(150); 9705 ins_encode( Push_Reg_DPR(src), 9706 OpcP, RegOpc(dst) ); 9707 ins_pipe( fpu_reg_reg ); 9708 %} 9709 9710 // Strict FP instruction biases argument before division then 9711 // biases 
// result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Single combined predicate (mirrors strictfp_mulDPR_reg): only select
  // this form for strict-FP methods on the x87 path.  A bare
  // predicate(UseSSE<=1) here would let the near-zero ins_cost pick this
  // biased sequence for non-strict divides as well, which is wrong.
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all strict FP double divides

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Non-strict divide with an explicit round-to-memory (RoundDouble):
// FDIV leaves the result on the FPU stack; FSTP_D to a stack slot
// rounds it to 64-bit double precision.
instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD $src1\n\t"
            "FDIV ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Double remainder on the x87 stack (FPREM loop inside emitModDPR).
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 double remainder: bounces operands through the stack to reuse the
// x87 FPREM sequence, then moves the result back to an XMM register.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
9769 effect(KILL rax, KILL cr); 9770 9771 format %{ "SUB ESP,8\t # DMOD\n" 9772 "\tMOVSD [ESP+0],$src1\n" 9773 "\tFLD_D [ESP+0]\n" 9774 "\tMOVSD [ESP+0],$src0\n" 9775 "\tFLD_D [ESP+0]\n" 9776 "loop:\tFPREM\n" 9777 "\tFWAIT\n" 9778 "\tFNSTSW AX\n" 9779 "\tSAHF\n" 9780 "\tJP loop\n" 9781 "\tFSTP_D [ESP+0]\n" 9782 "\tMOVSD $dst,[ESP+0]\n" 9783 "\tADD ESP,8\n" 9784 "\tFSTP ST0\t # Restore FPU Stack" 9785 %} 9786 ins_cost(250); 9787 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9788 ins_pipe( pipe_slow ); 9789 %} 9790 9791 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{ 9792 predicate (UseSSE<=1); 9793 match(Set dst (SinD src)); 9794 ins_cost(1800); 9795 format %{ "DSIN $dst" %} 9796 opcode(0xD9, 0xFE); 9797 ins_encode( OpcP, OpcS ); 9798 ins_pipe( pipe_slow ); 9799 %} 9800 9801 instruct sinD_reg(regD dst, eFlagsReg cr) %{ 9802 predicate (UseSSE>=2); 9803 match(Set dst (SinD dst)); 9804 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9805 ins_cost(1800); 9806 format %{ "DSIN $dst" %} 9807 opcode(0xD9, 0xFE); 9808 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9809 ins_pipe( pipe_slow ); 9810 %} 9811 9812 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{ 9813 predicate (UseSSE<=1); 9814 match(Set dst (CosD src)); 9815 ins_cost(1800); 9816 format %{ "DCOS $dst" %} 9817 opcode(0xD9, 0xFF); 9818 ins_encode( OpcP, OpcS ); 9819 ins_pipe( pipe_slow ); 9820 %} 9821 9822 instruct cosD_reg(regD dst, eFlagsReg cr) %{ 9823 predicate (UseSSE>=2); 9824 match(Set dst (CosD dst)); 9825 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9826 ins_cost(1800); 9827 format %{ "DCOS $dst" %} 9828 opcode(0xD9, 0xFF); 9829 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9830 ins_pipe( pipe_slow ); 9831 %} 9832 9833 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ 9834 predicate (UseSSE<=1); 9835 match(Set dst(TanD src)); 9836 format %{ "DTAN $dst" %} 9837 ins_encode( Opcode(0xD9), Opcode(0xF2), // 
fptan 9838 Opcode(0xDD), Opcode(0xD8)); // fstp st 9839 ins_pipe( pipe_slow ); 9840 %} 9841 9842 instruct tanD_reg(regD dst, eFlagsReg cr) %{ 9843 predicate (UseSSE>=2); 9844 match(Set dst(TanD dst)); 9845 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9846 format %{ "DTAN $dst" %} 9847 ins_encode( Push_SrcD(dst), 9848 Opcode(0xD9), Opcode(0xF2), // fptan 9849 Opcode(0xDD), Opcode(0xD8), // fstp st 9850 Push_ResultD(dst) ); 9851 ins_pipe( pipe_slow ); 9852 %} 9853 9854 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9855 predicate (UseSSE<=1); 9856 match(Set dst(AtanD dst src)); 9857 format %{ "DATA $dst,$src" %} 9858 opcode(0xD9, 0xF3); 9859 ins_encode( Push_Reg_DPR(src), 9860 OpcP, OpcS, RegOpc(dst) ); 9861 ins_pipe( pipe_slow ); 9862 %} 9863 9864 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9865 predicate (UseSSE>=2); 9866 match(Set dst(AtanD dst src)); 9867 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9868 format %{ "DATA $dst,$src" %} 9869 opcode(0xD9, 0xF3); 9870 ins_encode( Push_SrcD(src), 9871 OpcP, OpcS, Push_ResultD(dst) ); 9872 ins_pipe( pipe_slow ); 9873 %} 9874 9875 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9876 predicate (UseSSE<=1); 9877 match(Set dst (SqrtD src)); 9878 format %{ "DSQRT $dst,$src" %} 9879 opcode(0xFA, 0xD9); 9880 ins_encode( Push_Reg_DPR(src), 9881 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9882 ins_pipe( pipe_slow ); 9883 %} 9884 9885 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9886 predicate (UseSSE<=1); 9887 match(Set Y (PowD X Y)); // Raise X to the Yth power 9888 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9889 format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %} 9890 ins_encode %{ 9891 __ subptr(rsp, 8); 9892 __ fld_s($X$$reg - 1); 9893 __ fast_pow(); 9894 __ addptr(rsp, 8); 9895 %} 9896 ins_pipe( pipe_slow ); 9897 %} 9898 9899 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, 
eFlagsReg cr) %{ 9900 predicate (UseSSE>=2); 9901 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 9902 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9903 format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %} 9904 ins_encode %{ 9905 __ subptr(rsp, 8); 9906 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 9907 __ fld_d(Address(rsp, 0)); 9908 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 9909 __ fld_d(Address(rsp, 0)); 9910 __ fast_pow(); 9911 __ fstp_d(Address(rsp, 0)); 9912 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 9913 __ addptr(rsp, 8); 9914 %} 9915 ins_pipe( pipe_slow ); 9916 %} 9917 9918 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ 9919 predicate (UseSSE<=1); 9920 // The source Double operand on FPU stack 9921 match(Set dst (Log10D src)); 9922 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9923 // fxch ; swap ST(0) with ST(1) 9924 // fyl2x ; compute log_10(2) * log_2(x) 9925 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9926 "FXCH \n\t" 9927 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9928 %} 9929 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9930 Opcode(0xD9), Opcode(0xC9), // fxch 9931 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9932 9933 ins_pipe( pipe_slow ); 9934 %} 9935 9936 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ 9937 predicate (UseSSE>=2); 9938 effect(KILL cr); 9939 match(Set dst (Log10D src)); 9940 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9941 // fyl2x ; compute log_10(2) * log_2(x) 9942 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9943 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9944 %} 9945 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9946 Push_SrcD(src), 9947 Opcode(0xD9), Opcode(0xF1), // fyl2x 9948 Push_ResultD(dst)); 9949 9950 ins_pipe( pipe_slow ); 9951 %} 9952 9953 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{ 9954 predicate (UseSSE<=1); 9955 // The source Double operand on FPU stack 9956 match(Set dst (LogD src)); 9957 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit 
number 9958 // fxch ; swap ST(0) with ST(1) 9959 // fyl2x ; compute log_e(2) * log_2(x) 9960 format %{ "FLDLN2 \t\t\t#Log_e\n\t" 9961 "FXCH \n\t" 9962 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 9963 %} 9964 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 9965 Opcode(0xD9), Opcode(0xC9), // fxch 9966 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9967 9968 ins_pipe( pipe_slow ); 9969 %} 9970 9971 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{ 9972 predicate (UseSSE>=2); 9973 effect(KILL cr); 9974 // The source and result Double operands in XMM registers 9975 match(Set dst (LogD src)); 9976 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 9977 // fyl2x ; compute log_e(2) * log_2(x) 9978 format %{ "FLDLN2 \t\t\t#Log_e\n\t" 9979 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 9980 %} 9981 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 9982 Push_SrcD(src), 9983 Opcode(0xD9), Opcode(0xF1), // fyl2x 9984 Push_ResultD(dst)); 9985 ins_pipe( pipe_slow ); 9986 %} 9987 9988 //-------------Float Instructions------------------------------- 9989 // Float Math 9990 9991 // Code for float compare: 9992 // fcompp(); 9993 // fwait(); fnstsw_ax(); 9994 // sahf(); 9995 // movl(dst, unordered_result); 9996 // jcc(Assembler::parity, exit); 9997 // movl(dst, less_result); 9998 // jcc(Assembler::below, exit); 9999 // movl(dst, equal_result); 10000 // jcc(Assembler::equal, exit); 10001 // movl(dst, greater_result); 10002 // exit: 10003 10004 // P6 version of float compare, sets condition codes in EFLAGS 10005 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10006 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10007 match(Set cr (CmpF src1 src2)); 10008 effect(KILL rax); 10009 ins_cost(150); 10010 format %{ "FLD $src1\n\t" 10011 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10012 "JNP exit\n\t" 10013 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10014 "SAHF\n" 10015 "exit:\tNOP // avoid branch to branch" %} 10016 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10017 
ins_encode( Push_Reg_DPR(src1), 10018 OpcP, RegOpc(src2), 10019 cmpF_P6_fixup ); 10020 ins_pipe( pipe_slow ); 10021 %} 10022 10023 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10024 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10025 match(Set cr (CmpF src1 src2)); 10026 ins_cost(100); 10027 format %{ "FLD $src1\n\t" 10028 "FUCOMIP ST,$src2 // P6 instruction" %} 10029 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10030 ins_encode( Push_Reg_DPR(src1), 10031 OpcP, RegOpc(src2)); 10032 ins_pipe( pipe_slow ); 10033 %} 10034 10035 10036 // Compare & branch 10037 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10038 predicate(UseSSE == 0); 10039 match(Set cr (CmpF src1 src2)); 10040 effect(KILL rax); 10041 ins_cost(200); 10042 format %{ "FLD $src1\n\t" 10043 "FCOMp $src2\n\t" 10044 "FNSTSW AX\n\t" 10045 "TEST AX,0x400\n\t" 10046 "JZ,s flags\n\t" 10047 "MOV AH,1\t# unordered treat as LT\n" 10048 "flags:\tSAHF" %} 10049 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10050 ins_encode( Push_Reg_DPR(src1), 10051 OpcP, RegOpc(src2), 10052 fpu_flags); 10053 ins_pipe( pipe_slow ); 10054 %} 10055 10056 // Compare vs zero into -1,0,1 10057 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10058 predicate(UseSSE == 0); 10059 match(Set dst (CmpF3 src1 zero)); 10060 effect(KILL cr, KILL rax); 10061 ins_cost(280); 10062 format %{ "FTSTF $dst,$src1" %} 10063 opcode(0xE4, 0xD9); 10064 ins_encode( Push_Reg_DPR(src1), 10065 OpcS, OpcP, PopFPU, 10066 CmpF_Result(dst)); 10067 ins_pipe( pipe_slow ); 10068 %} 10069 10070 // Compare into -1,0,1 10071 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10072 predicate(UseSSE == 0); 10073 match(Set dst (CmpF3 src1 src2)); 10074 effect(KILL cr, KILL rax); 10075 ins_cost(300); 10076 format %{ "FCMPF $dst,$src1,$src2" %} 10077 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10078 ins_encode( Push_Reg_DPR(src1), 10079 OpcP, 
RegOpc(src2), 10080 CmpF_Result(dst)); 10081 ins_pipe( pipe_slow ); 10082 %} 10083 10084 // float compare and set condition codes in EFLAGS by XMM regs 10085 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10086 predicate(UseSSE>=1); 10087 match(Set cr (CmpF src1 src2)); 10088 ins_cost(145); 10089 format %{ "UCOMISS $src1,$src2\n\t" 10090 "JNP,s exit\n\t" 10091 "PUSHF\t# saw NaN, set CF\n\t" 10092 "AND [rsp], #0xffffff2b\n\t" 10093 "POPF\n" 10094 "exit:" %} 10095 ins_encode %{ 10096 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10097 emit_cmpfp_fixup(_masm); 10098 %} 10099 ins_pipe( pipe_slow ); 10100 %} 10101 10102 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10103 predicate(UseSSE>=1); 10104 match(Set cr (CmpF src1 src2)); 10105 ins_cost(100); 10106 format %{ "UCOMISS $src1,$src2" %} 10107 ins_encode %{ 10108 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10109 %} 10110 ins_pipe( pipe_slow ); 10111 %} 10112 10113 // float compare and set condition codes in EFLAGS by XMM regs 10114 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10115 predicate(UseSSE>=1); 10116 match(Set cr (CmpF src1 (LoadF src2))); 10117 ins_cost(165); 10118 format %{ "UCOMISS $src1,$src2\n\t" 10119 "JNP,s exit\n\t" 10120 "PUSHF\t# saw NaN, set CF\n\t" 10121 "AND [rsp], #0xffffff2b\n\t" 10122 "POPF\n" 10123 "exit:" %} 10124 ins_encode %{ 10125 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10126 emit_cmpfp_fixup(_masm); 10127 %} 10128 ins_pipe( pipe_slow ); 10129 %} 10130 10131 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10132 predicate(UseSSE>=1); 10133 match(Set cr (CmpF src1 (LoadF src2))); 10134 ins_cost(100); 10135 format %{ "UCOMISS $src1,$src2" %} 10136 ins_encode %{ 10137 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10138 %} 10139 ins_pipe( pipe_slow ); 10140 %} 10141 10142 // Compare into -1,0,1 in XMM 10143 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10144 predicate(UseSSE>=1); 10145 
match(Set dst (CmpF3 src1 src2)); 10146 effect(KILL cr); 10147 ins_cost(255); 10148 format %{ "UCOMISS $src1, $src2\n\t" 10149 "MOV $dst, #-1\n\t" 10150 "JP,s done\n\t" 10151 "JB,s done\n\t" 10152 "SETNE $dst\n\t" 10153 "MOVZB $dst, $dst\n" 10154 "done:" %} 10155 ins_encode %{ 10156 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10157 emit_cmpfp3(_masm, $dst$$Register); 10158 %} 10159 ins_pipe( pipe_slow ); 10160 %} 10161 10162 // Compare into -1,0,1 in XMM and memory 10163 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10164 predicate(UseSSE>=1); 10165 match(Set dst (CmpF3 src1 (LoadF src2))); 10166 effect(KILL cr); 10167 ins_cost(275); 10168 format %{ "UCOMISS $src1, $src2\n\t" 10169 "MOV $dst, #-1\n\t" 10170 "JP,s done\n\t" 10171 "JB,s done\n\t" 10172 "SETNE $dst\n\t" 10173 "MOVZB $dst, $dst\n" 10174 "done:" %} 10175 ins_encode %{ 10176 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10177 emit_cmpfp3(_masm, $dst$$Register); 10178 %} 10179 ins_pipe( pipe_slow ); 10180 %} 10181 10182 // Spill to obtain 24-bit precision 10183 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10184 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10185 match(Set dst (SubF src1 src2)); 10186 10187 format %{ "FSUB $dst,$src1 - $src2" %} 10188 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10189 ins_encode( Push_Reg_FPR(src1), 10190 OpcReg_FPR(src2), 10191 Pop_Mem_FPR(dst) ); 10192 ins_pipe( fpu_mem_reg_reg ); 10193 %} 10194 // 10195 // This instruction does not round to 24-bits 10196 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10197 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10198 match(Set dst (SubF dst src)); 10199 10200 format %{ "FSUB $dst,$src" %} 10201 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10202 ins_encode( Push_Reg_FPR(src), 10203 OpcP, RegOpc(dst) ); 10204 ins_pipe( fpu_reg_reg ); 10205 %} 10206 10207 // Spill to obtain 24-bit precision 10208 instruct 
addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Absolute value of a float. dst and src are both regFPR1: D9 E1 (FABS)
// operates in place on the FPU top-of-stack, so both operands must be
// pinned to the same stack register.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Negate a float. Like FABS above, D9 E0 (FCHS) works in place on ST(0).
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary),
              RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  // NOTE(review): the debug text below shows "FSTP_S" but the encoding pops
  // to a register (Pop_Reg_FPR), not a single-precision memory store; the
  // "_S" suffix looks copied from mulFPR24_reg above — confirm and fix the
  // format string. Debug-output-only; encoding is unaffected.
  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1); // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: bounce both operands through the stack onto the
// x87 FPU (FPREM loop via emitModDPR), then move the result back to XMM.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr); // FNSTSW AX / SAHF in the FPREM loop clobber EAX and EFLAGS

  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all Alpha sorted.  Please keep it that way!

instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is not already on top of the FPU stack, load it (non-destructive
    // FLD of ST(i-1)) and pop-store; if it is ST(0), a plain FST suffices.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): debug text says "FST_S" for a float->double widen that
// encodes via Pop_Reg_Reg_DPR; the mnemonic in the format looks wrong
// (display-only) — confirm.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSD2SI returns 0x80000000 on overflow/NaN; that sentinel sends us
    // to the d2i_wrapper stub for the exact Java corner-case semantics.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    // Truncating (round-toward-zero) control word gives Java cast semantics.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x80000000:00000000 is FIST's "invalid" sentinel -> take slow path.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSS2SI yields the 0x80000000 sentinel on overflow/NaN; fall into
    // the slow path, which widens the float on the x87 stack and calls the
    // shared d2i_wrapper stub (handles the float case too, since every
    // float is exactly representable as a double).
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // Spill the XMM float and run the conversion on the x87 stack in
    // truncating (round-toward-zero) mode, as required by Java casts.
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x80000000:00000000 is FIST's "invalid" sentinel -> slow path stub.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// i2d via MOVD + CVTDQ2PD, selected by the UseXmmI2D tuning flag
// (preferred over CVTSI2SD on some CPUs to avoid a partial-register stall).
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// Matches ConvI2F whose input is (x & 255): the value fits in 24 bits,
// so the result is exact and no explicit rounding store is required.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// i2f via MOVD + CVTDQ2PS, selected by the UseXmmI2F tuning flag
// (preferred over CVTSI2SS on some CPUs to avoid a partial-register stall).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy the low word, then arithmetic-shift a
// second copy right by 31 to synthesize the high word.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Note: no predicate — presumably the match-cost mechanism picks convL2F_reg
// above when UseSSE>=1, leaving this FPU path for UseSSE==0; confirm.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Narrow long to int: just copy the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# 
MoveI2F_reg_stack" %} 11276 ins_encode %{ 11277 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11278 %} 11279 ins_pipe( ialu_mem_reg ); 11280 %} 11281 11282 11283 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11284 predicate(UseSSE==0); 11285 match(Set dst (MoveI2F src)); 11286 effect(DEF dst, USE src); 11287 11288 ins_cost(125); 11289 format %{ "FLD_S $src\n\t" 11290 "FSTP $dst\t# MoveI2F_stack_reg" %} 11291 opcode(0xD9); /* D9 /0, FLD m32real */ 11292 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11293 Pop_Reg_FPR(dst) ); 11294 ins_pipe( fpu_reg_mem ); 11295 %} 11296 11297 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11298 predicate(UseSSE>=1); 11299 match(Set dst (MoveI2F src)); 11300 effect( DEF dst, USE src ); 11301 11302 ins_cost(95); 11303 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11304 ins_encode %{ 11305 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11306 %} 11307 ins_pipe( pipe_slow ); 11308 %} 11309 11310 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11311 predicate(UseSSE>=2); 11312 match(Set dst (MoveI2F src)); 11313 effect( DEF dst, USE src ); 11314 11315 ins_cost(85); 11316 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11317 ins_encode %{ 11318 __ movdl($dst$$XMMRegister, $src$$Register); 11319 %} 11320 ins_pipe( pipe_slow ); 11321 %} 11322 11323 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11324 match(Set dst (MoveD2L src)); 11325 effect(DEF dst, USE src); 11326 11327 ins_cost(250); 11328 format %{ "MOV $dst.lo,$src\n\t" 11329 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11330 opcode(0x8B, 0x8B); 11331 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11332 ins_pipe( ialu_mem_long_reg ); 11333 %} 11334 11335 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11336 predicate(UseSSE<=1); 11337 match(Set dst (MoveD2L src)); 11338 effect(DEF dst, USE src); 11339 11340 ins_cost(125); 11341 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11342 
ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11343 ins_pipe( fpu_mem_reg ); 11344 %} 11345 11346 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11347 predicate(UseSSE>=2); 11348 match(Set dst (MoveD2L src)); 11349 effect(DEF dst, USE src); 11350 ins_cost(95); 11351 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11352 ins_encode %{ 11353 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11354 %} 11355 ins_pipe( pipe_slow ); 11356 %} 11357 11358 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11359 predicate(UseSSE>=2); 11360 match(Set dst (MoveD2L src)); 11361 effect(DEF dst, USE src, TEMP tmp); 11362 ins_cost(85); 11363 format %{ "MOVD $dst.lo,$src\n\t" 11364 "PSHUFLW $tmp,$src,0x4E\n\t" 11365 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11366 ins_encode %{ 11367 __ movdl($dst$$Register, $src$$XMMRegister); 11368 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11369 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11370 %} 11371 ins_pipe( pipe_slow ); 11372 %} 11373 11374 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11375 match(Set dst (MoveL2D src)); 11376 effect(DEF dst, USE src); 11377 11378 ins_cost(200); 11379 format %{ "MOV $dst,$src.lo\n\t" 11380 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11381 opcode(0x89, 0x89); 11382 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11383 ins_pipe( ialu_mem_long_reg ); 11384 %} 11385 11386 11387 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11388 predicate(UseSSE<=1); 11389 match(Set dst (MoveL2D src)); 11390 effect(DEF dst, USE src); 11391 ins_cost(125); 11392 11393 format %{ "FLD_D $src\n\t" 11394 "FSTP $dst\t# MoveL2D_stack_reg" %} 11395 opcode(0xDD); /* DD /0, FLD m64real */ 11396 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11397 Pop_Reg_DPR(dst) ); 11398 ins_pipe( fpu_reg_mem ); 11399 %} 11400 11401 11402 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11403 predicate(UseSSE>=2 && 
UseXmmLoadAndClearUpper); 11404 match(Set dst (MoveL2D src)); 11405 effect(DEF dst, USE src); 11406 11407 ins_cost(95); 11408 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11409 ins_encode %{ 11410 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11411 %} 11412 ins_pipe( pipe_slow ); 11413 %} 11414 11415 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11416 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11417 match(Set dst (MoveL2D src)); 11418 effect(DEF dst, USE src); 11419 11420 ins_cost(95); 11421 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11422 ins_encode %{ 11423 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11424 %} 11425 ins_pipe( pipe_slow ); 11426 %} 11427 11428 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11429 predicate(UseSSE>=2); 11430 match(Set dst (MoveL2D src)); 11431 effect(TEMP dst, USE src, TEMP tmp); 11432 ins_cost(85); 11433 format %{ "MOVD $dst,$src.lo\n\t" 11434 "MOVD $tmp,$src.hi\n\t" 11435 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11436 ins_encode %{ 11437 __ movdl($dst$$XMMRegister, $src$$Register); 11438 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11439 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11440 %} 11441 ins_pipe( pipe_slow ); 11442 %} 11443 11444 11445 // ======================================================================= 11446 // fast clearing of an array 11447 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11448 predicate(!UseFastStosb); 11449 match(Set dummy (ClearArray cnt base)); 11450 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11451 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11452 "SHL ECX,1\t# Convert doublewords to words\n\t" 11453 "REP STOS\t# store EAX into [EDI++] while ECX--" %} 11454 ins_encode %{ 11455 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11456 %} 11457 ins_pipe( pipe_slow ); 11458 %} 11459 11460 instruct 
rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11461 predicate(UseFastStosb); 11462 match(Set dummy (ClearArray cnt base)); 11463 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11464 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11465 "SHL ECX,3\t# Convert doublewords to bytes\n\t" 11466 "REP STOSB\t# store EAX into [EDI++] while ECX--" %} 11467 ins_encode %{ 11468 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11469 %} 11470 ins_pipe( pipe_slow ); 11471 %} 11472 11473 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11474 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11475 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11476 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11477 11478 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11479 ins_encode %{ 11480 __ string_compare($str1$$Register, $str2$$Register, 11481 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11482 $tmp1$$XMMRegister); 11483 %} 11484 ins_pipe( pipe_slow ); 11485 %} 11486 11487 // fast string equals 11488 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11489 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11490 match(Set result (StrEquals (Binary str1 str2) cnt)); 11491 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11492 11493 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11494 ins_encode %{ 11495 __ char_arrays_equals(false, $str1$$Register, $str2$$Register, 11496 $cnt$$Register, $result$$Register, $tmp3$$Register, 11497 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11498 %} 11499 ins_pipe( pipe_slow ); 11500 %} 11501 11502 // fast search of substring with known size. 
// Fast search of a substring whose length is a compile-time constant,
// using the SSE4.2 PCMPESTRI-based intrinsics in MacroAssembler.
instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                            eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// General substring search; substring length is only known at runtime
// (passes -1 as the constant-count argument to the intrinsic).
instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                        eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                      regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
                          $tmp3$$Register, $result$$Register, $tmp4$$Register,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions

// Register-register signed compare; sets eFlagsReg for a following branch/cmov.
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Register-immediate signed compare (8-bit or 32-bit immediate form).
instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST reg,reg (shorter than CMP reg,0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fold (AndI src con) == 0 into a single TEST with an immediate mask.
instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Fold (AndI src mem) == 0 into a single TEST with a memory operand.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
// Register-register unsigned compare; result flags are typed eFlagsRegU so
// only unsigned branch conditions can consume them.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Register-immediate unsigned compare.
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare against zero via TEST reg,reg.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// Null-check a pointer loaded directly from memory: TEST mem,0xFFFFFFFF
// sets ZF iff the loaded pointer is zero, without needing a register.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);  /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
// *** Min and Max using the conditional move are slower than the
// *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Same as jmpLoopEndU but for the carry-flag-only flags variant; cheaper
// ins_cost so it is preferred when the flags producer allows it.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional jump where the condition additionally has to consider the
// parity flag (unordered float compare): ne also taken on parity, eq only
// taken when parity is clear.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
//----- Partial subtype check -------------------------------------------------
// Linear scan of sub's secondary-supers array for super.  On a hit the
// secondary-super cache is updated and EDI ($result) is zeroed; on a miss
// EDI is left non-zero.  ECX and the flags are clobbered by REPNE SCASD.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Same scan, but the check's result is only compared against NULL: the
// conditional flags are the real output, so the XOR of EDI is skipped
// (opcode 0x0 tells the shared encoding not to emit it).
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2); // one-byte opcode + rel8 displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Unsigned-compare flavor of the counted-loop back branch.
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Counted-loop back branch on unordered-compare flags (no parity fixup).
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Conditional branch on unordered-compare flags, single-jump cases.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// EQ/NE branch on unordered-compare flags: needs an extra parity-jump to
// route the "unordered" (NaN) case correctly, hence size(4) = two Jccb's.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // NE: unordered counts as "not equal", so parity also jumps to the target.
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // EQ: unordered must NOT take the branch; parity skips over the JE.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in $dst from a full three-way 64-bit compare: signed
// compare of the high words decides first, then an unsigned compare of the
// low words breaks ties.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // High halves: signed compare dominates the result.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // High halves equal: low halves compare unsigned.
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// vs-zero case: the sign bit of the high word alone decides LT/GE.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// reg-reg case: CMP lows, then SBB the highs into a temp so the sign/overflow
// flags reflect the full 64-bit subtraction.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only fires for lt/ge tests; other BoolTests are handled by the EQNE/LEGT rules.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40); // CMOVcc base opcode; condition folded in by enc_cmov
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant: two CMOVs load lo/hi halves from memory.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  // NOTE(fix): the BoolTest disjunction is now parenthesized so the UseSSE
  // guard covers BOTH the lt and ge cases ('&&' binds tighter than '||');
  // this matches the parenthesization of the integer cmov rules above.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  // Parenthesized for the same reason as cmovDDPR_reg_LTGE above.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  // Parenthesized for the same reason as cmovDDPR_reg_LTGE above.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  // Parenthesized for the same reason as cmovDDPR_reg_LTGE above.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// vs-zero case: OR of the two halves is zero iff the long is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// reg-reg case: compare lows, short-circuit to the highs only on equality.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// 64-bit conditional move on EQ/NE long-compare flags: two CMOVs, one per half.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40); // CMOVcc base opcode; condition folded in by enc_cmov
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant: the two CMOVs load lo/hi halves from memory.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// 32-bit int conditional move on EQ/NE long-compare flags.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source variant.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  // NOTE(fix): the BoolTest disjunction is now parenthesized so the UseSSE
  // guard covers BOTH the eq and ne cases ('&&' binds tighter than '||');
  // this matches the parenthesization of the integer cmov rules above.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // Parenthesized for the same reason as cmovDDPR_reg_EQNE above.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  // Parenthesized for the same reason as cmovDDPR_reg_EQNE above.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // Parenthesized for the same reason as cmovDDPR_reg_EQNE above.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only fires for gt/le tests; cmpOp_commute emits the commuted condition
  // to pair with the swapped-operand flag computation above.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40); // CMOVcc base opcode; condition folded in by enc_cmov
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  // Note: "+4" in the format spells out that the high half comes from the
  // adjacent word; the sibling LTGE/EQNE rules omit it but encode identically.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// 32-bit int conditional move on LE/GT long-compare (commuted) flags.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source variant.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  // NOTE(fix): the BoolTest disjunction is now parenthesized so the UseSSE
  // guard covers BOTH the le and gt cases ('&&' binds tighter than '||');
  // this matches the parenthesization of the integer cmov rules above.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // Parenthesized for the same reason as cmovDDPR_reg_LEGT above.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  // Parenthesized for the same reason as cmovDDPR_reg_LEGT above.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // Parenthesized for the same reason as cmovDDPR_reg_LEGT above.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
// Inline-cache call: EAX is pre-loaded with a sentinel oop ((oop)-1) that the
// IC machinery patches; the CALL itself is a plain E8 relative call.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that touches no FP state: no float-stack bookkeeping needed.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast-lock variant used when RTM (transactional) locking is enabled;
// needs two extra scratch registers for the retry counters.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast-lock; disjoint predicate with the RTM rule above so exactly
// one of the two can match.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or. By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$. Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic. It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ; // TEST r32, m32 with a 32-bit absolute address
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active rule: a load of a value just stored to the same memory location is
// redundant — replace the (store, load) pair with just the store.
// NOTE(review): loadI/storeI here name the real instructs defined earlier in
// this file, not the commented-out sketches above — confirm against the main
// instruction section.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.