//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI,   ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
// Relocation formats used when emitting 32-bit immediates/displacements below.
#define   RELOC_IMM32    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Conventional HotSpot shorthand: "__" emits through the local MacroAssembler.
#define __ _masm.

// How to find the high register of a Long pair, given the low register
// NOTE(review): the +2 offset matches the OptoReg numbering laid out by the
// reg_def table in this file — confirm if that table is ever reordered.
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool entry is aligned down into the over-allocated buffer above.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes of setup code (FPU control word load, vzeroupper) emitted
// immediately before a call; used to keep return-address offsets exact.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    if(UseAVX <= 2) {
      size += 3; // vzeroupper
    }
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}

static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M byte: mode (2 bits) | reg (3 bits) | r/m (3 bits).
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with the condition code OR'd into its low bits.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Embedded oops must not be scavengable unless ScavengeRootsInCode is set.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits ModR/M + SIB addressing [ESP+disp], choosing the 8-bit displacement
// form when it fits.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// General memory-operand encoder: picks no-SIB vs SIB form and the shortest
// displacement encoding (none / 8-bit / 32-bit, with optional relocation).
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        // NOTE(review): both arms of this branch emit identical bytes; the
        // base == 0x04 special case looks vestigial — confirm before removing.
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}


// Register-to-register integer copy (MOV r32, r/m32); emits nothing when
// source and destination are the same register.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Patch up EFLAGS after a comiss/ucomiss so a NaN operand compares as
// 'less than' (see bit table below).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in dst: -1 (less/NaN), 0 (equal),
// or 1 (greater), from the flags set by a preceding FP compare.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the prolog this node will emit (debug builds only).
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  // verified_entry emits the full frame setup (stack bang, EBP save, SUB ESP).
  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the epilog this node will emit (debug builds only).
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc);  // pop EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // TEST EAX, [polling page] — faults at a safepoint poll.
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  size += 64; // added to support ReservedStackAccess
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Classify an OptoReg for spill-copy purposes.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (or size, or print) a load/store between a register and [ESP+offset].
// Returns the running byte size including this instruction.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else {                      // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Emit (or size/format) an XMM<->stack spill move through [rsp+offset].
// A register pair (reg_lo+1 == reg_hi) is a 64-bit double move (movdbl),
// otherwise a 32-bit float move (movflt).  Returns accumulated byte size,
// accounting for the 0/1/4-byte displacement and the longer EVEX prefix
// when UseAVX > 2.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  // Operand width/encoding hints used only for EVEX compressed-displacement
  // size queries below.
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSD [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else st->print("MOVSS [ESP + #%d],%s",
                     offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // EVEX can compress an aligned displacement into a single byte; ask the
  // assembler whether that applies for this offset.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// Emit (or size/format) an XMM-to-XMM register move; a register pair on both
// sides is a 64-bit double move (movdbl), otherwise 32-bit (movflt).
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ?
6 : 4;
  // Single-precision MOVAPS without VEX/EVEX is only 3 bytes.
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Emit (or size/format) a GPR-to-XMM move (MOVD); 32-bit payload only.
// Returns total byte size: 6 with an EVEX prefix (UseAVX > 2), else 4.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// Emit (or size/format) an XMM-to-GPR move (MOVD); 32-bit payload only.
// Returns total byte size: 6 with an EVEX prefix (UseAVX > 2), else 4.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ?
6 : 4;
}

// Emit (or size/format) a GPR-to-GPR MOV (opcode 0x8B, 2 bytes).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Emit (or size/format) a store from an x87 FP register to [ESP+offset].
// If the value is not already in ST(0) it is first FLDed there (2 bytes)
// and stored with FSTP (store-and-pop); otherwise FST (store, no pop) is
// used so the x87 stack depth is preserved either way.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // st_op only selects the /digit field of the store opcode via the register
  // number passed to impl_helper: EBX_num (3) => FSTP, EDX_num (2) => FST.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
958 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 959 int src_hi, int dst_hi, uint ireg, outputStream* st); 960 961 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 962 int stack_offset, int reg, uint ireg, outputStream* st); 963 964 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 965 int dst_offset, uint ireg, outputStream* st) { 966 int calc_size = 0; 967 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 968 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 969 switch (ireg) { 970 case Op_VecS: 971 calc_size = 3+src_offset_size + 3+dst_offset_size; 972 break; 973 case Op_VecD: 974 calc_size = 3+src_offset_size + 3+dst_offset_size; 975 src_offset += 4; 976 dst_offset += 4; 977 src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 978 dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 979 calc_size += 3+src_offset_size + 3+dst_offset_size; 980 break; 981 case Op_VecX: 982 case Op_VecY: 983 case Op_VecZ: 984 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 985 break; 986 default: 987 ShouldNotReachHere(); 988 } 989 if (cbuf) { 990 MacroAssembler _masm(cbuf); 991 int offset = __ offset(); 992 switch (ireg) { 993 case Op_VecS: 994 __ pushl(Address(rsp, src_offset)); 995 __ popl (Address(rsp, dst_offset)); 996 break; 997 case Op_VecD: 998 __ pushl(Address(rsp, src_offset)); 999 __ popl (Address(rsp, dst_offset)); 1000 __ pushl(Address(rsp, src_offset+4)); 1001 __ popl (Address(rsp, dst_offset+4)); 1002 break; 1003 case Op_VecX: 1004 __ movdqu(Address(rsp, -16), xmm0); 1005 __ movdqu(xmm0, Address(rsp, src_offset)); 1006 __ movdqu(Address(rsp, dst_offset), xmm0); 1007 __ movdqu(xmm0, Address(rsp, -16)); 1008 break; 1009 case Op_VecY: 1010 __ vmovdqu(Address(rsp, -32), xmm0); 1011 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1012 __ vmovdqu(Address(rsp, dst_offset), xmm0); 
1013 __ vmovdqu(xmm0, Address(rsp, -32)); 1014 case Op_VecZ: 1015 __ evmovdqul(Address(rsp, -64), xmm0, 2); 1016 __ evmovdqul(xmm0, Address(rsp, src_offset), 2); 1017 __ evmovdqul(Address(rsp, dst_offset), xmm0, 2); 1018 __ evmovdqul(xmm0, Address(rsp, -64), 2); 1019 break; 1020 default: 1021 ShouldNotReachHere(); 1022 } 1023 int size = __ offset() - offset; 1024 assert(size == calc_size, "incorrect size calculattion"); 1025 return size; 1026 #ifndef PRODUCT 1027 } else if (!do_size) { 1028 switch (ireg) { 1029 case Op_VecS: 1030 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1031 "popl [rsp + #%d]", 1032 src_offset, dst_offset); 1033 break; 1034 case Op_VecD: 1035 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1036 "popq [rsp + #%d]\n\t" 1037 "pushl [rsp + #%d]\n\t" 1038 "popq [rsp + #%d]", 1039 src_offset, dst_offset, src_offset+4, dst_offset+4); 1040 break; 1041 case Op_VecX: 1042 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1043 "movdqu xmm0, [rsp + #%d]\n\t" 1044 "movdqu [rsp + #%d], xmm0\n\t" 1045 "movdqu xmm0, [rsp - #16]", 1046 src_offset, dst_offset); 1047 break; 1048 case Op_VecY: 1049 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1050 "vmovdqu xmm0, [rsp + #%d]\n\t" 1051 "vmovdqu [rsp + #%d], xmm0\n\t" 1052 "vmovdqu xmm0, [rsp - #32]", 1053 src_offset, dst_offset); 1054 case Op_VecZ: 1055 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1056 "vmovdqu xmm0, [rsp + #%d]\n\t" 1057 "vmovdqu [rsp + #%d], xmm0\n\t" 1058 "vmovdqu xmm0, [rsp - #64]", 1059 src_offset, dst_offset); 1060 break; 1061 default: 1062 ShouldNotReachHere(); 1063 } 1064 #endif 1065 } 1066 return calc_size; 1067 } 1068 1069 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1070 // Get registers to move 1071 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1072 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1073 OptoReg::Name 
dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  // Classify all four register halves so the dispatch below can pick the
  // right move flavor (int/float/xmm/stack on each side).
  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size; // Self copy, no move

  // Vector spill copies are handled entirely by the shared vec_* helpers.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move. push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high half first so the PUSH below does not clobber it.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
        emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
        emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode (*cbuf, 0xDD ); // FST ST(i)
        emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP is 2+2 bytes; the single FST case is 2 bytes.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
      emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // x87 store to the scratch slot just allocated at [ESP+0].
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size; // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
// Debug formatting: run the shared worker in print mode.
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

// Code emission: run the shared worker with a real code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

// Size query: run the shared worker in size-only mode.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}


//=============================================================================
#ifndef PRODUCT
// Print the LEA that materializes the BoxLock stack address.
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Emit LEA reg,[ESP+offset]; 32-bit displacement form when offset >= 128,
// 8-bit form otherwise (sizes must match BoxLockNode::size below).
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg,
0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

// Byte size of the LEA emitted above: 7 with a 32-bit displacement,
// 4 with an 8-bit displacement.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
// Print the unverified entry point: inline-cache check plus alignment NOPs.
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Emit the unverified entry point: compare the receiver klass (in ECX)
// against the inline-cache klass (in EAX) and jump to the IC-miss stub on
// mismatch, then pad so the verified entry point is patchable.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

// Fixed size of the UEP sequence; one byte shorter when OptoBreakpoint
// reserves room for an int3 instead of the third NOP.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  return OptoBreakpoint ?
11 : 12;
}


//=============================================================================

// Map an allocator register number to an FPU-stack-relative offset.
int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}

// This is UltraSparc specific, true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
// this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On 86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 1; }

// No CMOVF/CMOVD with SSE/SSE2
const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Compressed oops are a 64-bit-only feature; must not be queried on x86_32.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

// Compressed class pointers are 64-bit-only; must not be queried on x86_32.
bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Rewrite a memory operand so an implicit-null-check load is safe on
// Win95/98/ME, which cannot take a fault on an [EBP]-based address; the
// *_win95_safe operand variants avoid EBP as the base register.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds(); // Virtual call for number of operands
  uint skipped = node->oper_input_base(); // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1; // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++; // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise
// here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

// A register is spillable as an argument exactly when it can carry one.
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
         (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
// (no divmodL node on x86_32 — must never be queried)
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
// (no divmodL node on x86_32 — must never be queried)
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// Recognizes (AndL x con) with a 32-bit mask constant, and 32-bit ConL.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams. Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword. There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
// operand to generate a function which returns its register number when
// queried. CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
MEMORY_INTER causes an 1606 // operand to generate four functions which return the Base Register, the 1607 // Index Register, the Scale Value, and the Offset Value of the operand when 1608 // queried. COND_INTER causes an operand to generate six functions which 1609 // return the encoding code (ie - encoding bits for the instruction) 1610 // associated with each basic boolean condition for a conditional instruction. 1611 // Instructions specify two basic values for encoding. They use the 1612 // ins_encode keyword to specify their encoding class (which must be one of 1613 // the class names specified in the encoding block), and they use the 1614 // opcode keyword to specify, in order, their primary, secondary, and 1615 // tertiary opcode. Only the opcode sections which a particular instruction 1616 // needs for encoding need to be specified. 1617 encode %{ 1618 // Build emit functions for each basic byte or larger field in the intel 1619 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1620 // code in the enc_class source block. Emit functions will live in the 1621 // main source block for now. 
In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit the instruction's primary opcode byte.
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit the instruction's secondary opcode byte.
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit an opcode byte supplied directly as an 8-bit immediate operand.
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix (selects 16-bit operands).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // Emit a register-register Mod/RM byte (mod == 0x3).
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Emit an explicit opcode byte followed by a register-register Mod/RM byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Load the constant zero into a 32-bit register.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd  -- MOV r32,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                          special case
    //
    // input : rax,: dividend                       min_int
    //         reg:  divisor                        -1
    //
    // output: rax,: quotient  (= rax, idiv reg)    min_int
    //         rdx:  remainder (= rax, irem reg)    0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    // normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    // done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops: the register number is folded
  // directly into the opcode byte.
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}

  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{  // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Long immediate op, low 32-bit half: primary opcode + secondary in r/m.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // Long immediate op, high 32-bit half: primary opcode + tertiary in r/m.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // Secondary opcode with the register folded into the low bits (e.g. BSWAP r32).
  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit value held in a register pair: bswap both halves,
  // then exchange them.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  // Mod/RM byte with the secondary opcode in the reg field (group encodings).
  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  // Conditional move: primary opcode, then the condition code folded into
  // the secondary opcode byte.
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // FPU conditional move (FCMOVcc ST0,ST(i)); two-byte opcode computed
  // from condition code and source stack register.
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  // Slow-path subtype check; EDI gets the result, 'miss' is reached on failure.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    // $primary selects the variant that materializes 0 in EDI on a miss.
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  // Make sure the FPU stack is empty before a C call (pre-SSE2 paths);
  // the emitted size must be the same at every use site.
  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    // First use records the sequence size; later uses assert it is stable.
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // Move the x87 float return value into xmm0 via the stack.
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        // Move the x87 double return value into xmm0 via the stack.
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}

  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if (ra_->C->in_24_bit_fp_mode()) {
      MacroAssembler _masm(&cbuf);
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    if (ra_->C->max_vector_size() > 16) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      MacroAssembler _masm(&cbuf);
      __ vzeroupper();
    }
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    if (!_method) {
      // Runtime stub, not a compiled Java method.
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(), RELOC_IMM32 );
    } else if (_optimized_virtual) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
    } else {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     static_call_Relocation::spec(), RELOC_IMM32 );
    }
    if (_method) {  // Emit stub for static call.
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  %}

  // Inline-cache dispatched call; the MacroAssembler emits the IC sequence.
  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method);
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);                        // Displacement
  %}

  // Following encoding is no longer used, but may be restored if calling
  // convention changes significantly.
  // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  // enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //   // int ic_reg     = Matcher::inline_cache_reg();
  //   // int ic_encode  = Matcher::_regEncode[ic_reg];
  //   // int imo_reg    = Matcher::interpreter_method_oop_reg();
  //   // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //   // // Interpreter expects method_oop in EBX, currently a callee-saved register,
  //   // // so we load it immediately before the call
  //   // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
  //   // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
  //
  //   // xor rbp,ebp
  //   emit_opcode(cbuf, 0x33);
  //   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //   // CALL to interpreter.
  //   cbuf.set_insts_mark();
  //   $$$emit8$primary;
  //   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //               runtime_call_Relocation::spec(), RELOC_IMM32 );
  // %}

  // Shift by an 8-bit immediate count (SHL, SAR, SHR); secondary opcode
  // selects the shift kind in the Mod/RM reg field.
  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the low 32 bits of a long immediate; zero constants use XOR.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the high 32 bits of a long immediate into the pair's high register.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // NOTE(review): appears to duplicate the RegReg enc_class defined earlier
  // in this encode block -- confirm whether adlc keeps both definitions.
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // Same as RegReg_Lo/Hi but without emitting an opcode byte here.
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // Mod/RM pairing an int register with the high half of a long pair.
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // Emit the LOCK prefix only on multiprocessor systems.
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx  (restore original register contents)
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // 32-bit compare-and-exchange against memory, LOCKed on MP systems.
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize the ZF condition as a 0/1 boolean in 'res'.
  // MOV does not touch flags, so it is safe between the compare and the JNE.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();     // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Long shift by a 1..31 immediate: a double shift (SHLD/SHRD, selected by
  // $tertiary) moves bits across the halves, then a plain shift finishes the
  // remaining half.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by a 32..63 immediate: move hi to lo,
  // shift the moved value, and fill the high half with the sign (SAR 31).
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical long shift by a 32..63 immediate: move one half over the other,
  // shift it by (cnt - 32), and clear the vacated half with XOR.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    // disp_for_half selects which 32-bit half of the double is addressed.
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // Compare-and-conditionally-move sequence implementing integer MIN.
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Compare-and-conditionally-move sequence implementing integer MAX.
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // Two's-complement negate of a register.
  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  // SETL: set byte register to 1 if the last compare was signed-less-than.
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free conditional add: p += (p < q) ? y : 0, using the borrow from
  // SUB propagated through SBB to build an all-ones/all-zeros mask in tmp.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Variable left shift of a long in a register pair; shift count in ECX.
  // Counts >= 32 first move lo into hi and clear lo.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Variable logical right shift of a long; mirror image of shift_left_long.
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Variable arithmetic right shift of a long; the >=32 path sign-fills the
  // high half with SAR 31 instead of clearing it.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Push FPU register dst onto the FPU stack (FLD ST(i-1)).
  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Multiply dst by a subnormal-bias 80-bit constant (strictfp scaling, part 1).
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Multiply dst by the inverse bias constant (strictfp scaling, part 2).
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  // [ESP+disp] addressing form for a register store (mod=2, SIB for ESP base).
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  // Store TOS into FPU register dst and pop (FSTP ST(i)).
  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Push FPU register dst onto the FPU stack (FLD ST(i-1)).
  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;                      // plain FST when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: pop the extra copy we pushed
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;                      // plain FST when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: pop the extra copy we pushed
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to
a FPU-stack-slot, and pop FPU-stack 2505 enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ 2506 int pop = 0xD0 - 1; // -1 since we skip FLD 2507 if ($src$$reg != FPR1L_enc) { 2508 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1) 2509 emit_d8( cbuf, 0xC0-1+$src$$reg ); 2510 pop = 0xD8; 2511 } 2512 emit_opcode( cbuf, 0xDD ); 2513 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i) 2514 %} 2515 2516 2517 enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ 2518 // load dst in FPR0 2519 emit_opcode( cbuf, 0xD9 ); 2520 emit_d8( cbuf, 0xC0-1+$dst$$reg ); 2521 if ($src$$reg != FPR1L_enc) { 2522 // fincstp 2523 emit_opcode (cbuf, 0xD9); 2524 emit_opcode (cbuf, 0xF7); 2525 // swap src with FPR1: 2526 // FXCH FPR1 with src 2527 emit_opcode(cbuf, 0xD9); 2528 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2529 // fdecstp 2530 emit_opcode (cbuf, 0xD9); 2531 emit_opcode (cbuf, 0xF6); 2532 } 2533 %} 2534 2535 enc_class Push_ModD_encoding(regD src0, regD src1) %{ 2536 MacroAssembler _masm(&cbuf); 2537 __ subptr(rsp, 8); 2538 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 2539 __ fld_d(Address(rsp, 0)); 2540 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 2541 __ fld_d(Address(rsp, 0)); 2542 %} 2543 2544 enc_class Push_ModF_encoding(regF src0, regF src1) %{ 2545 MacroAssembler _masm(&cbuf); 2546 __ subptr(rsp, 4); 2547 __ movflt(Address(rsp, 0), $src1$$XMMRegister); 2548 __ fld_s(Address(rsp, 0)); 2549 __ movflt(Address(rsp, 0), $src0$$XMMRegister); 2550 __ fld_s(Address(rsp, 0)); 2551 %} 2552 2553 enc_class Push_ResultD(regD dst) %{ 2554 MacroAssembler _masm(&cbuf); 2555 __ fstp_d(Address(rsp, 0)); 2556 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 2557 __ addptr(rsp, 8); 2558 %} 2559 2560 enc_class Push_ResultF(regF dst, immI d8) %{ 2561 MacroAssembler _masm(&cbuf); 2562 __ fstp_s(Address(rsp, 0)); 2563 __ movflt($dst$$XMMRegister, Address(rsp, 0)); 2564 __ addptr(rsp, $d8$$constant); 2565 %} 2566 2567 enc_class Push_SrcD(regD src) %{ 2568 MacroAssembler _masm(&cbuf); 2569 __ subptr(rsp, 8); 
2570 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2571 __ fld_d(Address(rsp, 0)); 2572 %} 2573 2574 enc_class push_stack_temp_qword() %{ 2575 MacroAssembler _masm(&cbuf); 2576 __ subptr(rsp, 8); 2577 %} 2578 2579 enc_class pop_stack_temp_qword() %{ 2580 MacroAssembler _masm(&cbuf); 2581 __ addptr(rsp, 8); 2582 %} 2583 2584 enc_class push_xmm_to_fpr1(regD src) %{ 2585 MacroAssembler _masm(&cbuf); 2586 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 2587 __ fld_d(Address(rsp, 0)); 2588 %} 2589 2590 enc_class Push_Result_Mod_DPR( regDPR src) %{ 2591 if ($src$$reg != FPR1L_enc) { 2592 // fincstp 2593 emit_opcode (cbuf, 0xD9); 2594 emit_opcode (cbuf, 0xF7); 2595 // FXCH FPR1 with src 2596 emit_opcode(cbuf, 0xD9); 2597 emit_d8(cbuf, 0xC8-1+$src$$reg ); 2598 // fdecstp 2599 emit_opcode (cbuf, 0xD9); 2600 emit_opcode (cbuf, 0xF6); 2601 } 2602 // // following asm replaced with Pop_Reg_F or Pop_Mem_F 2603 // // FSTP FPR$dst$$reg 2604 // emit_opcode( cbuf, 0xDD ); 2605 // emit_d8( cbuf, 0xD8+$dst$$reg ); 2606 %} 2607 2608 enc_class fnstsw_sahf_skip_parity() %{ 2609 // fnstsw ax 2610 emit_opcode( cbuf, 0xDF ); 2611 emit_opcode( cbuf, 0xE0 ); 2612 // sahf 2613 emit_opcode( cbuf, 0x9E ); 2614 // jnp ::skip 2615 emit_opcode( cbuf, 0x7B ); 2616 emit_opcode( cbuf, 0x05 ); 2617 %} 2618 2619 enc_class emitModDPR() %{ 2620 // fprem must be iterative 2621 // :: loop 2622 // fprem 2623 emit_opcode( cbuf, 0xD9 ); 2624 emit_opcode( cbuf, 0xF8 ); 2625 // wait 2626 emit_opcode( cbuf, 0x9b ); 2627 // fnstsw ax 2628 emit_opcode( cbuf, 0xDF ); 2629 emit_opcode( cbuf, 0xE0 ); 2630 // sahf 2631 emit_opcode( cbuf, 0x9E ); 2632 // jp ::loop 2633 emit_opcode( cbuf, 0x0F ); 2634 emit_opcode( cbuf, 0x8A ); 2635 emit_opcode( cbuf, 0xF4 ); 2636 emit_opcode( cbuf, 0xFF ); 2637 emit_opcode( cbuf, 0xFF ); 2638 emit_opcode( cbuf, 0xFF ); 2639 %} 2640 2641 enc_class fpu_flags() %{ 2642 // fnstsw_ax 2643 emit_opcode( cbuf, 0xDF); 2644 emit_opcode( cbuf, 0xE0); 2645 // test ax,0x0400 2646 emit_opcode( cbuf, 
0x66 ); // operand-size prefix for 16-bit immediate 2647 emit_opcode( cbuf, 0xA9 ); 2648 emit_d16 ( cbuf, 0x0400 ); 2649 // // // This sequence works, but stalls for 12-16 cycles on PPro 2650 // // test rax,0x0400 2651 // emit_opcode( cbuf, 0xA9 ); 2652 // emit_d32 ( cbuf, 0x00000400 ); 2653 // 2654 // jz exit (no unordered comparison) 2655 emit_opcode( cbuf, 0x74 ); 2656 emit_d8 ( cbuf, 0x02 ); 2657 // mov ah,1 - treat as LT case (set carry flag) 2658 emit_opcode( cbuf, 0xB4 ); 2659 emit_d8 ( cbuf, 0x01 ); 2660 // sahf 2661 emit_opcode( cbuf, 0x9E); 2662 %} 2663 2664 enc_class cmpF_P6_fixup() %{ 2665 // Fixup the integer flags in case comparison involved a NaN 2666 // 2667 // JNP exit (no unordered comparison, P-flag is set by NaN) 2668 emit_opcode( cbuf, 0x7B ); 2669 emit_d8 ( cbuf, 0x03 ); 2670 // MOV AH,1 - treat as LT case (set carry flag) 2671 emit_opcode( cbuf, 0xB4 ); 2672 emit_d8 ( cbuf, 0x01 ); 2673 // SAHF 2674 emit_opcode( cbuf, 0x9E); 2675 // NOP // target for branch to avoid branch to branch 2676 emit_opcode( cbuf, 0x90); 2677 %} 2678 2679 // fnstsw_ax(); 2680 // sahf(); 2681 // movl(dst, nan_result); 2682 // jcc(Assembler::parity, exit); 2683 // movl(dst, less_result); 2684 // jcc(Assembler::below, exit); 2685 // movl(dst, equal_result); 2686 // jcc(Assembler::equal, exit); 2687 // movl(dst, greater_result); 2688 2689 // less_result = 1; 2690 // greater_result = -1; 2691 // equal_result = 0; 2692 // nan_result = -1; 2693 2694 enc_class CmpF_Result(rRegI dst) %{ 2695 // fnstsw_ax(); 2696 emit_opcode( cbuf, 0xDF); 2697 emit_opcode( cbuf, 0xE0); 2698 // sahf 2699 emit_opcode( cbuf, 0x9E); 2700 // movl(dst, nan_result); 2701 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2702 emit_d32( cbuf, -1 ); 2703 // jcc(Assembler::parity, exit); 2704 emit_opcode( cbuf, 0x7A ); 2705 emit_d8 ( cbuf, 0x13 ); 2706 // movl(dst, less_result); 2707 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2708 emit_d32( cbuf, -1 ); 2709 // jcc(Assembler::below, exit); 2710 emit_opcode( cbuf, 0x72 ); 
2711 emit_d8 ( cbuf, 0x0C ); 2712 // movl(dst, equal_result); 2713 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2714 emit_d32( cbuf, 0 ); 2715 // jcc(Assembler::equal, exit); 2716 emit_opcode( cbuf, 0x74 ); 2717 emit_d8 ( cbuf, 0x05 ); 2718 // movl(dst, greater_result); 2719 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2720 emit_d32( cbuf, 1 ); 2721 %} 2722 2723 2724 // Compare the longs and set flags 2725 // BROKEN! Do Not use as-is 2726 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2727 // CMP $src1.hi,$src2.hi 2728 emit_opcode( cbuf, 0x3B ); 2729 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2730 // JNE,s done 2731 emit_opcode(cbuf,0x75); 2732 emit_d8(cbuf, 2 ); 2733 // CMP $src1.lo,$src2.lo 2734 emit_opcode( cbuf, 0x3B ); 2735 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2736 // done: 2737 %} 2738 2739 enc_class convert_int_long( regL dst, rRegI src ) %{ 2740 // mov $dst.lo,$src 2741 int dst_encoding = $dst$$reg; 2742 int src_encoding = $src$$reg; 2743 encode_Copy( cbuf, dst_encoding , src_encoding ); 2744 // mov $dst.hi,$src 2745 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2746 // sar $dst.hi,31 2747 emit_opcode( cbuf, 0xC1 ); 2748 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2749 emit_d8(cbuf, 0x1F ); 2750 %} 2751 2752 enc_class convert_long_double( eRegL src ) %{ 2753 // push $src.hi 2754 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2755 // push $src.lo 2756 emit_opcode(cbuf, 0x50+$src$$reg ); 2757 // fild 64-bits at [SP] 2758 emit_opcode(cbuf,0xdf); 2759 emit_d8(cbuf, 0x6C); 2760 emit_d8(cbuf, 0x24); 2761 emit_d8(cbuf, 0x00); 2762 // pop stack 2763 emit_opcode(cbuf, 0x83); // add SP, #8 2764 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2765 emit_d8(cbuf, 0x8); 2766 %} 2767 2768 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2769 // IMUL EDX:EAX,$src1 2770 emit_opcode( cbuf, 0xF7 ); 2771 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2772 // SAR 
EDX,$cnt-32 2773 int shift_count = ((int)$cnt$$constant) - 32; 2774 if (shift_count > 0) { 2775 emit_opcode(cbuf, 0xC1); 2776 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2777 emit_d8(cbuf, shift_count); 2778 } 2779 %} 2780 2781 // this version doesn't have add sp, 8 2782 enc_class convert_long_double2( eRegL src ) %{ 2783 // push $src.hi 2784 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2785 // push $src.lo 2786 emit_opcode(cbuf, 0x50+$src$$reg ); 2787 // fild 64-bits at [SP] 2788 emit_opcode(cbuf,0xdf); 2789 emit_d8(cbuf, 0x6C); 2790 emit_d8(cbuf, 0x24); 2791 emit_d8(cbuf, 0x00); 2792 %} 2793 2794 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2795 // Basic idea: long = (long)int * (long)int 2796 // IMUL EDX:EAX, src 2797 emit_opcode( cbuf, 0xF7 ); 2798 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2799 %} 2800 2801 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2802 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2803 // MUL EDX:EAX, src 2804 emit_opcode( cbuf, 0xF7 ); 2805 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2806 %} 2807 2808 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2809 // Basic idea: lo(result) = lo(x_lo * y_lo) 2810 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2811 // MOV $tmp,$src.lo 2812 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2813 // IMUL $tmp,EDX 2814 emit_opcode( cbuf, 0x0F ); 2815 emit_opcode( cbuf, 0xAF ); 2816 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2817 // MOV EDX,$src.hi 2818 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2819 // IMUL EDX,EAX 2820 emit_opcode( cbuf, 0x0F ); 2821 emit_opcode( cbuf, 0xAF ); 2822 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2823 // ADD $tmp,EDX 2824 emit_opcode( cbuf, 0x03 ); 2825 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2826 // MUL EDX:EAX,$src.lo 2827 emit_opcode( cbuf, 0xF7 ); 2828 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2829 // ADD EDX,ESI 2830 emit_opcode( 
cbuf, 0x03 ); 2831 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2832 %} 2833 2834 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2835 // Basic idea: lo(result) = lo(src * y_lo) 2836 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2837 // IMUL $tmp,EDX,$src 2838 emit_opcode( cbuf, 0x6B ); 2839 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2840 emit_d8( cbuf, (int)$src$$constant ); 2841 // MOV EDX,$src 2842 emit_opcode(cbuf, 0xB8 + EDX_enc); 2843 emit_d32( cbuf, (int)$src$$constant ); 2844 // MUL EDX:EAX,EDX 2845 emit_opcode( cbuf, 0xF7 ); 2846 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2847 // ADD EDX,ESI 2848 emit_opcode( cbuf, 0x03 ); 2849 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2850 %} 2851 2852 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2853 // PUSH src1.hi 2854 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2855 // PUSH src1.lo 2856 emit_opcode(cbuf, 0x50+$src1$$reg ); 2857 // PUSH src2.hi 2858 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2859 // PUSH src2.lo 2860 emit_opcode(cbuf, 0x50+$src2$$reg ); 2861 // CALL directly to the runtime 2862 cbuf.set_insts_mark(); 2863 emit_opcode(cbuf,0xE8); // Call into runtime 2864 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2865 // Restore stack 2866 emit_opcode(cbuf, 0x83); // add SP, #framesize 2867 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2868 emit_d8(cbuf, 4*4); 2869 %} 2870 2871 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2872 // PUSH src1.hi 2873 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2874 // PUSH src1.lo 2875 emit_opcode(cbuf, 0x50+$src1$$reg ); 2876 // PUSH src2.hi 2877 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2878 // PUSH src2.lo 2879 emit_opcode(cbuf, 0x50+$src2$$reg ); 2880 // CALL directly to the runtime 2881 cbuf.set_insts_mark(); 2882 emit_opcode(cbuf,0xE8); // Call into runtime 2883 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2884 // Restore stack 2885 emit_opcode(cbuf, 0x83); // add SP, #framesize 2886 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2887 emit_d8(cbuf, 4*4); 2888 %} 2889 2890 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2891 // MOV $tmp,$src.lo 2892 emit_opcode(cbuf, 0x8B); 2893 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2894 // OR $tmp,$src.hi 2895 emit_opcode(cbuf, 0x0B); 2896 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2897 %} 2898 2899 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2900 // CMP $src1.lo,$src2.lo 2901 emit_opcode( cbuf, 0x3B ); 2902 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2903 // JNE,s skip 2904 emit_cc(cbuf, 0x70, 0x5); 2905 emit_d8(cbuf,2); 2906 // CMP $src1.hi,$src2.hi 2907 emit_opcode( cbuf, 0x3B ); 2908 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2909 %} 2910 2911 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2912 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2913 emit_opcode( cbuf, 0x3B ); 2914 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2915 // MOV $tmp,$src1.hi 2916 emit_opcode( cbuf, 0x8B ); 2917 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2918 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2919 emit_opcode( cbuf, 0x1B ); 2920 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2921 %} 2922 2923 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2924 // XOR $tmp,$tmp 2925 emit_opcode(cbuf,0x33); // XOR 2926 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2927 // CMP $tmp,$src.lo 2928 emit_opcode( cbuf, 0x3B ); 2929 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2930 // SBB $tmp,$src.hi 2931 emit_opcode( cbuf, 0x1B ); 2932 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2933 %} 2934 2935 // Sniff, sniff... 
smells like Gnu Superoptimizer 2936 enc_class neg_long( eRegL dst ) %{ 2937 emit_opcode(cbuf,0xF7); // NEG hi 2938 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2939 emit_opcode(cbuf,0xF7); // NEG lo 2940 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2941 emit_opcode(cbuf,0x83); // SBB hi,0 2942 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2943 emit_d8 (cbuf,0 ); 2944 %} 2945 2946 enc_class enc_pop_rdx() %{ 2947 emit_opcode(cbuf,0x5A); 2948 %} 2949 2950 enc_class enc_rethrow() %{ 2951 cbuf.set_insts_mark(); 2952 emit_opcode(cbuf, 0xE9); // jmp entry 2953 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2954 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2955 %} 2956 2957 2958 // Convert a double to an int. Java semantics require we do complex 2959 // manglelations in the corner cases. So we set the rounding mode to 2960 // 'zero', store the darned double down as an int, and reset the 2961 // rounding mode to 'nearest'. The hardware throws an exception which 2962 // patches up the correct value directly to the stack. 2963 enc_class DPR2I_encoding( regDPR src ) %{ 2964 // Flip to round-to-zero mode. We attempted to allow invalid-op 2965 // exceptions here, so that a NAN or other corner-case value will 2966 // thrown an exception (but normal values get converted at full speed). 2967 // However, I2C adapters and other float-stack manglers leave pending 2968 // invalid-op exceptions hanging. We would have to clear them before 2969 // enabling them and that is more expensive than just testing for the 2970 // invalid value Intel stores down in the corner cases. 2971 emit_opcode(cbuf,0xD9); // FLDCW trunc 2972 emit_opcode(cbuf,0x2D); 2973 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 2974 // Allocate a word 2975 emit_opcode(cbuf,0x83); // SUB ESP,4 2976 emit_opcode(cbuf,0xEC); 2977 emit_d8(cbuf,0x04); 2978 // Encoding assumes a double has been pushed into FPR0. 
  // Store down the double as an int, popping the FPU stack.
  // FISTP honors the rounding mode loaded above (round-toward-zero),
  // which is what Java's (int) cast semantics require.
  emit_opcode(cbuf,0xDB);       // FISTP [ESP]
  emit_opcode(cbuf,0x1C);
  emit_d8(cbuf,0x24);
  // Restore the rounding mode; mask the exception
  emit_opcode(cbuf,0xD9);       // FLDCW   std/24-bit mode
  emit_opcode(cbuf,0x2D);
  emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

  // Load the converted int; adjust CPU stack.
  // On overflow or NaN, FISTP stores the "integer indefinite" value
  // 0x80000000; in that case fall into the slow path, which re-loads the
  // original source and calls the d2i runtime wrapper to get Java's
  // required corner-case result.
  emit_opcode(cbuf,0x58);       // POP EAX
  emit_opcode(cbuf,0x3D);       // CMP EAX,imm
  emit_d32   (cbuf,0x80000000); //         0x80000000
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07);       // Size of slow_call
  // Push src onto stack slow-path
  emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
  emit_d8    (cbuf,0xC0-1+$src$$reg );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Carry on here...
%}

// Convert a double (already pushed into FPR0) to a long, with the same
// round-toward-zero / corner-case handling as DPR2I_encoding above, but
// storing a 64-bit result and checking the EDX:EAX sentinel pair.
enc_class DPR2L_encoding( regDPR src ) %{
  emit_opcode(cbuf,0xD9);       // FLDCW trunc
  emit_opcode(cbuf,0x2D);
  emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
  // Allocate a word
  emit_opcode(cbuf,0x83);       // SUB ESP,8
  emit_opcode(cbuf,0xEC);
  emit_d8(cbuf,0x08);
  // Encoding assumes a double has been pushed into FPR0.
  // Store down the double as a long, popping the FPU stack
  emit_opcode(cbuf,0xDF);       // FISTP [ESP]
  emit_opcode(cbuf,0x3C);
  emit_d8(cbuf,0x24);
  // Restore the rounding mode; mask the exception
  emit_opcode(cbuf,0xD9);       // FLDCW   std/24-bit mode
  emit_opcode(cbuf,0x2D);
  emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

  // Load the converted long (EDX:EAX); adjust CPU stack.
  // The 64-bit "integer indefinite" sentinel is 0x8000000000000000:
  // take the slow path only when EDX == 0x80000000 AND EAX == 0.
  emit_opcode(cbuf,0x58);       // POP EAX
  emit_opcode(cbuf,0x5A);       // POP EDX
  emit_opcode(cbuf,0x81);       // CMP EDX,imm
  emit_d8    (cbuf,0xFA);       // rdx
  emit_d32   (cbuf,0x80000000); //         0x80000000
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07+4);     // Size of slow_call
  emit_opcode(cbuf,0x85);       // TEST EAX,EAX
  emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
  emit_opcode(cbuf,0x75);       // JNE around_slow_call
  emit_d8    (cbuf,0x07);       // Size of slow_call
  // Push src onto stack slow-path
  emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
  emit_d8    (cbuf,0xC0-1+$src$$reg );
  // CALL directly to the runtime
  cbuf.set_insts_mark();
  emit_opcode(cbuf,0xE8);       // Call into runtime
  emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
  // Carry on here...
3046 %} 3047 3048 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 3049 // Operand was loaded from memory into fp ST (stack top) 3050 // FMUL ST,$src /* D8 C8+i */ 3051 emit_opcode(cbuf, 0xD8); 3052 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3053 %} 3054 3055 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3056 // FADDP ST,src2 /* D8 C0+i */ 3057 emit_opcode(cbuf, 0xD8); 3058 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3059 //could use FADDP src2,fpST /* DE C0+i */ 3060 %} 3061 3062 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3063 // FADDP src2,ST /* DE C0+i */ 3064 emit_opcode(cbuf, 0xDE); 3065 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3066 %} 3067 3068 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3069 // Operand has been loaded into fp ST (stack top) 3070 // FSUB ST,$src1 3071 emit_opcode(cbuf, 0xD8); 3072 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3073 3074 // FDIV 3075 emit_opcode(cbuf, 0xD8); 3076 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3077 %} 3078 3079 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3080 // Operand was loaded from memory into fp ST (stack top) 3081 // FADD ST,$src /* D8 C0+i */ 3082 emit_opcode(cbuf, 0xD8); 3083 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3084 3085 // FMUL ST,src2 /* D8 C*+i */ 3086 emit_opcode(cbuf, 0xD8); 3087 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3088 %} 3089 3090 3091 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3092 // Operand was loaded from memory into fp ST (stack top) 3093 // FADD ST,$src /* D8 C0+i */ 3094 emit_opcode(cbuf, 0xD8); 3095 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3096 3097 // FMULP src2,ST /* DE C8+i */ 3098 emit_opcode(cbuf, 0xDE); 3099 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3100 %} 3101 3102 // Atomically load the volatile long 3103 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3104 emit_opcode(cbuf,0xDF); 3105 int rm_byte_opcode = 0x05; 3106 int base = $mem$$base; 3107 int index = $mem$$index; 3108 int scale = $mem$$scale; 3109 int displace = $mem$$disp; 3110 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3111 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3112 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3113 %} 3114 3115 // Volatile Store Long. Must be atomic, so move it into 3116 // the FP TOS and then do a 64-bit FIST. Has to probe the 3117 // target address before the store (for null-ptr checks) 3118 // so the memory operand is used twice in the encoding. 3119 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3120 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3121 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3122 emit_opcode(cbuf,0xDF); 3123 int rm_byte_opcode = 0x07; 3124 int base = $mem$$base; 3125 int index = $mem$$index; 3126 int scale = $mem$$scale; 3127 int displace = $mem$$disp; 3128 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3129 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3130 %} 3131 3132 // Safepoint Poll. This polls the safepoint page, and causes an 3133 // exception if it is not readable. Unfortunately, it kills the condition code 3134 // in the process 3135 // We current use TESTL [spp],EDI 3136 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 3137 3138 enc_class Safepoint_Poll() %{ 3139 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); 3140 emit_opcode(cbuf,0x85); 3141 emit_rm (cbuf, 0x0, 0x7, 0x5); 3142 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 3143 %} 3144 %} 3145 3146 3147 //----------FRAME-------------------------------------------------------------- 3148 // Definition of frame structure and management information. 
//
// S T A C K   L A Y O U T    Allocators stack-slot number
//                                   | (to get allocators register number
// G  Owned by    |        |         v    add OptoReg::stack0())
// r   CALLER     |        |
// o     |        +--------+      pad to even-align allocators stack-slot
// w     V        |  pad0  |        numbers; owned by CALLER
// t -----------+--------+----> Matcher::_in_arg_limit, unaligned
// h     ^        |   in   |  5
//       |        |  args  |  4   Holes in incoming args owned by SELF
//       |        |        |  3
//       |        +--------+
//       V        | old out|      Empty on Intel, window on Sparc
//        |   old |preserve|      Must be even aligned.
//        |    SP-+--------+----> Matcher::_old_SP, even aligned
//        |       |   in   |  3   area for Intel ret address
//     Owned by   |preserve|      Empty on Sparc.
//       SELF     +--------+
//        |       |  pad2  |  2   pad to align old SP
//        |       +--------+  1
//        |       |  locks |  0
//        |       +--------+----> OptoReg::stack0(), even aligned
//        |       |  pad1  | 11   pad to align new SP
//        |       +--------+
//        |       |        | 10
//        |       | spills |  9   spills
//        V       |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by    +--------+
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
3266 c_calling_convention %{ 3267 // This is obviously always outgoing 3268 (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); 3269 %} 3270 3271 // Location of C & interpreter return values 3272 c_return_value %{ 3273 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3274 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3275 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3276 3277 // in SSE2+ mode we want to keep the FPU stack clean so pretend 3278 // that C functions return float and double results in XMM0. 3279 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3280 return OptoRegPair(XMM0b_num,XMM0_num); 3281 if( ideal_reg == Op_RegF && UseSSE>=2 ) 3282 return OptoRegPair(OptoReg::Bad,XMM0_num); 3283 3284 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3285 %} 3286 3287 // Location of return values 3288 return_value %{ 3289 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3290 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3291 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3292 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3293 return OptoRegPair(XMM0b_num,XMM0_num); 3294 if( ideal_reg == Op_RegF && UseSSE>=1 ) 3295 return OptoRegPair(OptoReg::Bad,XMM0_num); 3296 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3297 %} 3298 3299 %} 3300 3301 //----------ATTRIBUTES--------------------------------------------------------- 3302 //----------Operand Attributes------------------------------------------------- 3303 op_attrib op_cost(0); // Required cost attribute 3304 3305 //----------Instruction Attributes--------------------------------------------- 3306 ins_attrib ins_cost(100); // Required cost attribute 3307 ins_attrib ins_size(8); // Required 
size attribute (in bits) 3308 ins_attrib ins_short_branch(0); // Required flag: is this instruction a 3309 // non-matching short branch variant of some 3310 // long branch? 3311 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) 3312 // specifies the alignment that some part of the instruction (not 3313 // necessarily the start) requires. If > 1, a compute_padding() 3314 // function must be provided for the instruction 3315 3316 //----------OPERANDS----------------------------------------------------------- 3317 // Operand definitions must precede instruction definitions for correct parsing 3318 // in the ADLC because operands constitute user defined types which are used in 3319 // instruction definitions. 3320 3321 //----------Simple Operands---------------------------------------------------- 3322 // Immediate Operands 3323 // Integer Immediate 3324 operand immI() %{ 3325 match(ConI); 3326 3327 op_cost(10); 3328 format %{ %} 3329 interface(CONST_INTER); 3330 %} 3331 3332 // Constant for test vs zero 3333 operand immI0() %{ 3334 predicate(n->get_int() == 0); 3335 match(ConI); 3336 3337 op_cost(0); 3338 format %{ %} 3339 interface(CONST_INTER); 3340 %} 3341 3342 // Constant for increment 3343 operand immI1() %{ 3344 predicate(n->get_int() == 1); 3345 match(ConI); 3346 3347 op_cost(0); 3348 format %{ %} 3349 interface(CONST_INTER); 3350 %} 3351 3352 // Constant for decrement 3353 operand immI_M1() %{ 3354 predicate(n->get_int() == -1); 3355 match(ConI); 3356 3357 op_cost(0); 3358 format %{ %} 3359 interface(CONST_INTER); 3360 %} 3361 3362 // Valid scale values for addressing modes 3363 operand immI2() %{ 3364 predicate(0 <= n->get_int() && (n->get_int() <= 3)); 3365 match(ConI); 3366 3367 format %{ %} 3368 interface(CONST_INTER); 3369 %} 3370 3371 operand immI8() %{ 3372 predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); 3373 match(ConI); 3374 3375 op_cost(5); 3376 format %{ %} 3377 interface(CONST_INTER); 3378 %} 3379 3380 
// Int Immediate, 16-bit signed range (fits a sign-extended word immediate)
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count that stays within the low word of a long
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count that moves the whole low word into (or past) the high word
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: exactly 1 (e.g. shift-by-one encodings)
operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: exactly 2
operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate: exactly 3
operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
predicate( n->get_long() == 0L ); 3486 match(ConL); 3487 op_cost(0); 3488 3489 format %{ %} 3490 interface(CONST_INTER); 3491 %} 3492 3493 // Long Immediate zero 3494 operand immL_M1() %{ 3495 predicate( n->get_long() == -1L ); 3496 match(ConL); 3497 op_cost(0); 3498 3499 format %{ %} 3500 interface(CONST_INTER); 3501 %} 3502 3503 // Long immediate from 0 to 127. 3504 // Used for a shorter form of long mul by 10. 3505 operand immL_127() %{ 3506 predicate((0 <= n->get_long()) && (n->get_long() <= 127)); 3507 match(ConL); 3508 op_cost(0); 3509 3510 format %{ %} 3511 interface(CONST_INTER); 3512 %} 3513 3514 // Long Immediate: low 32-bit mask 3515 operand immL_32bits() %{ 3516 predicate(n->get_long() == 0xFFFFFFFFL); 3517 match(ConL); 3518 op_cost(0); 3519 3520 format %{ %} 3521 interface(CONST_INTER); 3522 %} 3523 3524 // Long Immediate: low 32-bit mask 3525 operand immL32() %{ 3526 predicate(n->get_long() == (int)(n->get_long())); 3527 match(ConL); 3528 op_cost(20); 3529 3530 format %{ %} 3531 interface(CONST_INTER); 3532 %} 3533 3534 //Double Immediate zero 3535 operand immDPR0() %{ 3536 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3537 // bug that generates code such that NaNs compare equal to 0.0 3538 predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); 3539 match(ConD); 3540 3541 op_cost(5); 3542 format %{ %} 3543 interface(CONST_INTER); 3544 %} 3545 3546 // Double Immediate one 3547 operand immDPR1() %{ 3548 predicate( UseSSE<=1 && n->getd() == 1.0 ); 3549 match(ConD); 3550 3551 op_cost(5); 3552 format %{ %} 3553 interface(CONST_INTER); 3554 %} 3555 3556 // Double Immediate 3557 operand immDPR() %{ 3558 predicate(UseSSE<=1); 3559 match(ConD); 3560 3561 op_cost(5); 3562 format %{ %} 3563 interface(CONST_INTER); 3564 %} 3565 3566 operand immD() %{ 3567 predicate(UseSSE>=2); 3568 match(ConD); 3569 3570 op_cost(5); 3571 format %{ %} 3572 interface(CONST_INTER); 3573 %} 3574 3575 // Double Immediate zero 3576 operand 
immD0() %{ 3577 // Do additional (and counter-intuitive) test against NaN to work around VC++ 3578 // bug that generates code such that NaNs compare equal to 0.0 AND do not 3579 // compare equal to -0.0. 3580 predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); 3581 match(ConD); 3582 3583 format %{ %} 3584 interface(CONST_INTER); 3585 %} 3586 3587 // Float Immediate zero 3588 operand immFPR0() %{ 3589 predicate(UseSSE == 0 && n->getf() == 0.0F); 3590 match(ConF); 3591 3592 op_cost(5); 3593 format %{ %} 3594 interface(CONST_INTER); 3595 %} 3596 3597 // Float Immediate one 3598 operand immFPR1() %{ 3599 predicate(UseSSE == 0 && n->getf() == 1.0F); 3600 match(ConF); 3601 3602 op_cost(5); 3603 format %{ %} 3604 interface(CONST_INTER); 3605 %} 3606 3607 // Float Immediate 3608 operand immFPR() %{ 3609 predicate( UseSSE == 0 ); 3610 match(ConF); 3611 3612 op_cost(5); 3613 format %{ %} 3614 interface(CONST_INTER); 3615 %} 3616 3617 // Float Immediate 3618 operand immF() %{ 3619 predicate(UseSSE >= 1); 3620 match(ConF); 3621 3622 op_cost(5); 3623 format %{ %} 3624 interface(CONST_INTER); 3625 %} 3626 3627 // Float Immediate zero. 
Zero and not -0.0 3628 operand immF0() %{ 3629 predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); 3630 match(ConF); 3631 3632 op_cost(5); 3633 format %{ %} 3634 interface(CONST_INTER); 3635 %} 3636 3637 // Immediates for special shifts (sign extend) 3638 3639 // Constants for increment 3640 operand immI_16() %{ 3641 predicate( n->get_int() == 16 ); 3642 match(ConI); 3643 3644 format %{ %} 3645 interface(CONST_INTER); 3646 %} 3647 3648 operand immI_24() %{ 3649 predicate( n->get_int() == 24 ); 3650 match(ConI); 3651 3652 format %{ %} 3653 interface(CONST_INTER); 3654 %} 3655 3656 // Constant for byte-wide masking 3657 operand immI_255() %{ 3658 predicate( n->get_int() == 255 ); 3659 match(ConI); 3660 3661 format %{ %} 3662 interface(CONST_INTER); 3663 %} 3664 3665 // Constant for short-wide masking 3666 operand immI_65535() %{ 3667 predicate(n->get_int() == 65535); 3668 match(ConI); 3669 3670 format %{ %} 3671 interface(CONST_INTER); 3672 %} 3673 3674 // Register Operands 3675 // Integer Register 3676 operand rRegI() %{ 3677 constraint(ALLOC_IN_RC(int_reg)); 3678 match(RegI); 3679 match(xRegI); 3680 match(eAXRegI); 3681 match(eBXRegI); 3682 match(eCXRegI); 3683 match(eDXRegI); 3684 match(eDIRegI); 3685 match(eSIRegI); 3686 3687 format %{ %} 3688 interface(REG_INTER); 3689 %} 3690 3691 // Subset of Integer Register 3692 operand xRegI(rRegI reg) %{ 3693 constraint(ALLOC_IN_RC(int_x_reg)); 3694 match(reg); 3695 match(eAXRegI); 3696 match(eBXRegI); 3697 match(eCXRegI); 3698 match(eDXRegI); 3699 3700 format %{ %} 3701 interface(REG_INTER); 3702 %} 3703 3704 // Special Registers 3705 operand eAXRegI(xRegI reg) %{ 3706 constraint(ALLOC_IN_RC(eax_reg)); 3707 match(reg); 3708 match(rRegI); 3709 3710 format %{ "EAX" %} 3711 interface(REG_INTER); 3712 %} 3713 3714 // Special Registers 3715 operand eBXRegI(xRegI reg) %{ 3716 constraint(ALLOC_IN_RC(ebx_reg)); 3717 match(reg); 3718 match(rRegI); 3719 3720 format %{ "EBX" %} 3721 interface(REG_INTER); 3722 %} 3723 3724 operand 
eCXRegI(xRegI reg) %{ 3725 constraint(ALLOC_IN_RC(ecx_reg)); 3726 match(reg); 3727 match(rRegI); 3728 3729 format %{ "ECX" %} 3730 interface(REG_INTER); 3731 %} 3732 3733 operand eDXRegI(xRegI reg) %{ 3734 constraint(ALLOC_IN_RC(edx_reg)); 3735 match(reg); 3736 match(rRegI); 3737 3738 format %{ "EDX" %} 3739 interface(REG_INTER); 3740 %} 3741 3742 operand eDIRegI(xRegI reg) %{ 3743 constraint(ALLOC_IN_RC(edi_reg)); 3744 match(reg); 3745 match(rRegI); 3746 3747 format %{ "EDI" %} 3748 interface(REG_INTER); 3749 %} 3750 3751 operand naxRegI() %{ 3752 constraint(ALLOC_IN_RC(nax_reg)); 3753 match(RegI); 3754 match(eCXRegI); 3755 match(eDXRegI); 3756 match(eSIRegI); 3757 match(eDIRegI); 3758 3759 format %{ %} 3760 interface(REG_INTER); 3761 %} 3762 3763 operand nadxRegI() %{ 3764 constraint(ALLOC_IN_RC(nadx_reg)); 3765 match(RegI); 3766 match(eBXRegI); 3767 match(eCXRegI); 3768 match(eSIRegI); 3769 match(eDIRegI); 3770 3771 format %{ %} 3772 interface(REG_INTER); 3773 %} 3774 3775 operand ncxRegI() %{ 3776 constraint(ALLOC_IN_RC(ncx_reg)); 3777 match(RegI); 3778 match(eAXRegI); 3779 match(eDXRegI); 3780 match(eSIRegI); 3781 match(eDIRegI); 3782 3783 format %{ %} 3784 interface(REG_INTER); 3785 %} 3786 3787 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3788 // // 3789 operand eSIRegI(xRegI reg) %{ 3790 constraint(ALLOC_IN_RC(esi_reg)); 3791 match(reg); 3792 match(rRegI); 3793 3794 format %{ "ESI" %} 3795 interface(REG_INTER); 3796 %} 3797 3798 // Pointer Register 3799 operand anyRegP() %{ 3800 constraint(ALLOC_IN_RC(any_reg)); 3801 match(RegP); 3802 match(eAXRegP); 3803 match(eBXRegP); 3804 match(eCXRegP); 3805 match(eDIRegP); 3806 match(eRegP); 3807 3808 format %{ %} 3809 interface(REG_INTER); 3810 %} 3811 3812 operand eRegP() %{ 3813 constraint(ALLOC_IN_RC(int_reg)); 3814 match(RegP); 3815 match(eAXRegP); 3816 match(eBXRegP); 3817 match(eCXRegP); 3818 match(eDIRegP); 3819 3820 format %{ %} 3821 interface(REG_INTER); 3822 %} 3823 3824 // 
On windows95, EBP is not safe to use for implicit null tests. 3825 operand eRegP_no_EBP() %{ 3826 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3827 match(RegP); 3828 match(eAXRegP); 3829 match(eBXRegP); 3830 match(eCXRegP); 3831 match(eDIRegP); 3832 3833 op_cost(100); 3834 format %{ %} 3835 interface(REG_INTER); 3836 %} 3837 3838 operand naxRegP() %{ 3839 constraint(ALLOC_IN_RC(nax_reg)); 3840 match(RegP); 3841 match(eBXRegP); 3842 match(eDXRegP); 3843 match(eCXRegP); 3844 match(eSIRegP); 3845 match(eDIRegP); 3846 3847 format %{ %} 3848 interface(REG_INTER); 3849 %} 3850 3851 operand nabxRegP() %{ 3852 constraint(ALLOC_IN_RC(nabx_reg)); 3853 match(RegP); 3854 match(eCXRegP); 3855 match(eDXRegP); 3856 match(eSIRegP); 3857 match(eDIRegP); 3858 3859 format %{ %} 3860 interface(REG_INTER); 3861 %} 3862 3863 operand pRegP() %{ 3864 constraint(ALLOC_IN_RC(p_reg)); 3865 match(RegP); 3866 match(eBXRegP); 3867 match(eDXRegP); 3868 match(eSIRegP); 3869 match(eDIRegP); 3870 3871 format %{ %} 3872 interface(REG_INTER); 3873 %} 3874 3875 // Special Registers 3876 // Return a pointer value 3877 operand eAXRegP(eRegP reg) %{ 3878 constraint(ALLOC_IN_RC(eax_reg)); 3879 match(reg); 3880 format %{ "EAX" %} 3881 interface(REG_INTER); 3882 %} 3883 3884 // Used in AtomicAdd 3885 operand eBXRegP(eRegP reg) %{ 3886 constraint(ALLOC_IN_RC(ebx_reg)); 3887 match(reg); 3888 format %{ "EBX" %} 3889 interface(REG_INTER); 3890 %} 3891 3892 // Tail-call (interprocedural jump) to interpreter 3893 operand eCXRegP(eRegP reg) %{ 3894 constraint(ALLOC_IN_RC(ecx_reg)); 3895 match(reg); 3896 format %{ "ECX" %} 3897 interface(REG_INTER); 3898 %} 3899 3900 operand eSIRegP(eRegP reg) %{ 3901 constraint(ALLOC_IN_RC(esi_reg)); 3902 match(reg); 3903 format %{ "ESI" %} 3904 interface(REG_INTER); 3905 %} 3906 3907 // Used in rep stosw 3908 operand eDIRegP(eRegP reg) %{ 3909 constraint(ALLOC_IN_RC(edi_reg)); 3910 match(reg); 3911 format %{ "EDI" %} 3912 interface(REG_INTER); 3913 %} 3914 3915 operand eRegL() %{ 
3916 constraint(ALLOC_IN_RC(long_reg)); 3917 match(RegL); 3918 match(eADXRegL); 3919 3920 format %{ %} 3921 interface(REG_INTER); 3922 %} 3923 3924 operand eADXRegL( eRegL reg ) %{ 3925 constraint(ALLOC_IN_RC(eadx_reg)); 3926 match(reg); 3927 3928 format %{ "EDX:EAX" %} 3929 interface(REG_INTER); 3930 %} 3931 3932 operand eBCXRegL( eRegL reg ) %{ 3933 constraint(ALLOC_IN_RC(ebcx_reg)); 3934 match(reg); 3935 3936 format %{ "EBX:ECX" %} 3937 interface(REG_INTER); 3938 %} 3939 3940 // Special case for integer high multiply 3941 operand eADXRegL_low_only() %{ 3942 constraint(ALLOC_IN_RC(eadx_reg)); 3943 match(RegL); 3944 3945 format %{ "EAX" %} 3946 interface(REG_INTER); 3947 %} 3948 3949 // Flags register, used as output of compare instructions 3950 operand eFlagsReg() %{ 3951 constraint(ALLOC_IN_RC(int_flags)); 3952 match(RegFlags); 3953 3954 format %{ "EFLAGS" %} 3955 interface(REG_INTER); 3956 %} 3957 3958 // Flags register, used as output of FLOATING POINT compare instructions 3959 operand eFlagsRegU() %{ 3960 constraint(ALLOC_IN_RC(int_flags)); 3961 match(RegFlags); 3962 3963 format %{ "EFLAGS_U" %} 3964 interface(REG_INTER); 3965 %} 3966 3967 operand eFlagsRegUCF() %{ 3968 constraint(ALLOC_IN_RC(int_flags)); 3969 match(RegFlags); 3970 predicate(false); 3971 3972 format %{ "EFLAGS_U_CF" %} 3973 interface(REG_INTER); 3974 %} 3975 3976 // Condition Code Register used by long compare 3977 operand flagsReg_long_LTGE() %{ 3978 constraint(ALLOC_IN_RC(int_flags)); 3979 match(RegFlags); 3980 format %{ "FLAGS_LTGE" %} 3981 interface(REG_INTER); 3982 %} 3983 operand flagsReg_long_EQNE() %{ 3984 constraint(ALLOC_IN_RC(int_flags)); 3985 match(RegFlags); 3986 format %{ "FLAGS_EQNE" %} 3987 interface(REG_INTER); 3988 %} 3989 operand flagsReg_long_LEGT() %{ 3990 constraint(ALLOC_IN_RC(int_flags)); 3991 match(RegFlags); 3992 format %{ "FLAGS_LEGT" %} 3993 interface(REG_INTER); 3994 %} 3995 3996 // Float register operands 3997 operand regDPR() %{ 3998 predicate( UseSSE < 2 ); 
3999 constraint(ALLOC_IN_RC(fp_dbl_reg)); 4000 match(RegD); 4001 match(regDPR1); 4002 match(regDPR2); 4003 format %{ %} 4004 interface(REG_INTER); 4005 %} 4006 4007 operand regDPR1(regDPR reg) %{ 4008 predicate( UseSSE < 2 ); 4009 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4010 match(reg); 4011 format %{ "FPR1" %} 4012 interface(REG_INTER); 4013 %} 4014 4015 operand regDPR2(regDPR reg) %{ 4016 predicate( UseSSE < 2 ); 4017 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4018 match(reg); 4019 format %{ "FPR2" %} 4020 interface(REG_INTER); 4021 %} 4022 4023 operand regnotDPR1(regDPR reg) %{ 4024 predicate( UseSSE < 2 ); 4025 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4026 match(reg); 4027 format %{ %} 4028 interface(REG_INTER); 4029 %} 4030 4031 // Float register operands 4032 operand regFPR() %{ 4033 predicate( UseSSE < 2 ); 4034 constraint(ALLOC_IN_RC(fp_flt_reg)); 4035 match(RegF); 4036 match(regFPR1); 4037 format %{ %} 4038 interface(REG_INTER); 4039 %} 4040 4041 // Float register operands 4042 operand regFPR1(regFPR reg) %{ 4043 predicate( UseSSE < 2 ); 4044 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4045 match(reg); 4046 format %{ "FPR1" %} 4047 interface(REG_INTER); 4048 %} 4049 4050 // XMM Float register operands 4051 operand regF() %{ 4052 predicate( UseSSE>=1 ); 4053 constraint(ALLOC_IN_RC(float_reg_legacy)); 4054 match(RegF); 4055 format %{ %} 4056 interface(REG_INTER); 4057 %} 4058 4059 // XMM Double register operands 4060 operand regD() %{ 4061 predicate( UseSSE>=2 ); 4062 constraint(ALLOC_IN_RC(double_reg_legacy)); 4063 match(RegD); 4064 format %{ %} 4065 interface(REG_INTER); 4066 %} 4067 4068 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM) 4069 // runtime code generation via reg_class_dynamic. 
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);   // no base register
    index(0x4);         // no index register
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);         // no index register
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// (pointer constant as the displacement, integer register as the base)
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// Variants of the memory operands above that avoid EBP as the base
// register (see eRegP_no_EBP); on windows95 EBP is not safe to use
// for implicit null tests.

// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    // Encodings are the x86 signed condition-code nibbles (Jcc/SETcc)
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    // Encodings are the x86 unsigned condition-code nibbles (b/nb/be/nbe)
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o");   // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
// (note the reversed encodings: l<->g, le<->ge — the operands are commuted)
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are of variable size
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
4633 4634 // Integer ALU reg operation 4635 pipe_class ialu_reg(rRegI dst) %{ 4636 single_instruction; 4637 dst : S4(write); 4638 dst : S3(read); 4639 DECODE : S0; // any decoder 4640 ALU : S3; // any alu 4641 %} 4642 4643 // Long ALU reg operation 4644 pipe_class ialu_reg_long(eRegL dst) %{ 4645 instruction_count(2); 4646 dst : S4(write); 4647 dst : S3(read); 4648 DECODE : S0(2); // any 2 decoders 4649 ALU : S3(2); // both alus 4650 %} 4651 4652 // Integer ALU reg operation using big decoder 4653 pipe_class ialu_reg_fat(rRegI dst) %{ 4654 single_instruction; 4655 dst : S4(write); 4656 dst : S3(read); 4657 D0 : S0; // big decoder only 4658 ALU : S3; // any alu 4659 %} 4660 4661 // Long ALU reg operation using big decoder 4662 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4663 instruction_count(2); 4664 dst : S4(write); 4665 dst : S3(read); 4666 D0 : S0(2); // big decoder only; twice 4667 ALU : S3(2); // any 2 alus 4668 %} 4669 4670 // Integer ALU reg-reg operation 4671 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4672 single_instruction; 4673 dst : S4(write); 4674 src : S3(read); 4675 DECODE : S0; // any decoder 4676 ALU : S3; // any alu 4677 %} 4678 4679 // Long ALU reg-reg operation 4680 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4681 instruction_count(2); 4682 dst : S4(write); 4683 src : S3(read); 4684 DECODE : S0(2); // any 2 decoders 4685 ALU : S3(2); // both alus 4686 %} 4687 4688 // Integer ALU reg-reg operation 4689 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4690 single_instruction; 4691 dst : S4(write); 4692 src : S3(read); 4693 D0 : S0; // big decoder only 4694 ALU : S3; // any alu 4695 %} 4696 4697 // Long ALU reg-reg operation 4698 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4699 instruction_count(2); 4700 dst : S4(write); 4701 src : S3(read); 4702 D0 : S0(2); // big decoder only; twice 4703 ALU : S3(2); // both alus 4704 %} 4705 4706 // Integer ALU reg-mem operation 4707 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4708 single_instruction; 4709 dst : S5(write); 4710 mem : S3(read); 4711 D0 : S0; // big decoder only 4712 ALU : S4; // any alu 4713 MEM : S3; // any mem 4714 %} 4715 4716 // Long ALU reg-mem operation 4717 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4718 instruction_count(2); 4719 dst : S5(write); 4720 mem : S3(read); 4721 D0 : S0(2); // big decoder only; twice 4722 ALU : S4(2); // any 2 alus 4723 MEM : S3(2); // both mems 4724 %} 4725 4726 // Integer mem operation (prefetch) 4727 pipe_class ialu_mem(memory mem) 4728 %{ 4729 single_instruction; 4730 mem : S3(read); 4731 D0 : S0; // big decoder only 4732 MEM : S3; // any mem 4733 %} 4734 4735 // Integer Store to Memory 4736 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4737 single_instruction; 4738 mem : S3(read); 4739 src : S5(read); 4740 D0 : S0; // big decoder only 4741 ALU : S4; // any alu 4742 MEM : S3; 4743 %} 4744 4745 // Long Store to Memory 4746 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4747 instruction_count(2); 4748 mem : S3(read); 4749 src : S5(read); 4750 D0 : S0(2); // big decoder only; twice 4751 ALU : S4(2); // any 2 alus 4752 MEM : S3(2); // Both mems 4753 %} 4754 4755 // Integer Store to Memory 4756 pipe_class ialu_mem_imm(memory mem) %{ 4757 single_instruction; 4758 mem : S3(read); 4759 D0 : S0; // big decoder only 4760 ALU : S4; // any alu 4761 MEM : S3; 4762 %} 4763 4764 // Integer ALU0 reg-reg operation 4765 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4766 single_instruction; 4767 dst : S4(write); 4768 src : S3(read); 4769 D0 : S0; // Big decoder only 4770 ALU0 : S3; // only alu0 4771 %} 4772 4773 // Integer ALU0 reg-mem operation 4774 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4775 single_instruction; 4776 dst : S5(write); 4777 mem : S3(read); 4778 D0 : S0; // big decoder only 4779 ALU0 : S4; // ALU0 only 4780 MEM : S3; // any mem 4781 %} 4782 4783 // Integer ALU reg-reg operation 4784 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr : S1(read);
    BR : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters.  The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  // Swap bytes within each 32-bit half, then swap the halves.
  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  // BSWAP puts the 16 interesting bits in the high half; shift them down
  // (logical shift: result is zero-extended).
  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  // Same as the unsigned variant but with an arithmetic shift so the
  // result is sign-extended.
  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when LZCNT is unavailable: BSR leaves the destination undefined
// when the source is zero, so that case is patched to -1 explicitly, and
// bit-index is converted to leading-zero count via 31 - index.
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);           // src == 0: 31 - (-1) = 32
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

// Per the Intel SDM, LZCNT sets CF when the source is all zeros; JNC thus
// falls through only when the high word contained a set bit.
instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);   // bit index is relative to the full 64 bits
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);           // src == 0: 63 - (-1) = 64
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when TZCNT is unavailable: BSF leaves the destination undefined
// when the source is zero, so that case is patched to 32 explicitly.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// Per the Intel SDM, TZCNT sets CF when the source is all zeros; JNC thus
// falls through only when the low word contained a set bit.
instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);   // both words zero: 32 + 32 = 64
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
5342 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 5343 predicate(UsePopCountInstruction); 5344 match(Set dst (PopCountL src)); 5345 effect(KILL cr, TEMP tmp, TEMP dst); 5346 5347 format %{ "POPCNT $dst, $src.lo\n\t" 5348 "POPCNT $tmp, $src.hi\n\t" 5349 "ADD $dst, $tmp" %} 5350 ins_encode %{ 5351 __ popcntl($dst$$Register, $src$$Register); 5352 __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 5353 __ addl($dst$$Register, $tmp$$Register); 5354 %} 5355 ins_pipe(ialu_reg); 5356 %} 5357 5358 // Note: Long.bitCount(long) returns an int. 5359 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ 5360 predicate(UsePopCountInstruction); 5361 match(Set dst (PopCountL (LoadL mem))); 5362 effect(KILL cr, TEMP tmp, TEMP dst); 5363 5364 format %{ "POPCNT $dst, $mem\n\t" 5365 "POPCNT $tmp, $mem+4\n\t" 5366 "ADD $dst, $tmp" %} 5367 ins_encode %{ 5368 //__ popcntl($dst$$Register, $mem$$Address$$first); 5369 //__ popcntl($tmp$$Register, $mem$$Address$$second); 5370 __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); 5371 __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); 5372 __ addl($dst$$Register, $tmp$$Register); 5373 %} 5374 ins_pipe(ialu_reg); 5375 %} 5376 5377 5378 //----------Load/Store/Move Instructions--------------------------------------- 5379 //----------Load Instructions-------------------------------------------------- 5380 // Load Byte (8bit signed) 5381 instruct loadB(xRegI dst, memory mem) %{ 5382 match(Set dst (LoadB mem)); 5383 5384 ins_cost(125); 5385 format %{ "MOVSX8 $dst,$mem\t# byte" %} 5386 5387 ins_encode %{ 5388 __ movsbl($dst$$Register, $mem$$Address); 5389 %} 5390 5391 ins_pipe(ialu_reg_mem); 5392 %} 5393 5394 // Load Byte (8bit signed) into Long Register 5395 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5396 match(Set dst (ConvI2L (LoadB mem))); 5397 effect(KILL 
cr); 5398 5399 ins_cost(375); 5400 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5401 "MOV $dst.hi,$dst.lo\n\t" 5402 "SAR $dst.hi,7" %} 5403 5404 ins_encode %{ 5405 __ movsbl($dst$$Register, $mem$$Address); 5406 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5407 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5408 %} 5409 5410 ins_pipe(ialu_reg_mem); 5411 %} 5412 5413 // Load Unsigned Byte (8bit UNsigned) 5414 instruct loadUB(xRegI dst, memory mem) %{ 5415 match(Set dst (LoadUB mem)); 5416 5417 ins_cost(125); 5418 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5419 5420 ins_encode %{ 5421 __ movzbl($dst$$Register, $mem$$Address); 5422 %} 5423 5424 ins_pipe(ialu_reg_mem); 5425 %} 5426 5427 // Load Unsigned Byte (8 bit UNsigned) into Long Register 5428 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5429 match(Set dst (ConvI2L (LoadUB mem))); 5430 effect(KILL cr); 5431 5432 ins_cost(250); 5433 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5434 "XOR $dst.hi,$dst.hi" %} 5435 5436 ins_encode %{ 5437 Register Rdst = $dst$$Register; 5438 __ movzbl(Rdst, $mem$$Address); 5439 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5440 %} 5441 5442 ins_pipe(ialu_reg_mem); 5443 %} 5444 5445 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5446 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5447 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5448 effect(KILL cr); 5449 5450 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5451 "XOR $dst.hi,$dst.hi\n\t" 5452 "AND $dst.lo,right_n_bits($mask, 8)" %} 5453 ins_encode %{ 5454 Register Rdst = $dst$$Register; 5455 __ movzbl(Rdst, $mem$$Address); 5456 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5457 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5458 %} 5459 ins_pipe(ialu_reg_mem); 5460 %} 5461 5462 // Load Short (16bit signed) 5463 instruct loadS(rRegI 
dst, memory mem) %{ 5464 match(Set dst (LoadS mem)); 5465 5466 ins_cost(125); 5467 format %{ "MOVSX $dst,$mem\t# short" %} 5468 5469 ins_encode %{ 5470 __ movswl($dst$$Register, $mem$$Address); 5471 %} 5472 5473 ins_pipe(ialu_reg_mem); 5474 %} 5475 5476 // Load Short (16 bit signed) to Byte (8 bit signed) 5477 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5478 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5479 5480 ins_cost(125); 5481 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5482 ins_encode %{ 5483 __ movsbl($dst$$Register, $mem$$Address); 5484 %} 5485 ins_pipe(ialu_reg_mem); 5486 %} 5487 5488 // Load Short (16bit signed) into Long Register 5489 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5490 match(Set dst (ConvI2L (LoadS mem))); 5491 effect(KILL cr); 5492 5493 ins_cost(375); 5494 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5495 "MOV $dst.hi,$dst.lo\n\t" 5496 "SAR $dst.hi,15" %} 5497 5498 ins_encode %{ 5499 __ movswl($dst$$Register, $mem$$Address); 5500 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5501 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5502 %} 5503 5504 ins_pipe(ialu_reg_mem); 5505 %} 5506 5507 // Load Unsigned Short/Char (16bit unsigned) 5508 instruct loadUS(rRegI dst, memory mem) %{ 5509 match(Set dst (LoadUS mem)); 5510 5511 ins_cost(125); 5512 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5513 5514 ins_encode %{ 5515 __ movzwl($dst$$Register, $mem$$Address); 5516 %} 5517 5518 ins_pipe(ialu_reg_mem); 5519 %} 5520 5521 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5522 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5523 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5524 5525 ins_cost(125); 5526 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5527 ins_encode %{ 5528 __ movsbl($dst$$Register, $mem$$Address); 5529 %} 5530 ins_pipe(ialu_reg_mem); 5531 %} 5532 5533 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5534 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5535 match(Set dst (ConvI2L (LoadUS mem))); 5536 effect(KILL cr); 5537 5538 ins_cost(250); 5539 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5540 "XOR $dst.hi,$dst.hi" %} 5541 5542 ins_encode %{ 5543 __ movzwl($dst$$Register, $mem$$Address); 5544 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5545 %} 5546 5547 ins_pipe(ialu_reg_mem); 5548 %} 5549 5550 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5551 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5552 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5553 effect(KILL cr); 5554 5555 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5556 "XOR $dst.hi,$dst.hi" %} 5557 ins_encode %{ 5558 Register Rdst = $dst$$Register; 5559 __ movzbl(Rdst, $mem$$Address); 5560 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5561 %} 5562 ins_pipe(ialu_reg_mem); 5563 %} 5564 5565 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5566 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5567 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5568 effect(KILL cr); 5569 5570 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5571 "XOR $dst.hi,$dst.hi\n\t" 5572 "AND $dst.lo,right_n_bits($mask, 16)" %} 5573 ins_encode %{ 5574 Register Rdst = $dst$$Register; 5575 __ movzwl(Rdst, $mem$$Address); 5576 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5577 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5578 %} 5579 ins_pipe(ialu_reg_mem); 5580 %} 5581 5582 // Load Integer 5583 instruct loadI(rRegI dst, memory mem) %{ 5584 match(Set dst (LoadI mem)); 5585 5586 ins_cost(125); 5587 format %{ "MOV $dst,$mem\t# int" %} 5588 5589 ins_encode %{ 5590 __ movl($dst$$Register, $mem$$Address); 5591 %} 5592 5593 ins_pipe(ialu_reg_mem); 5594 %} 5595 5596 // Load Integer (32 bit signed) to Byte (8 bit signed) 5597 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5598 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5599 5600 ins_cost(125); 5601 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5602 ins_encode %{ 5603 __ movsbl($dst$$Register, $mem$$Address); 5604 %} 5605 ins_pipe(ialu_reg_mem); 5606 %} 5607 5608 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5609 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5610 match(Set dst (AndI (LoadI mem) mask)); 5611 5612 ins_cost(125); 5613 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5614 ins_encode %{ 5615 __ movzbl($dst$$Register, $mem$$Address); 5616 %} 5617 ins_pipe(ialu_reg_mem); 5618 %} 5619 5620 // Load Integer (32 bit signed) to Short (16 bit signed) 5621 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5622 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5623 5624 ins_cost(125); 5625 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5626 ins_encode %{ 5627 __ movswl($dst$$Register, $mem$$Address); 5628 %} 5629 ins_pipe(ialu_reg_mem); 5630 
%} 5631 5632 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5633 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5634 match(Set dst (AndI (LoadI mem) mask)); 5635 5636 ins_cost(125); 5637 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5638 ins_encode %{ 5639 __ movzwl($dst$$Register, $mem$$Address); 5640 %} 5641 ins_pipe(ialu_reg_mem); 5642 %} 5643 5644 // Load Integer into Long Register 5645 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5646 match(Set dst (ConvI2L (LoadI mem))); 5647 effect(KILL cr); 5648 5649 ins_cost(375); 5650 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5651 "MOV $dst.hi,$dst.lo\n\t" 5652 "SAR $dst.hi,31" %} 5653 5654 ins_encode %{ 5655 __ movl($dst$$Register, $mem$$Address); 5656 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5657 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5658 %} 5659 5660 ins_pipe(ialu_reg_mem); 5661 %} 5662 5663 // Load Integer with mask 0xFF into Long Register 5664 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5665 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5666 effect(KILL cr); 5667 5668 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5669 "XOR $dst.hi,$dst.hi" %} 5670 ins_encode %{ 5671 Register Rdst = $dst$$Register; 5672 __ movzbl(Rdst, $mem$$Address); 5673 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5674 %} 5675 ins_pipe(ialu_reg_mem); 5676 %} 5677 5678 // Load Integer with mask 0xFFFF into Long Register 5679 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5680 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5681 effect(KILL cr); 5682 5683 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5684 "XOR $dst.hi,$dst.hi" %} 5685 ins_encode %{ 5686 Register Rdst = $dst$$Register; 5687 __ movzwl(Rdst, $mem$$Address); 5688 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5689 %} 5690 ins_pipe(ialu_reg_mem); 
5691 %} 5692 5693 // Load Integer with 31-bit mask into Long Register 5694 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ 5695 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5696 effect(KILL cr); 5697 5698 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" 5699 "XOR $dst.hi,$dst.hi\n\t" 5700 "AND $dst.lo,$mask" %} 5701 ins_encode %{ 5702 Register Rdst = $dst$$Register; 5703 __ movl(Rdst, $mem$$Address); 5704 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5705 __ andl(Rdst, $mask$$constant); 5706 %} 5707 ins_pipe(ialu_reg_mem); 5708 %} 5709 5710 // Load Unsigned Integer into Long Register 5711 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ 5712 match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); 5713 effect(KILL cr); 5714 5715 ins_cost(250); 5716 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" 5717 "XOR $dst.hi,$dst.hi" %} 5718 5719 ins_encode %{ 5720 __ movl($dst$$Register, $mem$$Address); 5721 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5722 %} 5723 5724 ins_pipe(ialu_reg_mem); 5725 %} 5726 5727 // Load Long. Cannot clobber address while loading, so restrict address 5728 // register to ESI 5729 instruct loadL(eRegL dst, load_long_memory mem) %{ 5730 predicate(!((LoadLNode*)n)->require_atomic_access()); 5731 match(Set dst (LoadL mem)); 5732 5733 ins_cost(250); 5734 format %{ "MOV $dst.lo,$mem\t# long\n\t" 5735 "MOV $dst.hi,$mem+4" %} 5736 5737 ins_encode %{ 5738 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); 5739 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); 5740 __ movl($dst$$Register, Amemlo); 5741 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); 5742 %} 5743 5744 ins_pipe(ialu_reg_long_mem); 5745 %} 5746 5747 // Volatile Load Long. Must be atomic, so do 64-bit FILD 5748 // then store it down to the stack and reload on the int 5749 // side. 
5750 instruct loadL_volatile(stackSlotL dst, memory mem) %{ 5751 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 5752 match(Set dst (LoadL mem)); 5753 5754 ins_cost(200); 5755 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 5756 "FISTp $dst" %} 5757 ins_encode(enc_loadL_volatile(mem,dst)); 5758 ins_pipe( fpu_reg_mem ); 5759 %} 5760 5761 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 5762 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5763 match(Set dst (LoadL mem)); 5764 effect(TEMP tmp); 5765 ins_cost(180); 5766 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5767 "MOVSD $dst,$tmp" %} 5768 ins_encode %{ 5769 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5770 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 5771 %} 5772 ins_pipe( pipe_slow ); 5773 %} 5774 5775 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 5776 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5777 match(Set dst (LoadL mem)); 5778 effect(TEMP tmp); 5779 ins_cost(160); 5780 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5781 "MOVD $dst.lo,$tmp\n\t" 5782 "PSRLQ $tmp,32\n\t" 5783 "MOVD $dst.hi,$tmp" %} 5784 ins_encode %{ 5785 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5786 __ movdl($dst$$Register, $tmp$$XMMRegister); 5787 __ psrlq($tmp$$XMMRegister, 32); 5788 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 5789 %} 5790 ins_pipe( pipe_slow ); 5791 %} 5792 5793 // Load Range 5794 instruct loadRange(rRegI dst, memory mem) %{ 5795 match(Set dst (LoadRange mem)); 5796 5797 ins_cost(125); 5798 format %{ "MOV $dst,$mem" %} 5799 opcode(0x8B); 5800 ins_encode( OpcP, RegMem(dst,mem)); 5801 ins_pipe( ialu_reg_mem ); 5802 %} 5803 5804 5805 // Load Pointer 5806 instruct loadP(eRegP dst, memory mem) %{ 5807 match(Set dst (LoadP mem)); 5808 5809 ins_cost(125); 5810 format %{ "MOV $dst,$mem" %} 5811 opcode(0x8B); 5812 ins_encode( OpcP, RegMem(dst,mem)); 5813 ins_pipe( 
ialu_reg_mem ); 5814 %} 5815 5816 // Load Klass Pointer 5817 instruct loadKlass(eRegP dst, memory mem) %{ 5818 match(Set dst (LoadKlass mem)); 5819 5820 ins_cost(125); 5821 format %{ "MOV $dst,$mem" %} 5822 opcode(0x8B); 5823 ins_encode( OpcP, RegMem(dst,mem)); 5824 ins_pipe( ialu_reg_mem ); 5825 %} 5826 5827 // Load Double 5828 instruct loadDPR(regDPR dst, memory mem) %{ 5829 predicate(UseSSE<=1); 5830 match(Set dst (LoadD mem)); 5831 5832 ins_cost(150); 5833 format %{ "FLD_D ST,$mem\n\t" 5834 "FSTP $dst" %} 5835 opcode(0xDD); /* DD /0 */ 5836 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5837 Pop_Reg_DPR(dst) ); 5838 ins_pipe( fpu_reg_mem ); 5839 %} 5840 5841 // Load Double to XMM 5842 instruct loadD(regD dst, memory mem) %{ 5843 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 5844 match(Set dst (LoadD mem)); 5845 ins_cost(145); 5846 format %{ "MOVSD $dst,$mem" %} 5847 ins_encode %{ 5848 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5849 %} 5850 ins_pipe( pipe_slow ); 5851 %} 5852 5853 instruct loadD_partial(regD dst, memory mem) %{ 5854 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 5855 match(Set dst (LoadD mem)); 5856 ins_cost(145); 5857 format %{ "MOVLPD $dst,$mem" %} 5858 ins_encode %{ 5859 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5860 %} 5861 ins_pipe( pipe_slow ); 5862 %} 5863 5864 // Load to XMM register (single-precision floating point) 5865 // MOVSS instruction 5866 instruct loadF(regF dst, memory mem) %{ 5867 predicate(UseSSE>=1); 5868 match(Set dst (LoadF mem)); 5869 ins_cost(145); 5870 format %{ "MOVSS $dst,$mem" %} 5871 ins_encode %{ 5872 __ movflt ($dst$$XMMRegister, $mem$$Address); 5873 %} 5874 ins_pipe( pipe_slow ); 5875 %} 5876 5877 // Load Float 5878 instruct loadFPR(regFPR dst, memory mem) %{ 5879 predicate(UseSSE==0); 5880 match(Set dst (LoadF mem)); 5881 5882 ins_cost(150); 5883 format %{ "FLD_S ST,$mem\n\t" 5884 "FSTP $dst" %} 5885 opcode(0xD9); /* D9 /0 */ 5886 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5887 Pop_Reg_FPR(dst) ); 5888 
ins_pipe( fpu_reg_mem ); 5889 %} 5890 5891 // Load Effective Address 5892 instruct leaP8(eRegP dst, indOffset8 mem) %{ 5893 match(Set dst mem); 5894 5895 ins_cost(110); 5896 format %{ "LEA $dst,$mem" %} 5897 opcode(0x8D); 5898 ins_encode( OpcP, RegMem(dst,mem)); 5899 ins_pipe( ialu_reg_reg_fat ); 5900 %} 5901 5902 instruct leaP32(eRegP dst, indOffset32 mem) %{ 5903 match(Set dst mem); 5904 5905 ins_cost(110); 5906 format %{ "LEA $dst,$mem" %} 5907 opcode(0x8D); 5908 ins_encode( OpcP, RegMem(dst,mem)); 5909 ins_pipe( ialu_reg_reg_fat ); 5910 %} 5911 5912 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ 5913 match(Set dst mem); 5914 5915 ins_cost(110); 5916 format %{ "LEA $dst,$mem" %} 5917 opcode(0x8D); 5918 ins_encode( OpcP, RegMem(dst,mem)); 5919 ins_pipe( ialu_reg_reg_fat ); 5920 %} 5921 5922 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ 5923 match(Set dst mem); 5924 5925 ins_cost(110); 5926 format %{ "LEA $dst,$mem" %} 5927 opcode(0x8D); 5928 ins_encode( OpcP, RegMem(dst,mem)); 5929 ins_pipe( ialu_reg_reg_fat ); 5930 %} 5931 5932 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ 5933 match(Set dst mem); 5934 5935 ins_cost(110); 5936 format %{ "LEA $dst,$mem" %} 5937 opcode(0x8D); 5938 ins_encode( OpcP, RegMem(dst,mem)); 5939 ins_pipe( ialu_reg_reg_fat ); 5940 %} 5941 5942 // Load Constant 5943 instruct loadConI(rRegI dst, immI src) %{ 5944 match(Set dst src); 5945 5946 format %{ "MOV $dst,$src" %} 5947 ins_encode( LdImmI(dst, src) ); 5948 ins_pipe( ialu_reg_fat ); 5949 %} 5950 5951 // Load Constant zero 5952 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{ 5953 match(Set dst src); 5954 effect(KILL cr); 5955 5956 ins_cost(50); 5957 format %{ "XOR $dst,$dst" %} 5958 opcode(0x33); /* + rd */ 5959 ins_encode( OpcP, RegReg( dst, dst ) ); 5960 ins_pipe( ialu_reg ); 5961 %} 5962 5963 instruct loadConP(eRegP dst, immP src) %{ 5964 match(Set dst src); 5965 5966 format %{ "MOV $dst,$src" %} 5967 opcode(0xB8); /* + rd */ 5968 
ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Long Constant (32-bit halves loaded separately)
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load Long Constant zero — XOR both halves; clobbers flags.
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Integer from stack slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Long from stack slot (two 32-bit moves)
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Pointer from stack slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Float from stack slot (x87)
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double from stack slot (x87)
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).

// No SSE and PREFETCHW not requested: emit nothing (zero-size encoding).
instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short (0x66 operand-size prefix + 32-bit MOV opcode)
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long (non-atomic: two 32-bit moves)
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer (only the low half is written)
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.
// Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: bounce the long through an XMM temp for an atomic 64-bit store.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant taking the long in GP registers: pack lo/hi into one XMM.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87; source must already be on the FP stack top)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float (x87)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// Machine-word-to-pointer cast: both operands constrained to EAX, so no code.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move, emulated with a short branch on pre-CMOV processors
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI
(Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

// True CMOVcc forms (0F 40+cc), guarded by supports_cmov()
instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move (x87 FCMOV, unsigned condition codes)
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

instruct cmovL_reg(cmpOp cop,
eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of constant +1: use the one-byte INC when UseIncDec allows it.
instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: no flags clobbered, dst may differ from src0.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add of constant -1: use the one-byte DEC when UseIncDec allows it.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src"
%} 7100 opcode(0x03); 7101 ins_encode( OpcP, RegMem( dst, src) ); 7102 ins_pipe( ialu_reg_mem ); 7103 %} 7104 7105 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7106 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7107 effect(KILL cr); 7108 7109 ins_cost(150); 7110 format %{ "ADD $dst,$src" %} 7111 opcode(0x01); /* Opcode 01 /r */ 7112 ins_encode( OpcP, RegMem( src, dst ) ); 7113 ins_pipe( ialu_mem_reg ); 7114 %} 7115 7116 // Add Memory with Immediate 7117 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7118 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7119 effect(KILL cr); 7120 7121 ins_cost(125); 7122 format %{ "ADD $dst,$src" %} 7123 opcode(0x81); /* Opcode 81 /0 id */ 7124 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7125 ins_pipe( ialu_mem_imm ); 7126 %} 7127 7128 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7129 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7130 effect(KILL cr); 7131 7132 ins_cost(125); 7133 format %{ "INC $dst" %} 7134 opcode(0xFF); /* Opcode FF /0 */ 7135 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7136 ins_pipe( ialu_mem_imm ); 7137 %} 7138 7139 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7140 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7141 effect(KILL cr); 7142 7143 ins_cost(125); 7144 format %{ "DEC $dst" %} 7145 opcode(0xFF); /* Opcode FF /1 */ 7146 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7147 ins_pipe( ialu_mem_imm ); 7148 %} 7149 7150 7151 instruct checkCastPP( eRegP dst ) %{ 7152 match(Set dst (CheckCastPP dst)); 7153 7154 size(0); 7155 format %{ "#checkcastPP of $dst" %} 7156 ins_encode( /*empty encoding*/ ); 7157 ins_pipe( empty ); 7158 %} 7159 7160 instruct castPP( eRegP dst ) %{ 7161 match(Set dst (CastPP dst)); 7162 format %{ "#castPP of $dst" %} 7163 ins_encode( /*empty encoding*/ ); 7164 ins_pipe( empty ); 7165 %} 7166 7167 instruct castII( rRegI dst ) %{ 7168 match(Set dst (CastII dst)); 7169 format %{ "#castII of $dst" %} 
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}


// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
// The flags result (cr) is what the matched StorePConditional produces;
// a following branch consumes ZF to detect success/failure.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  // 0F B1 /r is CMPXCHG r/m32,r32; lock_prefix makes it atomic on MP.
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  // CMPXCHG writes the memory value into EAX on failure, so oldval (EAX)
  // is clobbered.
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  // CMPXCHG8B loads the memory value into EDX:EAX on failure.
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high order word of the new value to store but
    //       our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS: only available when the CPU supports CMPXCHG8B.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  // oldval (EDX:EAX) receives the memory value on failure; res is the
  // 0/1 boolean materialized from ZF.
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS (32-bit): result is a 0/1 int derived from ZF.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int CAS: same shape as compareAndSwapP.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI whose result is not consumed: a plain locked ADD suffices
// (cheaper than XADD since no value needs to come back).
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add: XADD leaves the old memory value in newval.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange. XCHG with a memory operand is implicitly locked and
// does not modify EFLAGS, so no lock prefix or KILL cr is needed here.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic pointer exchange; 32-bit VM, so same encoding as xchgI.
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
match(Set dst (SubI dst src)); 7318 effect(KILL cr); 7319 7320 size(2); 7321 format %{ "SUB $dst,$src" %} 7322 opcode(0x2B); 7323 ins_encode( OpcP, RegReg( dst, src) ); 7324 ins_pipe( ialu_reg_reg ); 7325 %} 7326 7327 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7328 match(Set dst (SubI dst src)); 7329 effect(KILL cr); 7330 7331 format %{ "SUB $dst,$src" %} 7332 opcode(0x81,0x05); /* Opcode 81 /5 */ 7333 // ins_encode( RegImm( dst, src) ); 7334 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7335 ins_pipe( ialu_reg ); 7336 %} 7337 7338 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7339 match(Set dst (SubI dst (LoadI src))); 7340 effect(KILL cr); 7341 7342 ins_cost(125); 7343 format %{ "SUB $dst,$src" %} 7344 opcode(0x2B); 7345 ins_encode( OpcP, RegMem( dst, src) ); 7346 ins_pipe( ialu_reg_mem ); 7347 %} 7348 7349 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7350 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7351 effect(KILL cr); 7352 7353 ins_cost(150); 7354 format %{ "SUB $dst,$src" %} 7355 opcode(0x29); /* Opcode 29 /r */ 7356 ins_encode( OpcP, RegMem( src, dst ) ); 7357 ins_pipe( ialu_mem_reg ); 7358 %} 7359 7360 // Subtract from a pointer 7361 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{ 7362 match(Set dst (AddP dst (SubI zero src))); 7363 effect(KILL cr); 7364 7365 size(2); 7366 format %{ "SUB $dst,$src" %} 7367 opcode(0x2B); 7368 ins_encode( OpcP, RegReg( dst, src) ); 7369 ins_pipe( ialu_reg_reg ); 7370 %} 7371 7372 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{ 7373 match(Set dst (SubI zero dst)); 7374 effect(KILL cr); 7375 7376 size(2); 7377 format %{ "NEG $dst" %} 7378 opcode(0xF7,0x03); // Opcode F7 /3 7379 ins_encode( OpcP, RegOpc( dst ) ); 7380 ins_pipe( ialu_reg ); 7381 %} 7382 7383 //----------Multiplication/Division Instructions------------------------------- 7384 // Integer Multiplication Instructions 7385 // Multiply Register 7386 instruct 
mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7387 match(Set dst (MulI dst src)); 7388 effect(KILL cr); 7389 7390 size(3); 7391 ins_cost(300); 7392 format %{ "IMUL $dst,$src" %} 7393 opcode(0xAF, 0x0F); 7394 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7395 ins_pipe( ialu_reg_reg_alu0 ); 7396 %} 7397 7398 // Multiply 32-bit Immediate 7399 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7400 match(Set dst (MulI src imm)); 7401 effect(KILL cr); 7402 7403 ins_cost(300); 7404 format %{ "IMUL $dst,$src,$imm" %} 7405 opcode(0x69); /* 69 /r id */ 7406 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7407 ins_pipe( ialu_reg_reg_alu0 ); 7408 %} 7409 7410 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7411 match(Set dst src); 7412 effect(KILL cr); 7413 7414 // Note that this is artificially increased to make it more expensive than loadConL 7415 ins_cost(250); 7416 format %{ "MOV EAX,$src\t// low word only" %} 7417 opcode(0xB8); 7418 ins_encode( LdImmL_Lo(dst, src) ); 7419 ins_pipe( ialu_reg_fat ); 7420 %} 7421 7422 // Multiply by 32-bit Immediate, taking the shifted high order results 7423 // (special case for shift by 32) 7424 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7425 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7426 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7427 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7428 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7429 effect(USE src1, KILL cr); 7430 7431 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7432 ins_cost(0*100 + 1*400 - 150); 7433 format %{ "IMUL EDX:EAX,$src1" %} 7434 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7435 ins_pipe( pipe_slow ); 7436 %} 7437 7438 // Multiply 
by 32-bit Immediate, taking the shifted high order results 7439 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7440 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7441 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7442 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7443 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7444 effect(USE src1, KILL cr); 7445 7446 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7447 ins_cost(1*100 + 1*400 - 150); 7448 format %{ "IMUL EDX:EAX,$src1\n\t" 7449 "SAR EDX,$cnt-32" %} 7450 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7451 ins_pipe( pipe_slow ); 7452 %} 7453 7454 // Multiply Memory 32-bit Immediate 7455 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7456 match(Set dst (MulI (LoadI src) imm)); 7457 effect(KILL cr); 7458 7459 ins_cost(300); 7460 format %{ "IMUL $dst,$src,$imm" %} 7461 opcode(0x69); /* 69 /r id */ 7462 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 7463 ins_pipe( ialu_reg_mem_alu0 ); 7464 %} 7465 7466 // Multiply Memory 7467 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7468 match(Set dst (MulI dst (LoadI src))); 7469 effect(KILL cr); 7470 7471 ins_cost(350); 7472 format %{ "IMUL $dst,$src" %} 7473 opcode(0xAF, 0x0F); 7474 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 7475 ins_pipe( ialu_reg_mem_alu0 ); 7476 %} 7477 7478 // Multiply Register Int to Long 7479 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7480 // Basic Idea: long = (long)int * (long)int 7481 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7482 effect(DEF dst, USE src, USE src1, KILL flags); 7483 7484 ins_cost(300); 7485 format %{ "IMUL $dst,$src1" %} 7486 7487 ins_encode( long_int_multiply( dst, src1 
) ); 7488 ins_pipe( ialu_reg_reg_alu0 ); 7489 %} 7490 7491 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7492 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7493 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7494 effect(KILL flags); 7495 7496 ins_cost(300); 7497 format %{ "MUL $dst,$src1" %} 7498 7499 ins_encode( long_uint_multiply(dst, src1) ); 7500 ins_pipe( ialu_reg_reg_alu0 ); 7501 %} 7502 7503 // Multiply Register Long 7504 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7505 match(Set dst (MulL dst src)); 7506 effect(KILL cr, TEMP tmp); 7507 ins_cost(4*100+3*400); 7508 // Basic idea: lo(result) = lo(x_lo * y_lo) 7509 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7510 format %{ "MOV $tmp,$src.lo\n\t" 7511 "IMUL $tmp,EDX\n\t" 7512 "MOV EDX,$src.hi\n\t" 7513 "IMUL EDX,EAX\n\t" 7514 "ADD $tmp,EDX\n\t" 7515 "MUL EDX:EAX,$src.lo\n\t" 7516 "ADD EDX,$tmp" %} 7517 ins_encode( long_multiply( dst, src, tmp ) ); 7518 ins_pipe( pipe_slow ); 7519 %} 7520 7521 // Multiply Register Long where the left operand's high 32 bits are zero 7522 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7523 predicate(is_operand_hi32_zero(n->in(1))); 7524 match(Set dst (MulL dst src)); 7525 effect(KILL cr, TEMP tmp); 7526 ins_cost(2*100+2*400); 7527 // Basic idea: lo(result) = lo(x_lo * y_lo) 7528 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7529 format %{ "MOV $tmp,$src.hi\n\t" 7530 "IMUL $tmp,EAX\n\t" 7531 "MUL EDX:EAX,$src.lo\n\t" 7532 "ADD EDX,$tmp" %} 7533 ins_encode %{ 7534 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7535 __ imull($tmp$$Register, rax); 7536 __ mull($src$$Register); 7537 __ addl(rdx, $tmp$$Register); 7538 %} 7539 ins_pipe( pipe_slow ); 7540 %} 7541 7542 // Multiply Register Long where the right operand's high 32 bits are zero 7543 instruct 
mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  // A single unsigned MUL produces the full 64-bit product in EDX:EAX.
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant (fits in 8 bits: immL_127)
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// The CMP/JE prologue (cdq_enc) special-cases min_jint / -1, which would
// otherwise raise #DE (integer overflow) in IDIV; Java semantics require
// the result min_jint with remainder 0 instead.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr)
%{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// No inline sequence for 64-bit division on x86_32; call into the runtime.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
// Same min_jint/-1 guard as divI_eReg; IDIV leaves quotient in EAX and
// remainder in EDX, so one instruction serves both results.
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL
src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 7663 match(Set dst (ModL src1 src2)); 7664 effect( KILL cr, KILL cx, KILL bx ); 7665 ins_cost(10000); 7666 format %{ "PUSH $src1.hi\n\t" 7667 "PUSH $src1.lo\n\t" 7668 "PUSH $src2.hi\n\t" 7669 "PUSH $src2.lo\n\t" 7670 "CALL SharedRuntime::lrem\n\t" 7671 "ADD ESP,16" %} 7672 ins_encode( long_mod(src1,src2) ); 7673 ins_pipe( pipe_slow ); 7674 %} 7675 7676 // Divide Register Long (no special case since divisor != -1) 7677 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7678 match(Set dst (DivL dst imm)); 7679 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7680 ins_cost(1000); 7681 format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" 7682 "XOR $tmp2,$tmp2\n\t" 7683 "CMP $tmp,EDX\n\t" 7684 "JA,s fast\n\t" 7685 "MOV $tmp2,EAX\n\t" 7686 "MOV EAX,EDX\n\t" 7687 "MOV EDX,0\n\t" 7688 "JLE,s pos\n\t" 7689 "LNEG EAX : $tmp2\n\t" 7690 "DIV $tmp # unsigned division\n\t" 7691 "XCHG EAX,$tmp2\n\t" 7692 "DIV $tmp\n\t" 7693 "LNEG $tmp2 : EAX\n\t" 7694 "JMP,s done\n" 7695 "pos:\n\t" 7696 "DIV $tmp\n\t" 7697 "XCHG EAX,$tmp2\n" 7698 "fast:\n\t" 7699 "DIV $tmp\n" 7700 "done:\n\t" 7701 "MOV EDX,$tmp2\n\t" 7702 "NEG EDX:EAX # if $imm < 0" %} 7703 ins_encode %{ 7704 int con = (int)$imm$$constant; 7705 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7706 int pcon = (con > 0) ? con : -con; 7707 Label Lfast, Lpos, Ldone; 7708 7709 __ movl($tmp$$Register, pcon); 7710 __ xorl($tmp2$$Register,$tmp2$$Register); 7711 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7712 __ jccb(Assembler::above, Lfast); // result fits into 32 bit 7713 7714 __ movl($tmp2$$Register, $dst$$Register); // save 7715 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7716 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7717 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7718 7719 // Negative dividend. 
7720 // convert value to positive to use unsigned division 7721 __ lneg($dst$$Register, $tmp2$$Register); 7722 __ divl($tmp$$Register); 7723 __ xchgl($dst$$Register, $tmp2$$Register); 7724 __ divl($tmp$$Register); 7725 // revert result back to negative 7726 __ lneg($tmp2$$Register, $dst$$Register); 7727 __ jmpb(Ldone); 7728 7729 __ bind(Lpos); 7730 __ divl($tmp$$Register); // Use unsigned division 7731 __ xchgl($dst$$Register, $tmp2$$Register); 7732 // Fallthrow for final divide, tmp2 has 32 bit hi result 7733 7734 __ bind(Lfast); 7735 // fast path: src is positive 7736 __ divl($tmp$$Register); // Use unsigned division 7737 7738 __ bind(Ldone); 7739 __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); 7740 if (con < 0) { 7741 __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); 7742 } 7743 %} 7744 ins_pipe( pipe_slow ); 7745 %} 7746 7747 // Remainder Register Long (remainder fit into 32 bits) 7748 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ 7749 match(Set dst (ModL dst imm)); 7750 effect( TEMP tmp, TEMP tmp2, KILL cr ); 7751 ins_cost(1000); 7752 format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" 7753 "CMP $tmp,EDX\n\t" 7754 "JA,s fast\n\t" 7755 "MOV $tmp2,EAX\n\t" 7756 "MOV EAX,EDX\n\t" 7757 "MOV EDX,0\n\t" 7758 "JLE,s pos\n\t" 7759 "LNEG EAX : $tmp2\n\t" 7760 "DIV $tmp # unsigned division\n\t" 7761 "MOV EAX,$tmp2\n\t" 7762 "DIV $tmp\n\t" 7763 "NEG EDX\n\t" 7764 "JMP,s done\n" 7765 "pos:\n\t" 7766 "DIV $tmp\n\t" 7767 "MOV EAX,$tmp2\n" 7768 "fast:\n\t" 7769 "DIV $tmp\n" 7770 "done:\n\t" 7771 "MOV EAX,EDX\n\t" 7772 "SAR EDX,31\n\t" %} 7773 ins_encode %{ 7774 int con = (int)$imm$$constant; 7775 assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); 7776 int pcon = (con > 0) ? 
con : -con; 7777 Label Lfast, Lpos, Ldone; 7778 7779 __ movl($tmp$$Register, pcon); 7780 __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); 7781 __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit 7782 7783 __ movl($tmp2$$Register, $dst$$Register); // save 7784 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7785 __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags 7786 __ jccb(Assembler::lessEqual, Lpos); // result is positive 7787 7788 // Negative dividend. 7789 // convert value to positive to use unsigned division 7790 __ lneg($dst$$Register, $tmp2$$Register); 7791 __ divl($tmp$$Register); 7792 __ movl($dst$$Register, $tmp2$$Register); 7793 __ divl($tmp$$Register); 7794 // revert remainder back to negative 7795 __ negl(HIGH_FROM_LOW($dst$$Register)); 7796 __ jmpb(Ldone); 7797 7798 __ bind(Lpos); 7799 __ divl($tmp$$Register); 7800 __ movl($dst$$Register, $tmp2$$Register); 7801 7802 __ bind(Lfast); 7803 // fast path: src is positive 7804 __ divl($tmp$$Register); 7805 7806 __ bind(Ldone); 7807 __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); 7808 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign 7809 7810 %} 7811 ins_pipe( pipe_slow ); 7812 %} 7813 7814 // Integer Shift Instructions 7815 // Shift Left by one 7816 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7817 match(Set dst (LShiftI dst shift)); 7818 effect(KILL cr); 7819 7820 size(2); 7821 format %{ "SHL $dst,$shift" %} 7822 opcode(0xD1, 0x4); /* D1 /4 */ 7823 ins_encode( OpcP, RegOpc( dst ) ); 7824 ins_pipe( ialu_reg ); 7825 %} 7826 7827 // Shift Left by 8-bit immediate 7828 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7829 match(Set dst (LShiftI dst shift)); 7830 effect(KILL cr); 7831 7832 size(3); 7833 format %{ "SHL $dst,$shift" %} 7834 opcode(0xC1, 0x4); /* C1 /4 ib */ 7835 ins_encode( RegOpcImm( dst, shift) ); 7836 ins_pipe( ialu_reg ); 7837 %} 7838 7839 // Shift Left by variable 7840 instruct 
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7841 match(Set dst (LShiftI dst shift)); 7842 effect(KILL cr); 7843 7844 size(2); 7845 format %{ "SHL $dst,$shift" %} 7846 opcode(0xD3, 0x4); /* D3 /4 */ 7847 ins_encode( OpcP, RegOpc( dst ) ); 7848 ins_pipe( ialu_reg_reg ); 7849 %} 7850 7851 // Arithmetic shift right by one 7852 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7853 match(Set dst (RShiftI dst shift)); 7854 effect(KILL cr); 7855 7856 size(2); 7857 format %{ "SAR $dst,$shift" %} 7858 opcode(0xD1, 0x7); /* D1 /7 */ 7859 ins_encode( OpcP, RegOpc( dst ) ); 7860 ins_pipe( ialu_reg ); 7861 %} 7862 7863 // Arithmetic shift right by one 7864 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{ 7865 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7866 effect(KILL cr); 7867 format %{ "SAR $dst,$shift" %} 7868 opcode(0xD1, 0x7); /* D1 /7 */ 7869 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 7870 ins_pipe( ialu_mem_imm ); 7871 %} 7872 7873 // Arithmetic Shift Right by 8-bit immediate 7874 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7875 match(Set dst (RShiftI dst shift)); 7876 effect(KILL cr); 7877 7878 size(3); 7879 format %{ "SAR $dst,$shift" %} 7880 opcode(0xC1, 0x7); /* C1 /7 ib */ 7881 ins_encode( RegOpcImm( dst, shift ) ); 7882 ins_pipe( ialu_mem_imm ); 7883 %} 7884 7885 // Arithmetic Shift Right by 8-bit immediate 7886 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 7887 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7888 effect(KILL cr); 7889 7890 format %{ "SAR $dst,$shift" %} 7891 opcode(0xC1, 0x7); /* C1 /7 ib */ 7892 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 7893 ins_pipe( ialu_mem_imm ); 7894 %} 7895 7896 // Arithmetic Shift Right by variable 7897 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7898 match(Set dst (RShiftI dst shift)); 7899 effect(KILL cr); 7900 7901 size(2); 7902 format %{ "SAR $dst,$shift" %} 7903 
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode; it collapses
// into a single sign-extending byte move (MOVSX).
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
// Convert int to short: (x << 16) >> 16 is exactly a 16-bit sign extension,
// so it matches a single MOVSX.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
// Shift count is implicitly in CL (hence the eCXRegI operand).
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2, matched from the ideal (src1 ^ -1) & src2 shape.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from the ideal (0 - src) & src shape.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to lowest set bit, matched from (src + -1) ^ src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, matched from (src + -1) & src.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
// These have no match rule of their own; they are only reachable through the
// expand %{ %} clauses of the rolI_* rules below.
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate checks the two shift counts are complementary mod 32, i.e. the
// OR of the two shifts really is a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
// Match-less helpers used only by the rorI_* expand rules below.
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Predicate checks the two shift counts are complementary mod 32.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// x ^ -1 == ~x, so use NOT (which does not touch the flags).
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

// Match-less copy helper; only reachable via the convI2B expand below.
instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG sets CF when dst != 0; ADC dst,src then yields a non-zero value
// exactly when src was non-zero.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Match-less copy helper; only reachable via the convP2B expand below.
instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// Produce -1 when p < q (signed), else 0: SETlt gives 0/1, NEG turns 1 into -1.
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// Special case of CmpLTMask against zero: the sign bit, smeared by an
// arithmetic shift, already is the mask.
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// CMP computes op1-op2 and sets OF without clobbering op1.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// 64-bit values live in register pairs on this 32-bit port: .lo/.hi denote
// the two halves, combined with carry/borrow propagation (ADC/SBB).

// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long BMI1 forms apply the 32-bit instruction to each half of the pair.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Address of the high 32-bit word of the 64-bit memory operand.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSI: if the low half produced a non-zero result (JNZ), the lowest
// set bit was in the low word and the high word of the result stays zero;
// otherwise apply BLSI to the high half as well.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32-bit word of the 64-bit memory operand.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSMSK: CF reports whether the low half was zero (JNC = low half
// non-zero, mask complete); otherwise continue into the high half.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32-bit word of the 64-bit memory operand.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSR: the high half is copied through unchanged unless the low
// half was zero (CF set), in which case BLSR is applied to the high half too.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    // Address of the high 32-bit word of the 64-bit memory operand.
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// x ^ -1 == ~x on both halves; NOT does not touch the flags.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// ADD/ADC doubles the 64-bit pair, i.e. one left shift with carry across halves.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// Low half shifts entirely into the high half; low half becomes zero.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// High half shifts entirely into the low half; high half becomes zero.
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// High half shifts into the low half; high half is filled with the sign bit.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}


//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
"SAHF\n" 9257 "exit:\tNOP // avoid branch to branch" %} 9258 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9259 ins_encode( Push_Reg_DPR(src1), 9260 OpcP, RegOpc(src2), 9261 cmpF_P6_fixup ); 9262 ins_pipe( pipe_slow ); 9263 %} 9264 9265 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9266 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9267 match(Set cr (CmpD src1 src2)); 9268 ins_cost(150); 9269 format %{ "FLD $src1\n\t" 9270 "FUCOMIP ST,$src2 // P6 instruction" %} 9271 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9272 ins_encode( Push_Reg_DPR(src1), 9273 OpcP, RegOpc(src2)); 9274 ins_pipe( pipe_slow ); 9275 %} 9276 9277 // Compare & branch 9278 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9279 predicate(UseSSE<=1); 9280 match(Set cr (CmpD src1 src2)); 9281 effect(KILL rax); 9282 ins_cost(200); 9283 format %{ "FLD $src1\n\t" 9284 "FCOMp $src2\n\t" 9285 "FNSTSW AX\n\t" 9286 "TEST AX,0x400\n\t" 9287 "JZ,s flags\n\t" 9288 "MOV AH,1\t# unordered treat as LT\n" 9289 "flags:\tSAHF" %} 9290 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9291 ins_encode( Push_Reg_DPR(src1), 9292 OpcP, RegOpc(src2), 9293 fpu_flags); 9294 ins_pipe( pipe_slow ); 9295 %} 9296 9297 // Compare vs zero into -1,0,1 9298 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9299 predicate(UseSSE<=1); 9300 match(Set dst (CmpD3 src1 zero)); 9301 effect(KILL cr, KILL rax); 9302 ins_cost(280); 9303 format %{ "FTSTD $dst,$src1" %} 9304 opcode(0xE4, 0xD9); 9305 ins_encode( Push_Reg_DPR(src1), 9306 OpcS, OpcP, PopFPU, 9307 CmpF_Result(dst)); 9308 ins_pipe( pipe_slow ); 9309 %} 9310 9311 // Compare into -1,0,1 9312 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9313 predicate(UseSSE<=1); 9314 match(Set dst (CmpD3 src1 src2)); 9315 effect(KILL cr, KILL rax); 9316 ins_cost(300); 9317 format %{ "FCMPD $dst,$src1,$src2" %} 9318 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9319 ins_encode( 
Push_Reg_DPR(src1), 9320 OpcP, RegOpc(src2), 9321 CmpF_Result(dst)); 9322 ins_pipe( pipe_slow ); 9323 %} 9324 9325 // float compare and set condition codes in EFLAGS by XMM regs 9326 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9327 predicate(UseSSE>=2); 9328 match(Set cr (CmpD src1 src2)); 9329 ins_cost(145); 9330 format %{ "UCOMISD $src1,$src2\n\t" 9331 "JNP,s exit\n\t" 9332 "PUSHF\t# saw NaN, set CF\n\t" 9333 "AND [rsp], #0xffffff2b\n\t" 9334 "POPF\n" 9335 "exit:" %} 9336 ins_encode %{ 9337 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9338 emit_cmpfp_fixup(_masm); 9339 %} 9340 ins_pipe( pipe_slow ); 9341 %} 9342 9343 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9344 predicate(UseSSE>=2); 9345 match(Set cr (CmpD src1 src2)); 9346 ins_cost(100); 9347 format %{ "UCOMISD $src1,$src2" %} 9348 ins_encode %{ 9349 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9350 %} 9351 ins_pipe( pipe_slow ); 9352 %} 9353 9354 // float compare and set condition codes in EFLAGS by XMM regs 9355 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9356 predicate(UseSSE>=2); 9357 match(Set cr (CmpD src1 (LoadD src2))); 9358 ins_cost(145); 9359 format %{ "UCOMISD $src1,$src2\n\t" 9360 "JNP,s exit\n\t" 9361 "PUSHF\t# saw NaN, set CF\n\t" 9362 "AND [rsp], #0xffffff2b\n\t" 9363 "POPF\n" 9364 "exit:" %} 9365 ins_encode %{ 9366 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9367 emit_cmpfp_fixup(_masm); 9368 %} 9369 ins_pipe( pipe_slow ); 9370 %} 9371 9372 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9373 predicate(UseSSE>=2); 9374 match(Set cr (CmpD src1 (LoadD src2))); 9375 ins_cost(100); 9376 format %{ "UCOMISD $src1,$src2" %} 9377 ins_encode %{ 9378 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9379 %} 9380 ins_pipe( pipe_slow ); 9381 %} 9382 9383 // Compare into -1,0,1 in XMM 9384 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9385 predicate(UseSSE>=2); 9386 match(Set dst (CmpD3 src1 src2)); 
9387 effect(KILL cr); 9388 ins_cost(255); 9389 format %{ "UCOMISD $src1, $src2\n\t" 9390 "MOV $dst, #-1\n\t" 9391 "JP,s done\n\t" 9392 "JB,s done\n\t" 9393 "SETNE $dst\n\t" 9394 "MOVZB $dst, $dst\n" 9395 "done:" %} 9396 ins_encode %{ 9397 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9398 emit_cmpfp3(_masm, $dst$$Register); 9399 %} 9400 ins_pipe( pipe_slow ); 9401 %} 9402 9403 // Compare into -1,0,1 in XMM and memory 9404 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9405 predicate(UseSSE>=2); 9406 match(Set dst (CmpD3 src1 (LoadD src2))); 9407 effect(KILL cr); 9408 ins_cost(275); 9409 format %{ "UCOMISD $src1, $src2\n\t" 9410 "MOV $dst, #-1\n\t" 9411 "JP,s done\n\t" 9412 "JB,s done\n\t" 9413 "SETNE $dst\n\t" 9414 "MOVZB $dst, $dst\n" 9415 "done:" %} 9416 ins_encode %{ 9417 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9418 emit_cmpfp3(_masm, $dst$$Register); 9419 %} 9420 ins_pipe( pipe_slow ); 9421 %} 9422 9423 9424 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9425 predicate (UseSSE <=1); 9426 match(Set dst (SubD dst src)); 9427 9428 format %{ "FLD $src\n\t" 9429 "DSUBp $dst,ST" %} 9430 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9431 ins_cost(150); 9432 ins_encode( Push_Reg_DPR(src), 9433 OpcP, RegOpc(dst) ); 9434 ins_pipe( fpu_reg_reg ); 9435 %} 9436 9437 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9438 predicate (UseSSE <=1); 9439 match(Set dst (RoundDouble (SubD src1 src2))); 9440 ins_cost(250); 9441 9442 format %{ "FLD $src2\n\t" 9443 "DSUB ST,$src1\n\t" 9444 "FSTP_D $dst\t# D-round" %} 9445 opcode(0xD8, 0x5); 9446 ins_encode( Push_Reg_DPR(src2), 9447 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9448 ins_pipe( fpu_mem_reg_reg ); 9449 %} 9450 9451 9452 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9453 predicate (UseSSE <=1); 9454 match(Set dst (SubD dst (LoadD src))); 9455 ins_cost(150); 9456 9457 format %{ "FLD $src\n\t" 9458 "DSUBp $dst,ST" %} 9459 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9460 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9461 OpcP, RegOpc(dst) ); 9462 ins_pipe( fpu_reg_mem ); 9463 %} 9464 9465 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9466 predicate (UseSSE<=1); 9467 match(Set dst (AbsD src)); 9468 ins_cost(100); 9469 format %{ "FABS" %} 9470 opcode(0xE1, 0xD9); 9471 ins_encode( OpcS, OpcP ); 9472 ins_pipe( fpu_reg_reg ); 9473 %} 9474 9475 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9476 predicate(UseSSE<=1); 9477 match(Set dst (NegD src)); 9478 ins_cost(100); 9479 format %{ "FCHS" %} 9480 opcode(0xE0, 0xD9); 9481 ins_encode( OpcS, OpcP ); 9482 ins_pipe( fpu_reg_reg ); 9483 %} 9484 9485 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9486 predicate(UseSSE<=1); 9487 match(Set dst (AddD dst src)); 9488 format %{ "FLD $src\n\t" 9489 "DADD $dst,ST" %} 9490 size(4); 9491 ins_cost(150); 9492 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9493 ins_encode( Push_Reg_DPR(src), 9494 OpcP, RegOpc(dst) ); 9495 ins_pipe( fpu_reg_reg ); 9496 %} 9497 9498 9499 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9500 predicate(UseSSE<=1); 9501 match(Set dst (RoundDouble (AddD src1 src2))); 9502 ins_cost(250); 9503 9504 format %{ "FLD $src2\n\t" 9505 "DADD ST,$src1\n\t" 9506 "FSTP_D $dst\t# D-round" %} 9507 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9508 ins_encode( Push_Reg_DPR(src2), 9509 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9510 ins_pipe( fpu_mem_reg_reg ); 9511 %} 9512 9513 9514 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9515 predicate(UseSSE<=1); 9516 match(Set dst (AddD dst (LoadD src))); 9517 ins_cost(150); 9518 9519 format %{ "FLD $src\n\t" 9520 "DADDp $dst,ST" %} 9521 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9522 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9523 OpcP, RegOpc(dst) ); 9524 ins_pipe( fpu_reg_mem ); 9525 %} 9526 9527 // add-to-memory 9528 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9529 predicate(UseSSE<=1); 9530 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9531 ins_cost(150); 9532 9533 format %{ "FLD_D $dst\n\t" 9534 "DADD ST,$src\n\t" 9535 "FST_D $dst" %} 9536 opcode(0xDD, 0x0); 9537 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9538 Opcode(0xD8), RegOpc(src), 9539 set_instruction_start, 9540 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9541 ins_pipe( fpu_reg_mem ); 9542 %} 9543 9544 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9545 predicate(UseSSE<=1); 9546 match(Set dst (AddD dst con)); 9547 ins_cost(125); 9548 format %{ "FLD1\n\t" 9549 "DADDp $dst,ST" %} 9550 ins_encode %{ 9551 __ fld1(); 9552 __ faddp($dst$$reg); 9553 %} 9554 ins_pipe(fpu_reg); 9555 %} 9556 9557 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9558 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9559 match(Set dst (AddD dst con)); 9560 ins_cost(200); 9561 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9562 "DADDp $dst,ST" %} 9563 ins_encode %{ 9564 __ fld_d($constantaddress($con)); 9565 __ faddp($dst$$reg); 9566 %} 9567 ins_pipe(fpu_reg_mem); 9568 %} 9569 9570 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9571 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9572 match(Set dst (RoundDouble (AddD src con))); 9573 ins_cost(200); 9574 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9575 "DADD ST,$src\n\t" 9576 "FSTP_D $dst\t# D-round" %} 9577 ins_encode %{ 9578 __ fld_d($constantaddress($con)); 9579 __ fadd($src$$reg); 9580 __ fstp_d(Address(rsp, $dst$$disp)); 9581 %} 9582 ins_pipe(fpu_mem_reg_con); 9583 %} 9584 9585 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9586 predicate(UseSSE<=1); 9587 match(Set dst (MulD dst src)); 9588 format %{ "FLD $src\n\t" 9589 "DMULp $dst,ST" %} 9590 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9591 ins_cost(150); 9592 ins_encode( Push_Reg_DPR(src), 9593 OpcP, RegOpc(dst) ); 9594 ins_pipe( 
fpu_reg_reg ); 9595 %} 9596 9597 // Strict FP instruction biases argument before multiply then 9598 // biases result to avoid double rounding of subnormals. 9599 // 9600 // scale arg1 by multiplying arg1 by 2^(-15360) 9601 // load arg2 9602 // multiply scaled arg1 by arg2 9603 // rescale product by 2^(15360) 9604 // 9605 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9606 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9607 match(Set dst (MulD dst src)); 9608 ins_cost(1); // Select this instruction for all strict FP double multiplies 9609 9610 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9611 "DMULp $dst,ST\n\t" 9612 "FLD $src\n\t" 9613 "DMULp $dst,ST\n\t" 9614 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9615 "DMULp $dst,ST\n\t" %} 9616 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9617 ins_encode( strictfp_bias1(dst), 9618 Push_Reg_DPR(src), 9619 OpcP, RegOpc(dst), 9620 strictfp_bias2(dst) ); 9621 ins_pipe( fpu_reg_reg ); 9622 %} 9623 9624 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9625 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9626 match(Set dst (MulD dst con)); 9627 ins_cost(200); 9628 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9629 "DMULp $dst,ST" %} 9630 ins_encode %{ 9631 __ fld_d($constantaddress($con)); 9632 __ fmulp($dst$$reg); 9633 %} 9634 ins_pipe(fpu_reg_mem); 9635 %} 9636 9637 9638 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9639 predicate( UseSSE<=1 ); 9640 match(Set dst (MulD dst (LoadD src))); 9641 ins_cost(200); 9642 format %{ "FLD_D $src\n\t" 9643 "DMULp $dst,ST" %} 9644 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9645 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9646 OpcP, RegOpc(dst) ); 9647 ins_pipe( fpu_reg_mem ); 9648 %} 9649 9650 // 9651 // Cisc-alternate to reg-reg multiply 9652 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9653 predicate( UseSSE<=1 ); 9654 match(Set dst (MulD src (LoadD mem))); 9655 ins_cost(250); 9656 format %{ "FLD_D $mem\n\t" 9657 "DMUL ST,$src\n\t" 9658 "FSTP_D $dst" %} 9659 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9660 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9661 OpcReg_FPR(src), 9662 Pop_Reg_DPR(dst) ); 9663 ins_pipe( fpu_reg_reg_mem ); 9664 %} 9665 9666 9667 // MACRO3 -- addDPR a mulDPR 9668 // This instruction is a '2-address' instruction in that the result goes 9669 // back to src2. This eliminates a move from the macro; possibly the 9670 // register allocator will have to add it back (and maybe not). 9671 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9672 predicate( UseSSE<=1 ); 9673 match(Set src2 (AddD (MulD src0 src1) src2)); 9674 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9675 "DMUL ST,$src1\n\t" 9676 "DADDp $src2,ST" %} 9677 ins_cost(250); 9678 opcode(0xDD); /* LoadD DD /0 */ 9679 ins_encode( Push_Reg_FPR(src0), 9680 FMul_ST_reg(src1), 9681 FAddP_reg_ST(src2) ); 9682 ins_pipe( fpu_reg_reg_reg ); 9683 %} 9684 9685 9686 // MACRO3 -- subDPR a mulDPR 9687 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9688 predicate( UseSSE<=1 ); 9689 match(Set src2 (SubD (MulD src0 src1) src2)); 9690 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9691 "DMUL ST,$src1\n\t" 9692 "DSUBRp $src2,ST" %} 9693 ins_cost(250); 9694 ins_encode( Push_Reg_FPR(src0), 9695 FMul_ST_reg(src1), 9696 Opcode(0xDE), Opc_plus(0xE0,src2)); 9697 ins_pipe( fpu_reg_reg_reg ); 9698 %} 9699 9700 9701 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9702 predicate( UseSSE<=1 ); 9703 match(Set dst (DivD dst src)); 9704 9705 format %{ "FLD $src\n\t" 9706 "FDIVp $dst,ST" %} 9707 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9708 ins_cost(150); 9709 ins_encode( Push_Reg_DPR(src), 9710 OpcP, RegOpc(dst) ); 9711 ins_pipe( fpu_reg_reg ); 9712 %} 9713 9714 // Strict FP instruction biases argument before division then 9715 // biases 
result, to avoid double rounding of subnormals. 9716 // 9717 // scale dividend by multiplying dividend by 2^(-15360) 9718 // load divisor 9719 // divide scaled dividend by divisor 9720 // rescale quotient by 2^(15360) 9721 // 9722 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9723 predicate (UseSSE<=1); 9724 match(Set dst (DivD dst src)); 9725 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9726 ins_cost(01); 9727 9728 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9729 "DMULp $dst,ST\n\t" 9730 "FLD $src\n\t" 9731 "FDIVp $dst,ST\n\t" 9732 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9733 "DMULp $dst,ST\n\t" %} 9734 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9735 ins_encode( strictfp_bias1(dst), 9736 Push_Reg_DPR(src), 9737 OpcP, RegOpc(dst), 9738 strictfp_bias2(dst) ); 9739 ins_pipe( fpu_reg_reg ); 9740 %} 9741 9742 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9743 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 9744 match(Set dst (RoundDouble (DivD src1 src2))); 9745 9746 format %{ "FLD $src1\n\t" 9747 "FDIV ST,$src2\n\t" 9748 "FSTP_D $dst\t# D-round" %} 9749 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 9750 ins_encode( Push_Reg_DPR(src1), 9751 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); 9752 ins_pipe( fpu_mem_reg_reg ); 9753 %} 9754 9755 9756 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 9757 predicate(UseSSE<=1); 9758 match(Set dst (ModD dst src)); 9759 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 9760 9761 format %{ "DMOD $dst,$src" %} 9762 ins_cost(250); 9763 ins_encode(Push_Reg_Mod_DPR(dst, src), 9764 emitModDPR(), 9765 Push_Result_Mod_DPR(src), 9766 Pop_Reg_DPR(dst)); 9767 ins_pipe( pipe_slow ); 9768 %} 9769 9770 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 9771 predicate(UseSSE>=2); 9772 match(Set dst (ModD src0 src1)); 
9773 effect(KILL rax, KILL cr); 9774 9775 format %{ "SUB ESP,8\t # DMOD\n" 9776 "\tMOVSD [ESP+0],$src1\n" 9777 "\tFLD_D [ESP+0]\n" 9778 "\tMOVSD [ESP+0],$src0\n" 9779 "\tFLD_D [ESP+0]\n" 9780 "loop:\tFPREM\n" 9781 "\tFWAIT\n" 9782 "\tFNSTSW AX\n" 9783 "\tSAHF\n" 9784 "\tJP loop\n" 9785 "\tFSTP_D [ESP+0]\n" 9786 "\tMOVSD $dst,[ESP+0]\n" 9787 "\tADD ESP,8\n" 9788 "\tFSTP ST0\t # Restore FPU Stack" 9789 %} 9790 ins_cost(250); 9791 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9792 ins_pipe( pipe_slow ); 9793 %} 9794 9795 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{ 9796 predicate (UseSSE<=1); 9797 match(Set dst (SinD src)); 9798 ins_cost(1800); 9799 format %{ "DSIN $dst" %} 9800 opcode(0xD9, 0xFE); 9801 ins_encode( OpcP, OpcS ); 9802 ins_pipe( pipe_slow ); 9803 %} 9804 9805 instruct sinD_reg(regD dst, eFlagsReg cr) %{ 9806 predicate (UseSSE>=2); 9807 match(Set dst (SinD dst)); 9808 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9809 ins_cost(1800); 9810 format %{ "DSIN $dst" %} 9811 opcode(0xD9, 0xFE); 9812 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9813 ins_pipe( pipe_slow ); 9814 %} 9815 9816 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{ 9817 predicate (UseSSE<=1); 9818 match(Set dst (CosD src)); 9819 ins_cost(1800); 9820 format %{ "DCOS $dst" %} 9821 opcode(0xD9, 0xFF); 9822 ins_encode( OpcP, OpcS ); 9823 ins_pipe( pipe_slow ); 9824 %} 9825 9826 instruct cosD_reg(regD dst, eFlagsReg cr) %{ 9827 predicate (UseSSE>=2); 9828 match(Set dst (CosD dst)); 9829 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9830 ins_cost(1800); 9831 format %{ "DCOS $dst" %} 9832 opcode(0xD9, 0xFF); 9833 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9834 ins_pipe( pipe_slow ); 9835 %} 9836 9837 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ 9838 predicate (UseSSE<=1); 9839 match(Set dst(TanD src)); 9840 format %{ "DTAN $dst" %} 9841 ins_encode( Opcode(0xD9), Opcode(0xF2), // 
fptan 9842 Opcode(0xDD), Opcode(0xD8)); // fstp st 9843 ins_pipe( pipe_slow ); 9844 %} 9845 9846 instruct tanD_reg(regD dst, eFlagsReg cr) %{ 9847 predicate (UseSSE>=2); 9848 match(Set dst(TanD dst)); 9849 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9850 format %{ "DTAN $dst" %} 9851 ins_encode( Push_SrcD(dst), 9852 Opcode(0xD9), Opcode(0xF2), // fptan 9853 Opcode(0xDD), Opcode(0xD8), // fstp st 9854 Push_ResultD(dst) ); 9855 ins_pipe( pipe_slow ); 9856 %} 9857 9858 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9859 predicate (UseSSE<=1); 9860 match(Set dst(AtanD dst src)); 9861 format %{ "DATA $dst,$src" %} 9862 opcode(0xD9, 0xF3); 9863 ins_encode( Push_Reg_DPR(src), 9864 OpcP, OpcS, RegOpc(dst) ); 9865 ins_pipe( pipe_slow ); 9866 %} 9867 9868 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9869 predicate (UseSSE>=2); 9870 match(Set dst(AtanD dst src)); 9871 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9872 format %{ "DATA $dst,$src" %} 9873 opcode(0xD9, 0xF3); 9874 ins_encode( Push_SrcD(src), 9875 OpcP, OpcS, Push_ResultD(dst) ); 9876 ins_pipe( pipe_slow ); 9877 %} 9878 9879 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9880 predicate (UseSSE<=1); 9881 match(Set dst (SqrtD src)); 9882 format %{ "DSQRT $dst,$src" %} 9883 opcode(0xFA, 0xD9); 9884 ins_encode( Push_Reg_DPR(src), 9885 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9886 ins_pipe( pipe_slow ); 9887 %} 9888 9889 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9890 predicate (UseSSE<=1); 9891 match(Set Y (PowD X Y)); // Raise X to the Yth power 9892 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9893 format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %} 9894 ins_encode %{ 9895 __ subptr(rsp, 8); 9896 __ fld_s($X$$reg - 1); 9897 __ fast_pow(); 9898 __ addptr(rsp, 8); 9899 %} 9900 ins_pipe( pipe_slow ); 9901 %} 9902 9903 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, 
eFlagsReg cr) %{ 9904 predicate (UseSSE>=2); 9905 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 9906 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9907 format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %} 9908 ins_encode %{ 9909 __ subptr(rsp, 8); 9910 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 9911 __ fld_d(Address(rsp, 0)); 9912 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 9913 __ fld_d(Address(rsp, 0)); 9914 __ fast_pow(); 9915 __ fstp_d(Address(rsp, 0)); 9916 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 9917 __ addptr(rsp, 8); 9918 %} 9919 ins_pipe( pipe_slow ); 9920 %} 9921 9922 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ 9923 predicate (UseSSE<=1); 9924 // The source Double operand on FPU stack 9925 match(Set dst (Log10D src)); 9926 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9927 // fxch ; swap ST(0) with ST(1) 9928 // fyl2x ; compute log_10(2) * log_2(x) 9929 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9930 "FXCH \n\t" 9931 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9932 %} 9933 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9934 Opcode(0xD9), Opcode(0xC9), // fxch 9935 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9936 9937 ins_pipe( pipe_slow ); 9938 %} 9939 9940 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ 9941 predicate (UseSSE>=2); 9942 effect(KILL cr); 9943 match(Set dst (Log10D src)); 9944 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9945 // fyl2x ; compute log_10(2) * log_2(x) 9946 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9947 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9948 %} 9949 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9950 Push_SrcD(src), 9951 Opcode(0xD9), Opcode(0xF1), // fyl2x 9952 Push_ResultD(dst)); 9953 9954 ins_pipe( pipe_slow ); 9955 %} 9956 9957 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{ 9958 predicate (UseSSE<=1); 9959 // The source Double operand on FPU stack 9960 match(Set dst (LogD src)); 9961 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit 
number 9962 // fxch ; swap ST(0) with ST(1) 9963 // fyl2x ; compute log_e(2) * log_2(x) 9964 format %{ "FLDLN2 \t\t\t#Log_e\n\t" 9965 "FXCH \n\t" 9966 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 9967 %} 9968 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 9969 Opcode(0xD9), Opcode(0xC9), // fxch 9970 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9971 9972 ins_pipe( pipe_slow ); 9973 %} 9974 9975 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{ 9976 predicate (UseSSE>=2); 9977 effect(KILL cr); 9978 // The source and result Double operands in XMM registers 9979 match(Set dst (LogD src)); 9980 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 9981 // fyl2x ; compute log_e(2) * log_2(x) 9982 format %{ "FLDLN2 \t\t\t#Log_e\n\t" 9983 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 9984 %} 9985 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 9986 Push_SrcD(src), 9987 Opcode(0xD9), Opcode(0xF1), // fyl2x 9988 Push_ResultD(dst)); 9989 ins_pipe( pipe_slow ); 9990 %} 9991 9992 //-------------Float Instructions------------------------------- 9993 // Float Math 9994 9995 // Code for float compare: 9996 // fcompp(); 9997 // fwait(); fnstsw_ax(); 9998 // sahf(); 9999 // movl(dst, unordered_result); 10000 // jcc(Assembler::parity, exit); 10001 // movl(dst, less_result); 10002 // jcc(Assembler::below, exit); 10003 // movl(dst, equal_result); 10004 // jcc(Assembler::equal, exit); 10005 // movl(dst, greater_result); 10006 // exit: 10007 10008 // P6 version of float compare, sets condition codes in EFLAGS 10009 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10010 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10011 match(Set cr (CmpF src1 src2)); 10012 effect(KILL rax); 10013 ins_cost(150); 10014 format %{ "FLD $src1\n\t" 10015 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10016 "JNP exit\n\t" 10017 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10018 "SAHF\n" 10019 "exit:\tNOP // avoid branch to branch" %} 10020 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 
10021 ins_encode( Push_Reg_DPR(src1), 10022 OpcP, RegOpc(src2), 10023 cmpF_P6_fixup ); 10024 ins_pipe( pipe_slow ); 10025 %} 10026 10027 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10028 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10029 match(Set cr (CmpF src1 src2)); 10030 ins_cost(100); 10031 format %{ "FLD $src1\n\t" 10032 "FUCOMIP ST,$src2 // P6 instruction" %} 10033 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10034 ins_encode( Push_Reg_DPR(src1), 10035 OpcP, RegOpc(src2)); 10036 ins_pipe( pipe_slow ); 10037 %} 10038 10039 10040 // Compare & branch 10041 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10042 predicate(UseSSE == 0); 10043 match(Set cr (CmpF src1 src2)); 10044 effect(KILL rax); 10045 ins_cost(200); 10046 format %{ "FLD $src1\n\t" 10047 "FCOMp $src2\n\t" 10048 "FNSTSW AX\n\t" 10049 "TEST AX,0x400\n\t" 10050 "JZ,s flags\n\t" 10051 "MOV AH,1\t# unordered treat as LT\n" 10052 "flags:\tSAHF" %} 10053 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10054 ins_encode( Push_Reg_DPR(src1), 10055 OpcP, RegOpc(src2), 10056 fpu_flags); 10057 ins_pipe( pipe_slow ); 10058 %} 10059 10060 // Compare vs zero into -1,0,1 10061 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10062 predicate(UseSSE == 0); 10063 match(Set dst (CmpF3 src1 zero)); 10064 effect(KILL cr, KILL rax); 10065 ins_cost(280); 10066 format %{ "FTSTF $dst,$src1" %} 10067 opcode(0xE4, 0xD9); 10068 ins_encode( Push_Reg_DPR(src1), 10069 OpcS, OpcP, PopFPU, 10070 CmpF_Result(dst)); 10071 ins_pipe( pipe_slow ); 10072 %} 10073 10074 // Compare into -1,0,1 10075 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10076 predicate(UseSSE == 0); 10077 match(Set dst (CmpF3 src1 src2)); 10078 effect(KILL cr, KILL rax); 10079 ins_cost(300); 10080 format %{ "FCMPF $dst,$src1,$src2" %} 10081 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10082 ins_encode( Push_Reg_DPR(src1), 10083 OpcP, 
RegOpc(src2), 10084 CmpF_Result(dst)); 10085 ins_pipe( pipe_slow ); 10086 %} 10087 10088 // float compare and set condition codes in EFLAGS by XMM regs 10089 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10090 predicate(UseSSE>=1); 10091 match(Set cr (CmpF src1 src2)); 10092 ins_cost(145); 10093 format %{ "UCOMISS $src1,$src2\n\t" 10094 "JNP,s exit\n\t" 10095 "PUSHF\t# saw NaN, set CF\n\t" 10096 "AND [rsp], #0xffffff2b\n\t" 10097 "POPF\n" 10098 "exit:" %} 10099 ins_encode %{ 10100 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10101 emit_cmpfp_fixup(_masm); 10102 %} 10103 ins_pipe( pipe_slow ); 10104 %} 10105 10106 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10107 predicate(UseSSE>=1); 10108 match(Set cr (CmpF src1 src2)); 10109 ins_cost(100); 10110 format %{ "UCOMISS $src1,$src2" %} 10111 ins_encode %{ 10112 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10113 %} 10114 ins_pipe( pipe_slow ); 10115 %} 10116 10117 // float compare and set condition codes in EFLAGS by XMM regs 10118 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10119 predicate(UseSSE>=1); 10120 match(Set cr (CmpF src1 (LoadF src2))); 10121 ins_cost(165); 10122 format %{ "UCOMISS $src1,$src2\n\t" 10123 "JNP,s exit\n\t" 10124 "PUSHF\t# saw NaN, set CF\n\t" 10125 "AND [rsp], #0xffffff2b\n\t" 10126 "POPF\n" 10127 "exit:" %} 10128 ins_encode %{ 10129 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10130 emit_cmpfp_fixup(_masm); 10131 %} 10132 ins_pipe( pipe_slow ); 10133 %} 10134 10135 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10136 predicate(UseSSE>=1); 10137 match(Set cr (CmpF src1 (LoadF src2))); 10138 ins_cost(100); 10139 format %{ "UCOMISS $src1,$src2" %} 10140 ins_encode %{ 10141 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10142 %} 10143 ins_pipe( pipe_slow ); 10144 %} 10145 10146 // Compare into -1,0,1 in XMM 10147 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10148 predicate(UseSSE>=1); 10149 
match(Set dst (CmpF3 src1 src2)); 10150 effect(KILL cr); 10151 ins_cost(255); 10152 format %{ "UCOMISS $src1, $src2\n\t" 10153 "MOV $dst, #-1\n\t" 10154 "JP,s done\n\t" 10155 "JB,s done\n\t" 10156 "SETNE $dst\n\t" 10157 "MOVZB $dst, $dst\n" 10158 "done:" %} 10159 ins_encode %{ 10160 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10161 emit_cmpfp3(_masm, $dst$$Register); 10162 %} 10163 ins_pipe( pipe_slow ); 10164 %} 10165 10166 // Compare into -1,0,1 in XMM and memory 10167 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10168 predicate(UseSSE>=1); 10169 match(Set dst (CmpF3 src1 (LoadF src2))); 10170 effect(KILL cr); 10171 ins_cost(275); 10172 format %{ "UCOMISS $src1, $src2\n\t" 10173 "MOV $dst, #-1\n\t" 10174 "JP,s done\n\t" 10175 "JB,s done\n\t" 10176 "SETNE $dst\n\t" 10177 "MOVZB $dst, $dst\n" 10178 "done:" %} 10179 ins_encode %{ 10180 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10181 emit_cmpfp3(_masm, $dst$$Register); 10182 %} 10183 ins_pipe( pipe_slow ); 10184 %} 10185 10186 // Spill to obtain 24-bit precision 10187 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10188 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10189 match(Set dst (SubF src1 src2)); 10190 10191 format %{ "FSUB $dst,$src1 - $src2" %} 10192 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10193 ins_encode( Push_Reg_FPR(src1), 10194 OpcReg_FPR(src2), 10195 Pop_Mem_FPR(dst) ); 10196 ins_pipe( fpu_mem_reg_reg ); 10197 %} 10198 // 10199 // This instruction does not round to 24-bits 10200 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10201 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10202 match(Set dst (SubF dst src)); 10203 10204 format %{ "FSUB $dst,$src" %} 10205 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10206 ins_encode( Push_Reg_FPR(src), 10207 OpcP, RegOpc(dst) ); 10208 ins_pipe( fpu_reg_reg ); 10209 %} 10210 10211 // Spill to obtain 24-bit precision 10212 instruct 
addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10213 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10214 match(Set dst (AddF src1 src2)); 10215 10216 format %{ "FADD $dst,$src1,$src2" %} 10217 opcode(0xD8, 0x0); /* D8 C0+i */ 10218 ins_encode( Push_Reg_FPR(src2), 10219 OpcReg_FPR(src1), 10220 Pop_Mem_FPR(dst) ); 10221 ins_pipe( fpu_mem_reg_reg ); 10222 %} 10223 // 10224 // This instruction does not round to 24-bits 10225 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10226 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10227 match(Set dst (AddF dst src)); 10228 10229 format %{ "FLD $src\n\t" 10230 "FADDp $dst,ST" %} 10231 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10232 ins_encode( Push_Reg_FPR(src), 10233 OpcP, RegOpc(dst) ); 10234 ins_pipe( fpu_reg_reg ); 10235 %} 10236 10237 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10238 predicate(UseSSE==0); 10239 match(Set dst (AbsF src)); 10240 ins_cost(100); 10241 format %{ "FABS" %} 10242 opcode(0xE1, 0xD9); 10243 ins_encode( OpcS, OpcP ); 10244 ins_pipe( fpu_reg_reg ); 10245 %} 10246 10247 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10248 predicate(UseSSE==0); 10249 match(Set dst (NegF src)); 10250 ins_cost(100); 10251 format %{ "FCHS" %} 10252 opcode(0xE0, 0xD9); 10253 ins_encode( OpcS, OpcP ); 10254 ins_pipe( fpu_reg_reg ); 10255 %} 10256 10257 // Cisc-alternate to addFPR_reg 10258 // Spill to obtain 24-bit precision 10259 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10260 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10261 match(Set dst (AddF src1 (LoadF src2))); 10262 10263 format %{ "FLD $src2\n\t" 10264 "FADD ST,$src1\n\t" 10265 "FSTP_S $dst" %} 10266 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10267 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10268 OpcReg_FPR(src1), 10269 Pop_Mem_FPR(dst) ); 10270 ins_pipe( fpu_mem_reg_mem ); 10271 %} 10272 // 10273 // Cisc-alternate to addFPR_reg 10274 
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary),
              RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
// Adds a float constant loaded from the constant table.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD    ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD    ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);      // pop to the destination x87 register (no rounding store)
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
// Multiplies by a float constant loaded from the constant table.
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD    ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD    ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits.  It is a '2-address'
// instruction in that the result goes back to src2.  This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: bounce the operands through the stack to the x87
// unit and loop on FPREM until the reduction is complete (C2 flag clear).
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted.  Please keep it that way!

instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // FPR1 (TOS) can be stored directly; any other x87 register is first
    // loaded to TOS and then popped to the stack slot.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // 0x80000000 is the "integer indefinite" value CVTTSD2SI produces on
    // overflow/NaN; take the slow wrapper path only in that case.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    // Java requires truncation ("round toward zero") for d2l.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 (EDX=0x80000000, EAX=0) flags overflow/NaN;
    // only then call the slow wrapper for exact Java semantics.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // 0x80000000 is the "integer indefinite" value produced on
    // overflow/NaN; only then take the slow wrapper path.
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert an x87 float to a long via the FPU stack (no SSE).
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    // Java requires truncation ("round toward zero") for f2l.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // 0x8000000000000000 (EDX=0x80000000, EAX=0) flags overflow/NaN;
    // only then call the slow wrapper for exact Java semantics.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Int to double conversion on the x87 stack (no SSE2).
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Int to double entirely in the XMM domain (MOVD + CVTDQ2PD).
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  // Only matches when the int input is provably a byte (AndI with 0xFF),
  // so the FILD result is exact and no 24-bit rounding store is needed.
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Int to float entirely in the XMM domain (MOVD + CVTDQ2PS).
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend convert int to long: copy to both halves, then arithmetic
// shift propagates the sign into the high word.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Long to double on the x87 stack: push both halves and FILD them as a
// 64-bit integer.
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// Long to double for SSE2: convert on the x87 stack, then move the
// result into an XMM register through memory.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// Long to float for SSE: convert on the x87 stack, round via the 32-bit
// store, then move into an XMM register.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// x87 (no-SSE) counterpart of convL2F_reg.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  // FIX: this was the only FPR rule without a UseSSE predicate; without it
  // the rule also matches when UseSSE>=1 and competes with convL2F_reg
  // above.  Restrict it to pure-x87 mode, consistent with the sibling
  // FPR-result rules in this file.
  predicate( UseSSE==0 );
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long to int: simply take the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Reinterpret float bits already in a stack slot as an int register.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Reinterpret an x87 float's bits by storing it to a stack slot.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Reinterpret an XMM float's bits by storing it to a stack slot.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret XMM float bits directly into a GPR (MOVD, SSE2+).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# 
MoveI2F_reg_stack" %} 11280 ins_encode %{ 11281 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11282 %} 11283 ins_pipe( ialu_mem_reg ); 11284 %} 11285 11286 11287 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11288 predicate(UseSSE==0); 11289 match(Set dst (MoveI2F src)); 11290 effect(DEF dst, USE src); 11291 11292 ins_cost(125); 11293 format %{ "FLD_S $src\n\t" 11294 "FSTP $dst\t# MoveI2F_stack_reg" %} 11295 opcode(0xD9); /* D9 /0, FLD m32real */ 11296 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11297 Pop_Reg_FPR(dst) ); 11298 ins_pipe( fpu_reg_mem ); 11299 %} 11300 11301 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11302 predicate(UseSSE>=1); 11303 match(Set dst (MoveI2F src)); 11304 effect( DEF dst, USE src ); 11305 11306 ins_cost(95); 11307 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11308 ins_encode %{ 11309 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11310 %} 11311 ins_pipe( pipe_slow ); 11312 %} 11313 11314 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11315 predicate(UseSSE>=2); 11316 match(Set dst (MoveI2F src)); 11317 effect( DEF dst, USE src ); 11318 11319 ins_cost(85); 11320 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11321 ins_encode %{ 11322 __ movdl($dst$$XMMRegister, $src$$Register); 11323 %} 11324 ins_pipe( pipe_slow ); 11325 %} 11326 11327 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11328 match(Set dst (MoveD2L src)); 11329 effect(DEF dst, USE src); 11330 11331 ins_cost(250); 11332 format %{ "MOV $dst.lo,$src\n\t" 11333 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11334 opcode(0x8B, 0x8B); 11335 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11336 ins_pipe( ialu_mem_long_reg ); 11337 %} 11338 11339 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11340 predicate(UseSSE<=1); 11341 match(Set dst (MoveD2L src)); 11342 effect(DEF dst, USE src); 11343 11344 ins_cost(125); 11345 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11346 
ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11347 ins_pipe( fpu_mem_reg ); 11348 %} 11349 11350 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11351 predicate(UseSSE>=2); 11352 match(Set dst (MoveD2L src)); 11353 effect(DEF dst, USE src); 11354 ins_cost(95); 11355 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11356 ins_encode %{ 11357 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11358 %} 11359 ins_pipe( pipe_slow ); 11360 %} 11361 11362 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11363 predicate(UseSSE>=2); 11364 match(Set dst (MoveD2L src)); 11365 effect(DEF dst, USE src, TEMP tmp); 11366 ins_cost(85); 11367 format %{ "MOVD $dst.lo,$src\n\t" 11368 "PSHUFLW $tmp,$src,0x4E\n\t" 11369 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11370 ins_encode %{ 11371 __ movdl($dst$$Register, $src$$XMMRegister); 11372 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11373 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11374 %} 11375 ins_pipe( pipe_slow ); 11376 %} 11377 11378 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11379 match(Set dst (MoveL2D src)); 11380 effect(DEF dst, USE src); 11381 11382 ins_cost(200); 11383 format %{ "MOV $dst,$src.lo\n\t" 11384 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11385 opcode(0x89, 0x89); 11386 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11387 ins_pipe( ialu_mem_long_reg ); 11388 %} 11389 11390 11391 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11392 predicate(UseSSE<=1); 11393 match(Set dst (MoveL2D src)); 11394 effect(DEF dst, USE src); 11395 ins_cost(125); 11396 11397 format %{ "FLD_D $src\n\t" 11398 "FSTP $dst\t# MoveL2D_stack_reg" %} 11399 opcode(0xDD); /* DD /0, FLD m64real */ 11400 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11401 Pop_Reg_DPR(dst) ); 11402 ins_pipe( fpu_reg_mem ); 11403 %} 11404 11405 11406 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11407 predicate(UseSSE>=2 && 
UseXmmLoadAndClearUpper); 11408 match(Set dst (MoveL2D src)); 11409 effect(DEF dst, USE src); 11410 11411 ins_cost(95); 11412 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11413 ins_encode %{ 11414 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11415 %} 11416 ins_pipe( pipe_slow ); 11417 %} 11418 11419 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11420 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11421 match(Set dst (MoveL2D src)); 11422 effect(DEF dst, USE src); 11423 11424 ins_cost(95); 11425 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11426 ins_encode %{ 11427 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11428 %} 11429 ins_pipe( pipe_slow ); 11430 %} 11431 11432 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11433 predicate(UseSSE>=2); 11434 match(Set dst (MoveL2D src)); 11435 effect(TEMP dst, USE src, TEMP tmp); 11436 ins_cost(85); 11437 format %{ "MOVD $dst,$src.lo\n\t" 11438 "MOVD $tmp,$src.hi\n\t" 11439 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11440 ins_encode %{ 11441 __ movdl($dst$$XMMRegister, $src$$Register); 11442 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11443 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11444 %} 11445 ins_pipe( pipe_slow ); 11446 %} 11447 11448 11449 // ======================================================================= 11450 // fast clearing of an array 11451 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11452 predicate(!UseFastStosb); 11453 match(Set dummy (ClearArray cnt base)); 11454 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11455 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11456 "SHL ECX,1\t# Convert doublewords to words\n\t" 11457 "REP STOS\t# store EAX into [EDI++] while ECX--" %} 11458 ins_encode %{ 11459 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11460 %} 11461 ins_pipe( pipe_slow ); 11462 %} 11463 11464 instruct 
rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11465 predicate(UseFastStosb); 11466 match(Set dummy (ClearArray cnt base)); 11467 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11468 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11469 "SHL ECX,3\t# Convert doublewords to bytes\n\t" 11470 "REP STOSB\t# store EAX into [EDI++] while ECX--" %} 11471 ins_encode %{ 11472 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11473 %} 11474 ins_pipe( pipe_slow ); 11475 %} 11476 11477 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11478 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11479 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11480 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11481 11482 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11483 ins_encode %{ 11484 __ string_compare($str1$$Register, $str2$$Register, 11485 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11486 $tmp1$$XMMRegister); 11487 %} 11488 ins_pipe( pipe_slow ); 11489 %} 11490 11491 // fast string equals 11492 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11493 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11494 match(Set result (StrEquals (Binary str1 str2) cnt)); 11495 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11496 11497 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11498 ins_encode %{ 11499 __ char_arrays_equals(false, $str1$$Register, $str2$$Register, 11500 $cnt$$Register, $result$$Register, $tmp3$$Register, 11501 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11502 %} 11503 ins_pipe( pipe_slow ); 11504 %} 11505 11506 // fast search of substring with known size. 
11507 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11508 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11509 predicate(UseSSE42Intrinsics); 11510 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11511 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11512 11513 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11514 ins_encode %{ 11515 int icnt2 = (int)$int_cnt2$$constant; 11516 if (icnt2 >= 8) { 11517 // IndexOf for constant substrings with size >= 8 elements 11518 // which don't need to be loaded through stack. 11519 __ string_indexofC8($str1$$Register, $str2$$Register, 11520 $cnt1$$Register, $cnt2$$Register, 11521 icnt2, $result$$Register, 11522 $vec$$XMMRegister, $tmp$$Register); 11523 } else { 11524 // Small strings are loaded through stack if they cross page boundary. 11525 __ string_indexof($str1$$Register, $str2$$Register, 11526 $cnt1$$Register, $cnt2$$Register, 11527 icnt2, $result$$Register, 11528 $vec$$XMMRegister, $tmp$$Register); 11529 } 11530 %} 11531 ins_pipe( pipe_slow ); 11532 %} 11533 11534 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11535 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11536 predicate(UseSSE42Intrinsics); 11537 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11538 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11539 11540 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11541 ins_encode %{ 11542 __ string_indexof($str1$$Register, $str2$$Register, 11543 $cnt1$$Register, $cnt2$$Register, 11544 (-1), $result$$Register, 11545 $vec$$XMMRegister, $tmp$$Register); 11546 %} 11547 ins_pipe( pipe_slow ); 11548 %} 11549 11550 // fast array equals 11551 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 
11552 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11553 %{ 11554 match(Set result (AryEq ary1 ary2)); 11555 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11556 //ins_cost(300); 11557 11558 format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11559 ins_encode %{ 11560 __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register, 11561 $tmp3$$Register, $result$$Register, $tmp4$$Register, 11562 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11563 %} 11564 ins_pipe( pipe_slow ); 11565 %} 11566 11567 // encode char[] to byte[] in ISO_8859_1 11568 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 11569 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11570 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11571 match(Set result (EncodeISOArray src (Binary dst len))); 11572 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11573 11574 format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 11575 ins_encode %{ 11576 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 11577 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11578 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11579 %} 11580 ins_pipe( pipe_slow ); 11581 %} 11582 11583 11584 //----------Control Flow Instructions------------------------------------------ 11585 // Signed compare Instructions 11586 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ 11587 match(Set cr (CmpI op1 op2)); 11588 effect( DEF cr, USE op1, USE op2 ); 11589 format %{ "CMP $op1,$op2" %} 11590 opcode(0x3B); /* Opcode 3B /r */ 11591 ins_encode( OpcP, RegReg( op1, op2) ); 11592 ins_pipe( ialu_cr_reg_reg ); 11593 %} 11594 11595 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 11596 match(Set cr (CmpI op1 op2)); 11597 effect( DEF cr, USE op1 ); 11598 format %{ "CMP $op1,$op2" 
%} 11599 opcode(0x81,0x07); /* Opcode 81 /7 */ 11600 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 11601 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11602 ins_pipe( ialu_cr_reg_imm ); 11603 %} 11604 11605 // Cisc-spilled version of cmpI_eReg 11606 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 11607 match(Set cr (CmpI op1 (LoadI op2))); 11608 11609 format %{ "CMP $op1,$op2" %} 11610 ins_cost(500); 11611 opcode(0x3B); /* Opcode 3B /r */ 11612 ins_encode( OpcP, RegMem( op1, op2) ); 11613 ins_pipe( ialu_cr_reg_mem ); 11614 %} 11615 11616 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{ 11617 match(Set cr (CmpI src zero)); 11618 effect( DEF cr, USE src ); 11619 11620 format %{ "TEST $src,$src" %} 11621 opcode(0x85); 11622 ins_encode( OpcP, RegReg( src, src ) ); 11623 ins_pipe( ialu_cr_reg_imm ); 11624 %} 11625 11626 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{ 11627 match(Set cr (CmpI (AndI src con) zero)); 11628 11629 format %{ "TEST $src,$con" %} 11630 opcode(0xF7,0x00); 11631 ins_encode( OpcP, RegOpc(src), Con32(con) ); 11632 ins_pipe( ialu_cr_reg_imm ); 11633 %} 11634 11635 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{ 11636 match(Set cr (CmpI (AndI src mem) zero)); 11637 11638 format %{ "TEST $src,$mem" %} 11639 opcode(0x85); 11640 ins_encode( OpcP, RegMem( src, mem ) ); 11641 ins_pipe( ialu_cr_reg_mem ); 11642 %} 11643 11644 // Unsigned compare Instructions; really, same as signed except they 11645 // produce an eFlagsRegU instead of eFlagsReg. 
11646 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ 11647 match(Set cr (CmpU op1 op2)); 11648 11649 format %{ "CMPu $op1,$op2" %} 11650 opcode(0x3B); /* Opcode 3B /r */ 11651 ins_encode( OpcP, RegReg( op1, op2) ); 11652 ins_pipe( ialu_cr_reg_reg ); 11653 %} 11654 11655 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ 11656 match(Set cr (CmpU op1 op2)); 11657 11658 format %{ "CMPu $op1,$op2" %} 11659 opcode(0x81,0x07); /* Opcode 81 /7 */ 11660 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11661 ins_pipe( ialu_cr_reg_imm ); 11662 %} 11663 11664 // // Cisc-spilled version of cmpU_eReg 11665 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ 11666 match(Set cr (CmpU op1 (LoadI op2))); 11667 11668 format %{ "CMPu $op1,$op2" %} 11669 ins_cost(500); 11670 opcode(0x3B); /* Opcode 3B /r */ 11671 ins_encode( OpcP, RegMem( op1, op2) ); 11672 ins_pipe( ialu_cr_reg_mem ); 11673 %} 11674 11675 // // Cisc-spilled version of cmpU_eReg 11676 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ 11677 // match(Set cr (CmpU (LoadI op1) op2)); 11678 // 11679 // format %{ "CMPu $op1,$op2" %} 11680 // ins_cost(500); 11681 // opcode(0x39); /* Opcode 39 /r */ 11682 // ins_encode( OpcP, RegMem( op1, op2) ); 11683 //%} 11684 11685 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{ 11686 match(Set cr (CmpU src zero)); 11687 11688 format %{ "TESTu $src,$src" %} 11689 opcode(0x85); 11690 ins_encode( OpcP, RegReg( src, src ) ); 11691 ins_pipe( ialu_cr_reg_imm ); 11692 %} 11693 11694 // Unsigned pointer compare Instructions 11695 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{ 11696 match(Set cr (CmpP op1 op2)); 11697 11698 format %{ "CMPu $op1,$op2" %} 11699 opcode(0x3B); /* Opcode 3B /r */ 11700 ins_encode( OpcP, RegReg( op1, op2) ); 11701 ins_pipe( ialu_cr_reg_reg ); 11702 %} 11703 11704 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{ 11705 match(Set cr (CmpP op1 op2)); 11706 11707 format %{ 
"CMPu $op1,$op2" %} 11708 opcode(0x81,0x07); /* Opcode 81 /7 */ 11709 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11710 ins_pipe( ialu_cr_reg_imm ); 11711 %} 11712 11713 // // Cisc-spilled version of cmpP_eReg 11714 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{ 11715 match(Set cr (CmpP op1 (LoadP op2))); 11716 11717 format %{ "CMPu $op1,$op2" %} 11718 ins_cost(500); 11719 opcode(0x3B); /* Opcode 3B /r */ 11720 ins_encode( OpcP, RegMem( op1, op2) ); 11721 ins_pipe( ialu_cr_reg_mem ); 11722 %} 11723 11724 // // Cisc-spilled version of cmpP_eReg 11725 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{ 11726 // match(Set cr (CmpP (LoadP op1) op2)); 11727 // 11728 // format %{ "CMPu $op1,$op2" %} 11729 // ins_cost(500); 11730 // opcode(0x39); /* Opcode 39 /r */ 11731 // ins_encode( OpcP, RegMem( op1, op2) ); 11732 //%} 11733 11734 // Compare raw pointer (used in out-of-heap check). 11735 // Only works because non-oop pointers must be raw pointers 11736 // and raw pointers have no anti-dependencies. 11737 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{ 11738 predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none ); 11739 match(Set cr (CmpP op1 (LoadP op2))); 11740 11741 format %{ "CMPu $op1,$op2" %} 11742 opcode(0x3B); /* Opcode 3B /r */ 11743 ins_encode( OpcP, RegMem( op1, op2) ); 11744 ins_pipe( ialu_cr_reg_mem ); 11745 %} 11746 11747 // 11748 // This will generate a signed flags result. This should be ok 11749 // since any compare to a zero should be eq/neq. 11750 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{ 11751 match(Set cr (CmpP src zero)); 11752 11753 format %{ "TEST $src,$src" %} 11754 opcode(0x85); 11755 ins_encode( OpcP, RegReg( src, src ) ); 11756 ins_pipe( ialu_cr_reg_imm ); 11757 %} 11758 11759 // Cisc-spilled version of testP_reg 11760 // This will generate a signed flags result. This should be ok 11761 // since any compare to a zero should be eq/neq. 
11762 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{ 11763 match(Set cr (CmpP (LoadP op) zero)); 11764 11765 format %{ "TEST $op,0xFFFFFFFF" %} 11766 ins_cost(500); 11767 opcode(0xF7); /* Opcode F7 /0 */ 11768 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); 11769 ins_pipe( ialu_cr_reg_imm ); 11770 %} 11771 11772 // Yanked all unsigned pointer compare operations. 11773 // Pointer compares are done with CmpP which is already unsigned. 11774 11775 //----------Max and Min-------------------------------------------------------- 11776 // Min Instructions 11777 //// 11778 // *** Min and Max using the conditional move are slower than the 11779 // *** branch version on a Pentium III. 11780 // // Conditional move for min 11781 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11782 // effect( USE_DEF op2, USE op1, USE cr ); 11783 // format %{ "CMOVlt $op2,$op1\t! min" %} 11784 // opcode(0x4C,0x0F); 11785 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11786 // ins_pipe( pipe_cmov_reg ); 11787 //%} 11788 // 11789 //// Min Register with Register (P6 version) 11790 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11791 // predicate(VM_Version::supports_cmov() ); 11792 // match(Set op2 (MinI op1 op2)); 11793 // ins_cost(200); 11794 // expand %{ 11795 // eFlagsReg cr; 11796 // compI_eReg(cr,op1,op2); 11797 // cmovI_reg_lt(op2,op1,cr); 11798 // %} 11799 //%} 11800 11801 // Min Register with Register (generic version) 11802 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11803 match(Set dst (MinI dst src)); 11804 effect(KILL flags); 11805 ins_cost(300); 11806 11807 format %{ "MIN $dst,$src" %} 11808 opcode(0xCC); 11809 ins_encode( min_enc(dst,src) ); 11810 ins_pipe( pipe_slow ); 11811 %} 11812 11813 // Max Register with Register 11814 // *** Min and Max using the conditional move are slower than the 11815 // *** branch version on a Pentium III. 
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): removed the unused local 'int m1 = (strd > 0) ? 1 : -1;'
    // -- it was computed but never referenced anywhere in this encoding.
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a
// relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional jump for cmpOpUCF2 conditions.  Per the ins_encode below:
// for notEqual the branch is also taken on the parity flag; for equal a
// short jump over the target branch is emitted when parity is set, so the
// equal branch is not taken in that case.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
12051 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{ 12052 match(Set result (PartialSubtypeCheck sub super)); 12053 effect( KILL rcx, KILL cr ); 12054 12055 ins_cost(1100); // slightly larger than the next version 12056 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12057 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12058 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12059 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12060 "JNE,s miss\t\t# Missed: EDI not-zero\n\t" 12061 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" 12062 "XOR $result,$result\t\t Hit: EDI zero\n\t" 12063 "miss:\t" %} 12064 12065 opcode(0x1); // Force a XOR of EDI 12066 ins_encode( enc_PartialSubtypeCheck() ); 12067 ins_pipe( pipe_slow ); 12068 %} 12069 12070 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{ 12071 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 12072 effect( KILL rcx, KILL result ); 12073 12074 ins_cost(1000); 12075 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12076 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12077 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12078 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12079 "JNE,s miss\t\t# Missed: flags NZ\n\t" 12080 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" 12081 "miss:\t" %} 12082 12083 opcode(0x0); // No need to XOR EDI 12084 ins_encode( enc_PartialSubtypeCheck() ); 12085 ins_pipe( pipe_slow ); 12086 %} 12087 12088 // ============================================================================ 12089 // Branch Instructions -- short offset versions 12090 // 12091 // These instructions are used to replace jumps of a long offset (the default 12092 
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2); // 2-byte short form: opcode + rel8
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Two-jump short form: the UCF2 flavor also has to branch on the parity flag
// (set by an unordered float compare), hence size(4) = two 2-byte jccb's.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // NE: unordered (PF set) also counts as not-equal, so both jumps go to l.
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // EQ: unordered must NOT be treated as equal -> skip over the JE.
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in $dst for a full three-way long compare: compare high
// words signed, then (if equal) low words unsigned, and branch into the
// inc/dec tail accordingly.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // High halves: signed compare decides unless they are equal.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // Low halves: unsigned compare (note 'below', not 'less').
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Against zero only the sign of the high word matters, so a TEST of the
// high half suffices.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// CMP low halves then SBB the highs: leaves sign/overflow flags as a 64-bit
// subtract would, which is exactly what LT/GE need.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ints.
// Compare 2 longs and CMOVE pointers.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the BoolTest disjunction is parenthesized so that it is gated by the
// UseSSE term; previously '&&' bound tighter than '||', so the GE half was
// unguarded, matching the integer cmov variants above only by accident.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, no SSE).
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE form).
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Against zero: OR the halves together, ZF reflects whether all 64 bits are 0.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compare low halves; only if they are equal does the high-half compare run.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Compare 2 longs (EQ/NE flavor of flags) and CMOVE longs: two CMOVcc's,
// one per 32-bit half.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ints.
// Compare 2 longs and CMOVE pointers.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the BoolTest disjunction is parenthesized so that it is gated by the
// UseSSE term; previously '&&' bound tighter than '||', leaving the NE half
// of the test unguarded, unlike the integer cmov variants above.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, no SSE).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE form).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Flags were produced with swapped operands, hence the commuted cmpOp.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the BoolTest disjunction is parenthesized so that it is gated by the
// UseSSE term; previously '&&' bound tighter than '||', leaving the GT half
// of the test unguarded, unlike the integer cmov variants above.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 form, no SSE).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


// Compare 2 longs and CMOVE floats (SSE form).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
// Dynamic (inline-cache) call: EAX is pre-loaded with a placeholder oop
// (see the format text) that the IC machinery patches.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that touches no FP state: no x87 stack bookkeeping needed.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast lock when RTM (Restricted Transactional Memory) is in use; extra
// scratch registers feed the RTM abort/retry bookkeeping in fast_lock().
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast lock: same macro-assembler entry point with the RTM
// arguments nulled out.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or. By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$. Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic. It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// A load (instruction 0) immediately preceded by a store (instruction 1) of
// the same register to the same memory slot is redundant: keep only the
// store.  (Numbering is zero-based, left to right, per the peepmatch rule.)
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.