//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
42 // 43 // SOC = Save-On-Call: The register allocator assumes that these registers 44 // can be used without saving upon entry to the method, 45 // but that they must be saved at call sites. 46 // 47 // SOE = Save-On-Entry: The register allocator assumes that these registers 48 // must be saved before using them upon entry to the 49 // method, but they do not need to be saved at call 50 // sites. 51 // 52 // AS = Always-Save: The register allocator assumes that these registers 53 // must be saved before using them upon entry to the 54 // method, & that they must be saved at call sites. 55 // 56 // Ideal Register Type is used to determine how to save & restore a 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59 // 60 // The encoding number is the actual bit-pattern placed into the opcodes. 61 62 // General Registers 63 // Previously set EBX, ESI, and EDI as save-on-entry for java code 64 // Turn off SOE in java-code due to frequent use of uncommon-traps. 65 // Now that allocator is better, turn on ESI and EDI as SOE registers. 66 67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()); 68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()); 69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()); 70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()); 71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code 72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg()); 73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()); 74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg()); 75 reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg()); 76 77 // Float registers. We treat TOS/FPR0 special. It is invisible to the 78 // allocator, and only shows up in the encodings. 
79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); 80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); 81 // Ok so here's the trick FPR1 is really st(0) except in the midst 82 // of emission of assembly for a machnode. During the emission the fpu stack 83 // is pushed making FPR1 == st(1) temporarily. However at any safepoint 84 // the stack will not have this element so FPR1 == st(0) from the 85 // oopMap viewpoint. This same weirdness with numbering causes 86 // instruction encoding to have to play games with the register 87 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation 88 // where it does flt->flt moves to see an example 89 // 90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()); 91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next()); 92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()); 93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next()); 94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()); 95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next()); 96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()); 97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next()); 98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()); 99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next()); 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()); 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next()); 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()); 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next()); 104 // 105 // Empty fill registers, which are never used, but supply alignment to xmm regs 106 // 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2)); 108 reg_def FILL1( SOC, SOC, 
Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3)); 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4)); 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5)); 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6)); 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7)); 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8)); 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9)); 115 116 // Specify priority of register selection within phases of register 117 // allocation. Highest priority is first. A useful heuristic is to 118 // give registers a low priority when they are required by machine 119 // instructions, like EAX and EDX. Registers which are used as 120 // pairs must fall on an even boundary (witness the FPR#L's in this list). 121 // For the Intel integer registers, the equivalent Long pairs are 122 // EDX:EAX, EBX:ECX, and EDI:EBP. 123 alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP, 124 FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H, 125 FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H, 126 FPR6L, FPR6H, FPR7L, FPR7H, 127 FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7); 128 129 130 //----------Architecture Description Register Classes-------------------------- 131 // Several register classes are automatically defined based upon information in 132 // this architecture description. 133 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) 134 // 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ ) 135 // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) 136 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) 137 // 138 // Class for no registers (empty set). 
139 reg_class no_reg(); 140 141 // Class for all registers 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP); 143 // Class for all registers (excluding EBP) 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP); 145 // Dynamic register class that selects at runtime between register classes 146 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer). 147 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg; 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %}); 149 150 // Class for general registers 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX); 152 // Class for general registers (excluding EBP). 153 // This register class can be used for implicit null checks on win95. 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp). 155 // Used also if the PreserveFramePointer flag is true. 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX); 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp. 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %}); 159 160 // Class of "X" registers 161 reg_class int_x_reg(EBX, ECX, EDX, EAX); 162 163 // Class of registers that can appear in an address with no offset. 164 // EBP and ESP require an extra instruction byte for zero offset. 165 // Used in fast-unlock 166 reg_class p_reg(EDX, EDI, ESI, EBX); 167 168 // Class for general registers excluding ECX 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX); 170 // Class for general registers excluding ECX (and EBP) 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX); 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp. 
173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %}); 174 175 // Class for general registers excluding EAX 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX); 177 178 // Class for general registers excluding EAX and EBX. 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP); 180 // Class for general registers excluding EAX and EBX (and EBP) 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX); 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp. 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %}); 184 185 // Class of EAX (for multiply and divide operations) 186 reg_class eax_reg(EAX); 187 188 // Class of EBX (for atomic add) 189 reg_class ebx_reg(EBX); 190 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask) 192 reg_class ecx_reg(ECX); 193 194 // Class of EDX (for multiply and divide operations) 195 reg_class edx_reg(EDX); 196 197 // Class of EDI (for synchronization) 198 reg_class edi_reg(EDI); 199 200 // Class of ESI (for synchronization) 201 reg_class esi_reg(ESI); 202 203 // Singleton class for stack pointer 204 reg_class sp_reg(ESP); 205 206 // Singleton class for instruction pointer 207 // reg_class ip_reg(EIP); 208 209 // Class of integer register pairs 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI ); 211 // Class of integer register pairs (excluding EBP and EDI); 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX ); 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp. 
214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %}); 215 216 // Class of integer register pairs that aligns with calling convention 217 reg_class eadx_reg( EAX,EDX ); 218 reg_class ebcx_reg( ECX,EBX ); 219 220 // Not AX or DX, used in divides 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP); 222 // Not AX or DX (and neither EBP), used in divides 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI); 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp. 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %}); 226 227 // Floating point registers. Notice FPR0 is not a choice. 228 // FPR0 is not ever allocated; we use clever encodings to fake 229 // a 2-address instructions out of Intels FP stack. 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L ); 231 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H, 233 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, 234 FPR7L,FPR7H ); 235 236 reg_class fp_flt_reg0( FPR1L ); 237 reg_class fp_dbl_reg0( FPR1L,FPR1H ); 238 reg_class fp_dbl_reg1( FPR2L,FPR2H ); 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, 240 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H ); 241 242 %} 243 244 245 //----------SOURCE BLOCK------------------------------------------------------- 246 // This is a block of C++ code which provides values, functions, and 247 // definitions necessary in the rest of the architecture description 248 source_hpp %{ 249 // Must be visible to the DFA in dfa_x86_32.cpp 250 extern bool is_operand_hi32_zero(Node* n); 251 %} 252 253 source %{ 254 #define RELOC_IMM32 Assembler::imm_operand 255 #define RELOC_DISP32 Assembler::disp32_operand 256 257 #define __ _masm. 
258 259 // How to find the high register of a Long pair, given the low register 260 #define HIGH_FROM_LOW(x) ((x)+2) 261 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM 263 // instructions, to allow sign-masking or sign-bit flipping. They allow 264 // fast versions of NegF/NegD and AbsF/AbsD. 265 266 // Note: 'double' and 'long long' have 32-bits alignment on x86. 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { 268 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address 269 // of 128-bits operands for SSE instructions. 270 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF))); 271 // Store the value to a 128-bits operand. 272 operand[0] = lo; 273 operand[1] = hi; 274 return operand; 275 } 276 277 // Buffer for 128-bits masks used by SSE instructions. 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment) 279 280 // Static initialization during VM startup. 281 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF)); 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF)); 283 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000)); 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000)); 285 286 // Offset hacking within calls. 287 static int pre_call_resets_size() { 288 int size = 0; 289 Compile* C = Compile::current(); 290 if (C->in_24_bit_fp_mode()) { 291 size += 6; // fldcw 292 } 293 if (C->max_vector_size() > 16) { 294 if(UseAVX <= 2) { 295 size += 3; // vzeroupper 296 } 297 } 298 return size; 299 } 300 301 // !!!!! 
Special hack to get all type of calls to specify the byte offset 302 // from the start of the call to the point where the return address 303 // will point. 304 int MachCallStaticJavaNode::ret_addr_offset() { 305 return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points 306 } 307 308 int MachCallDynamicJavaNode::ret_addr_offset() { 309 return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points 310 } 311 312 static int sizeof_FFree_Float_Stack_All = -1; 313 314 int MachCallRuntimeNode::ret_addr_offset() { 315 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already"); 316 return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size(); 317 } 318 319 // Indicate if the safepoint node needs the polling page as an input. 320 // Since x86 does have absolute addressing, it doesn't. 321 bool SafePointNode::needs_polling_address_input() { 322 return false; 323 } 324 325 // 326 // Compute padding required for nodes which need alignment 327 // 328 329 // The address of the call instruction needs to be 4-byte aligned to 330 // ensure that it does not span a cache line so that it can be patched. 331 int CallStaticJavaDirectNode::compute_padding(int current_offset) const { 332 current_offset += pre_call_resets_size(); // skip fldcw, if any 333 current_offset += 1; // skip call opcode byte 334 return round_to(current_offset, alignment_required()) - current_offset; 335 } 336 337 // The address of the call instruction needs to be 4-byte aligned to 338 // ensure that it does not span a cache line so that it can be patched. 
339 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { 340 current_offset += pre_call_resets_size(); // skip fldcw, if any 341 current_offset += 5; // skip MOV instruction 342 current_offset += 1; // skip call opcode byte 343 return round_to(current_offset, alignment_required()) - current_offset; 344 } 345 346 // EMIT_RM() 347 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) { 348 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3); 349 cbuf.insts()->emit_int8(c); 350 } 351 352 // EMIT_CC() 353 void emit_cc(CodeBuffer &cbuf, int f1, int f2) { 354 unsigned char c = (unsigned char)( f1 | f2 ); 355 cbuf.insts()->emit_int8(c); 356 } 357 358 // EMIT_OPCODE() 359 void emit_opcode(CodeBuffer &cbuf, int code) { 360 cbuf.insts()->emit_int8((unsigned char) code); 361 } 362 363 // EMIT_OPCODE() w/ relocation information 364 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) { 365 cbuf.relocate(cbuf.insts_mark() + offset, reloc); 366 emit_opcode(cbuf, code); 367 } 368 369 // EMIT_D8() 370 void emit_d8(CodeBuffer &cbuf, int d8) { 371 cbuf.insts()->emit_int8((unsigned char) d8); 372 } 373 374 // EMIT_D16() 375 void emit_d16(CodeBuffer &cbuf, int d16) { 376 cbuf.insts()->emit_int16(d16); 377 } 378 379 // EMIT_D32() 380 void emit_d32(CodeBuffer &cbuf, int d32) { 381 cbuf.insts()->emit_int32(d32); 382 } 383 384 // emit 32 bit value and construct relocation entry from relocInfo::relocType 385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc, 386 int format) { 387 cbuf.relocate(cbuf.insts_mark(), reloc, format); 388 cbuf.insts()->emit_int32(d32); 389 } 390 391 // emit 32 bit value and construct relocation entry from RelocationHolder 392 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec, 393 int format) { 394 #ifdef ASSERT 395 if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) { 396 
assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code"); 397 } 398 #endif 399 cbuf.relocate(cbuf.insts_mark(), rspec, format); 400 cbuf.insts()->emit_int32(d32); 401 } 402 403 // Access stack slot for load or store 404 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) { 405 emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src]) 406 if( -128 <= disp && disp <= 127 ) { 407 emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte 408 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 409 emit_d8 (cbuf, disp); // Displacement // R/M byte 410 } else { 411 emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte 412 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 413 emit_d32(cbuf, disp); // Displacement // R/M byte 414 } 415 } 416 417 // rRegI ereg, memory mem) %{ // emit_reg_mem 418 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) { 419 // There is no index & no scale, use form without SIB byte 420 if ((index == 0x4) && 421 (scale == 0) && (base != ESP_enc)) { 422 // If no displacement, mode is 0x0; unless base is [EBP] 423 if ( (displace == 0) && (base != EBP_enc) ) { 424 emit_rm(cbuf, 0x0, reg_encoding, base); 425 } 426 else { // If 8-bit displacement, mode 0x1 427 if ((displace >= -128) && (displace <= 127) 428 && (disp_reloc == relocInfo::none) ) { 429 emit_rm(cbuf, 0x1, reg_encoding, base); 430 emit_d8(cbuf, displace); 431 } 432 else { // If 32-bit displacement 433 if (base == -1) { // Special flag for absolute address 434 emit_rm(cbuf, 0x0, reg_encoding, 0x5); 435 // (manual lies; no SIB needed here) 436 if ( disp_reloc != relocInfo::none ) { 437 emit_d32_reloc(cbuf, displace, disp_reloc, 1); 438 } else { 439 emit_d32 (cbuf, displace); 440 } 441 } 442 else { // Normal base + offset 443 emit_rm(cbuf, 0x2, reg_encoding, base); 444 if ( disp_reloc != relocInfo::none ) { 
445 emit_d32_reloc(cbuf, displace, disp_reloc, 1); 446 } else { 447 emit_d32 (cbuf, displace); 448 } 449 } 450 } 451 } 452 } 453 else { // Else, encode with the SIB byte 454 // If no displacement, mode is 0x0; unless base is [EBP] 455 if (displace == 0 && (base != EBP_enc)) { // If no displacement 456 emit_rm(cbuf, 0x0, reg_encoding, 0x4); 457 emit_rm(cbuf, scale, index, base); 458 } 459 else { // If 8-bit displacement, mode 0x1 460 if ((displace >= -128) && (displace <= 127) 461 && (disp_reloc == relocInfo::none) ) { 462 emit_rm(cbuf, 0x1, reg_encoding, 0x4); 463 emit_rm(cbuf, scale, index, base); 464 emit_d8(cbuf, displace); 465 } 466 else { // If 32-bit displacement 467 if (base == 0x04 ) { 468 emit_rm(cbuf, 0x2, reg_encoding, 0x4); 469 emit_rm(cbuf, scale, index, 0x04); 470 } else { 471 emit_rm(cbuf, 0x2, reg_encoding, 0x4); 472 emit_rm(cbuf, scale, index, base); 473 } 474 if ( disp_reloc != relocInfo::none ) { 475 emit_d32_reloc(cbuf, displace, disp_reloc, 1); 476 } else { 477 emit_d32 (cbuf, displace); 478 } 479 } 480 } 481 } 482 } 483 484 485 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { 486 if( dst_encoding == src_encoding ) { 487 // reg-reg copy, use an empty encoding 488 } else { 489 emit_opcode( cbuf, 0x8B ); 490 emit_rm(cbuf, 0x3, dst_encoding, src_encoding ); 491 } 492 } 493 494 void emit_cmpfp_fixup(MacroAssembler& _masm) { 495 Label exit; 496 __ jccb(Assembler::noParity, exit); 497 __ pushf(); 498 // 499 // comiss/ucomiss instructions set ZF,PF,CF flags and 500 // zero OF,AF,SF for NaN values. 501 // Fixup flags by zeroing ZF,PF so that compare of NaN 502 // values returns 'less than' result (CF is set). 503 // Leave the rest of flags unchanged. 
504 // 505 // 7 6 5 4 3 2 1 0 506 // |S|Z|r|A|r|P|r|C| (r - reserved bit) 507 // 0 0 1 0 1 0 1 1 (0x2B) 508 // 509 __ andl(Address(rsp, 0), 0xffffff2b); 510 __ popf(); 511 __ bind(exit); 512 } 513 514 void emit_cmpfp3(MacroAssembler& _masm, Register dst) { 515 Label done; 516 __ movl(dst, -1); 517 __ jcc(Assembler::parity, done); 518 __ jcc(Assembler::below, done); 519 __ setb(Assembler::notEqual, dst); 520 __ movzbl(dst, dst); 521 __ bind(done); 522 } 523 524 525 //============================================================================= 526 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; 527 528 int Compile::ConstantTable::calculate_table_base_offset() const { 529 return 0; // absolute addressing, no offset 530 } 531 532 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } 533 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) { 534 ShouldNotReachHere(); 535 } 536 537 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { 538 // Empty encoding 539 } 540 541 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { 542 return 0; 543 } 544 545 #ifndef PRODUCT 546 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { 547 st->print("# MachConstantBaseNode (empty encoding)"); 548 } 549 #endif 550 551 552 //============================================================================= 553 #ifndef PRODUCT 554 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { 555 Compile* C = ra_->C; 556 557 int framesize = C->frame_size_in_bytes(); 558 int bangsize = C->bang_size_in_bytes(); 559 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 560 // Remove wordSize for return addr which is already pushed. 
561 framesize -= wordSize; 562 563 if (C->need_stack_bang(bangsize)) { 564 framesize -= wordSize; 565 st->print("# stack bang (%d bytes)", bangsize); 566 st->print("\n\t"); 567 st->print("PUSH EBP\t# Save EBP"); 568 if (PreserveFramePointer) { 569 st->print("\n\t"); 570 st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); 571 } 572 if (framesize) { 573 st->print("\n\t"); 574 st->print("SUB ESP, #%d\t# Create frame",framesize); 575 } 576 } else { 577 st->print("SUB ESP, #%d\t# Create frame",framesize); 578 st->print("\n\t"); 579 framesize -= wordSize; 580 st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize); 581 if (PreserveFramePointer) { 582 st->print("\n\t"); 583 st->print("MOV EBP, [ESP + #%d]\t# Save the caller's SP into EBP", (framesize + wordSize)); 584 } 585 } 586 587 if (VerifyStackAtCalls) { 588 st->print("\n\t"); 589 framesize -= wordSize; 590 st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize); 591 } 592 593 if( C->in_24_bit_fp_mode() ) { 594 st->print("\n\t"); 595 st->print("FLDCW \t# load 24 bit fpu control word"); 596 } 597 if (UseSSE >= 2 && VerifyFPU) { 598 st->print("\n\t"); 599 st->print("# verify FPU stack (must be clean on entry)"); 600 } 601 602 #ifdef ASSERT 603 if (VerifyStackAtCalls) { 604 st->print("\n\t"); 605 st->print("# stack alignment check"); 606 } 607 #endif 608 st->cr(); 609 } 610 #endif 611 612 613 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 614 Compile* C = ra_->C; 615 MacroAssembler _masm(&cbuf); 616 617 int framesize = C->frame_size_in_bytes(); 618 int bangsize = C->bang_size_in_bytes(); 619 620 __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode()); 621 622 C->set_frame_complete(cbuf.insts_size()); 623 624 if (C->has_mach_constant_base_node()) { 625 // NOTE: We set the table base offset here because users might be 626 // emitted before MachConstantBaseNode. 
627 Compile::ConstantTable& constant_table = C->constant_table(); 628 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); 629 } 630 } 631 632 uint MachPrologNode::size(PhaseRegAlloc *ra_) const { 633 return MachNode::size(ra_); // too many variables; just compute it the hard way 634 } 635 636 int MachPrologNode::reloc() const { 637 return 0; // a large enough number 638 } 639 640 //============================================================================= 641 #ifndef PRODUCT 642 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 643 Compile *C = ra_->C; 644 int framesize = C->frame_size_in_bytes(); 645 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 646 // Remove two words for return addr and rbp, 647 framesize -= 2*wordSize; 648 649 if (C->max_vector_size() > 16) { 650 st->print("VZEROUPPER"); 651 st->cr(); st->print("\t"); 652 } 653 if (C->in_24_bit_fp_mode()) { 654 st->print("FLDCW standard control word"); 655 st->cr(); st->print("\t"); 656 } 657 if (framesize) { 658 st->print("ADD ESP,%d\t# Destroy frame",framesize); 659 st->cr(); st->print("\t"); 660 } 661 st->print_cr("POPL EBP"); st->print("\t"); 662 if (do_polling() && C->is_method_compilation()) { 663 st->print("TEST PollPage,EAX\t! Poll Safepoint"); 664 st->cr(); st->print("\t"); 665 } 666 } 667 #endif 668 669 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 670 Compile *C = ra_->C; 671 672 if (C->max_vector_size() > 16) { 673 // Clear upper bits of YMM registers when current compiled code uses 674 // wide vectors to avoid AVX <-> SSE transition penalty during call. 
675 MacroAssembler masm(&cbuf); 676 masm.vzeroupper(); 677 } 678 // If method set FPU control word, restore to standard control word 679 if (C->in_24_bit_fp_mode()) { 680 MacroAssembler masm(&cbuf); 681 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 682 } 683 684 int framesize = C->frame_size_in_bytes(); 685 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 686 // Remove two words for return addr and rbp, 687 framesize -= 2*wordSize; 688 689 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here 690 691 if (framesize >= 128) { 692 emit_opcode(cbuf, 0x81); // add SP, #framesize 693 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 694 emit_d32(cbuf, framesize); 695 } else if (framesize) { 696 emit_opcode(cbuf, 0x83); // add SP, #framesize 697 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 698 emit_d8(cbuf, framesize); 699 } 700 701 emit_opcode(cbuf, 0x58 | EBP_enc); 702 703 if (do_polling() && C->is_method_compilation()) { 704 cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0); 705 emit_opcode(cbuf,0x85); 706 emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX 707 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 708 } 709 } 710 711 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { 712 Compile *C = ra_->C; 713 // If method set FPU control word, restore to standard control word 714 int size = C->in_24_bit_fp_mode() ? 6 : 0; 715 if (C->max_vector_size() > 16) size += 3; // vzeroupper 716 if (do_polling() && C->is_method_compilation()) size += 6; 717 718 int framesize = C->frame_size_in_bytes(); 719 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 720 // Remove two words for return addr and rbp, 721 framesize -= 2*wordSize; 722 723 size++; // popl rbp, 724 725 if (framesize >= 128) { 726 size += 6; 727 } else { 728 size += framesize ? 
3 : 0; 729 } 730 return size; 731 } 732 733 int MachEpilogNode::reloc() const { 734 return 0; // a large enough number 735 } 736 737 const Pipeline * MachEpilogNode::pipeline() const { 738 return MachNode::pipeline_class(); 739 } 740 741 int MachEpilogNode::safepoint_offset() const { return 0; } 742 743 //============================================================================= 744 745 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack }; 746 static enum RC rc_class( OptoReg::Name reg ) { 747 748 if( !OptoReg::is_valid(reg) ) return rc_bad; 749 if (OptoReg::is_stack(reg)) return rc_stack; 750 751 VMReg r = OptoReg::as_VMReg(reg); 752 if (r->is_Register()) return rc_int; 753 if (r->is_FloatRegister()) { 754 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 755 return rc_float; 756 } 757 assert(r->is_XMMRegister(), "must be"); 758 return rc_xmm; 759 } 760 761 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, 762 int opcode, const char *op_str, int size, outputStream* st ) { 763 if( cbuf ) { 764 emit_opcode (*cbuf, opcode ); 765 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 766 #ifndef PRODUCT 767 } else if( !do_size ) { 768 if( size != 0 ) st->print("\n\t"); 769 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 770 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 771 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 772 } else { // FLD, FST, PUSH, POP 773 st->print("%s [ESP + #%d]",op_str,offset); 774 } 775 #endif 776 } 777 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 778 return size+3+offset_size; 779 } 780 781 // Helper for XMM registers. Extra opcode bits, limited syntax. 
// Emit/print/size an XMM load or store between register 'reg_lo' (and
// 'reg_hi' for a 64-bit pair) and stack slot [ESP+offset].  Uses MOVSD/
// MOVLPD for doubles and MOVSS for floats; accounts for VEX/EVEX prefix and
// compressed-displacement sizing when AVX is enabled.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    // Adjacent hi/lo halves => a 64-bit (double) move.
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    // EVEX can compress the displacement to one byte when it is a multiple
    // of the operand size; ask the assembler whether disp8*N applies.
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// Emit/print/size an XMM-to-XMM register copy (MOVAPS/MOVAPD or MOVSS/MOVSD
// depending on UseXmmRegToRegMoveAll); adjacent hi/lo halves mean a double.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Emit/print/size a MOVD from a general-purpose register into an XMM
// register (32-bit only; no 64-bit pair support here).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  // EVEX-prefixed movd is two bytes longer than the SSE2/VEX form.
  return (UseAVX> 2) ? 6 : 4;
}


// Emit/print/size a MOVD from an XMM register into a general-purpose
// register (32-bit only).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Emit/print/size a 2-byte MOV between two general-purpose registers.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 float-stack register to [ESP+offset].  If the source is not
// already at the top of the FP stack it is pushed first (FLD) and the store
// then pops it (FSTP); otherwise a non-popping FST is used.  Note the
// st_op register-number trick: EBX_num/EDX_num encode the ModRM reg field
// for "store & pop" vs "store no pop" in impl_helper.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
950 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 951 int src_hi, int dst_hi, uint ireg, outputStream* st); 952 953 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 954 int stack_offset, int reg, uint ireg, outputStream* st); 955 956 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 957 int dst_offset, uint ireg, outputStream* st) { 958 int calc_size = 0; 959 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 960 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 961 switch (ireg) { 962 case Op_VecS: 963 calc_size = 3+src_offset_size + 3+dst_offset_size; 964 break; 965 case Op_VecD: 966 calc_size = 3+src_offset_size + 3+dst_offset_size; 967 src_offset += 4; 968 dst_offset += 4; 969 src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 970 dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 971 calc_size += 3+src_offset_size + 3+dst_offset_size; 972 break; 973 case Op_VecX: 974 case Op_VecY: 975 case Op_VecZ: 976 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 977 break; 978 default: 979 ShouldNotReachHere(); 980 } 981 if (cbuf) { 982 MacroAssembler _masm(cbuf); 983 int offset = __ offset(); 984 switch (ireg) { 985 case Op_VecS: 986 __ pushl(Address(rsp, src_offset)); 987 __ popl (Address(rsp, dst_offset)); 988 break; 989 case Op_VecD: 990 __ pushl(Address(rsp, src_offset)); 991 __ popl (Address(rsp, dst_offset)); 992 __ pushl(Address(rsp, src_offset+4)); 993 __ popl (Address(rsp, dst_offset+4)); 994 break; 995 case Op_VecX: 996 __ movdqu(Address(rsp, -16), xmm0); 997 __ movdqu(xmm0, Address(rsp, src_offset)); 998 __ movdqu(Address(rsp, dst_offset), xmm0); 999 __ movdqu(xmm0, Address(rsp, -16)); 1000 break; 1001 case Op_VecY: 1002 __ vmovdqu(Address(rsp, -32), xmm0); 1003 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1004 __ vmovdqu(Address(rsp, dst_offset), xmm0); 1005 __ 
vmovdqu(xmm0, Address(rsp, -32)); 1006 case Op_VecZ: 1007 __ evmovdqul(Address(rsp, -64), xmm0, 2); 1008 __ evmovdqul(xmm0, Address(rsp, src_offset), 2); 1009 __ evmovdqul(Address(rsp, dst_offset), xmm0, 2); 1010 __ evmovdqul(xmm0, Address(rsp, -64), 2); 1011 break; 1012 default: 1013 ShouldNotReachHere(); 1014 } 1015 int size = __ offset() - offset; 1016 assert(size == calc_size, "incorrect size calculattion"); 1017 return size; 1018 #ifndef PRODUCT 1019 } else if (!do_size) { 1020 switch (ireg) { 1021 case Op_VecS: 1022 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1023 "popl [rsp + #%d]", 1024 src_offset, dst_offset); 1025 break; 1026 case Op_VecD: 1027 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1028 "popq [rsp + #%d]\n\t" 1029 "pushl [rsp + #%d]\n\t" 1030 "popq [rsp + #%d]", 1031 src_offset, dst_offset, src_offset+4, dst_offset+4); 1032 break; 1033 case Op_VecX: 1034 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 1035 "movdqu xmm0, [rsp + #%d]\n\t" 1036 "movdqu [rsp + #%d], xmm0\n\t" 1037 "movdqu xmm0, [rsp - #16]", 1038 src_offset, dst_offset); 1039 break; 1040 case Op_VecY: 1041 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1042 "vmovdqu xmm0, [rsp + #%d]\n\t" 1043 "vmovdqu [rsp + #%d], xmm0\n\t" 1044 "vmovdqu xmm0, [rsp - #32]", 1045 src_offset, dst_offset); 1046 case Op_VecZ: 1047 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1048 "vmovdqu xmm0, [rsp + #%d]\n\t" 1049 "vmovdqu [rsp + #%d], xmm0\n\t" 1050 "vmovdqu xmm0, [rsp - #64]", 1051 src_offset, dst_offset); 1052 break; 1053 default: 1054 ShouldNotReachHere(); 1055 } 1056 #endif 1057 } 1058 return calc_size; 1059 } 1060 1061 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1062 // Get registers to move 1063 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1064 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1065 OptoReg::Name dst_second = 
ra_->get_reg_second(this ); 1066 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1067 1068 enum RC src_second_rc = rc_class(src_second); 1069 enum RC src_first_rc = rc_class(src_first); 1070 enum RC dst_second_rc = rc_class(dst_second); 1071 enum RC dst_first_rc = rc_class(dst_first); 1072 1073 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1074 1075 // Generate spill code! 1076 int size = 0; 1077 1078 if( src_first == dst_first && src_second == dst_second ) 1079 return size; // Self copy, no move 1080 1081 if (bottom_type()->isa_vect() != NULL) { 1082 uint ireg = ideal_reg(); 1083 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1084 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1085 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1086 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1087 // mem -> mem 1088 int src_offset = ra_->reg2offset(src_first); 1089 int dst_offset = ra_->reg2offset(dst_first); 1090 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1091 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1092 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1093 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1094 int stack_offset = ra_->reg2offset(dst_first); 1095 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1096 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1097 int stack_offset = ra_->reg2offset(src_first); 1098 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1099 } else { 1100 ShouldNotReachHere(); 1101 } 1102 } 1103 1104 // -------------------------------------- 1105 // Check for mem-mem move. push/pop to move. 
1106 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1107 if( src_second == dst_first ) { // overlapping stack copy ranges 1108 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1109 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1110 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1111 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1112 } 1113 // move low bits 1114 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1115 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1116 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1117 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1118 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1119 } 1120 return size; 1121 } 1122 1123 // -------------------------------------- 1124 // Check for integer reg-reg copy 1125 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1126 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1127 1128 // Check for integer store 1129 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1130 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1131 1132 // Check for integer load 1133 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1134 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1135 1136 // Check for integer reg-xmm reg copy 1137 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1138 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1139 "no 64 bit integer-float reg moves" ); 1140 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1141 } 1142 // -------------------------------------- 1143 // Check for float reg-reg copy 1144 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1145 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1146 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1147 if( cbuf ) { 1148 1149 // Note the mucking with the register encode to compensate for the 0/1 1150 // indexing issue mentioned in a comment in the reg_def sections 1151 // for FPR registers many lines above here. 1152 1153 if( src_first != FPR1L_num ) { 1154 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1155 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1156 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1157 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1158 } else { 1159 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1160 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1161 } 1162 #ifndef PRODUCT 1163 } else if( !do_size ) { 1164 if( size != 0 ) st->print("\n\t"); 1165 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1166 else st->print( "FST %s", Matcher::regName[dst_first]); 1167 #endif 1168 } 1169 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1170 } 1171 1172 // Check for float store 1173 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1174 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1175 } 1176 1177 // Check for float load 1178 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1179 int offset = ra_->reg2offset(src_first); 1180 const char *op_str; 1181 int op; 1182 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1183 op_str = "FLD_D"; 1184 op = 0xDD; 1185 } else { // 32-bit load 1186 op_str = "FLD_S"; 1187 op = 0xD9; 1188 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1189 } 1190 if( cbuf ) { 1191 emit_opcode (*cbuf, op ); 1192 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1193 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1194 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1195 #ifndef PRODUCT 1196 } else if( !do_size ) { 1197 if( size != 0 ) st->print("\n\t"); 1198 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1199 #endif 1200 } 1201 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1202 return size + 3+offset_size+2; 1203 } 1204 1205 // Check for xmm reg-reg copy 1206 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1207 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1208 (src_first+1 == src_second && dst_first+1 == dst_second), 1209 "no non-adjacent float-moves" ); 1210 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1211 } 1212 1213 // Check for xmm reg-integer reg copy 1214 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1215 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1216 "no 64 bit float-integer reg moves" ); 1217 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1218 } 1219 1220 // Check for xmm store 1221 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1222 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1223 } 1224 1225 // Check for float xmm load 1226 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1227 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1228 } 1229 1230 // Copy from float reg to xmm reg 1231 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1232 
// copy to the top of stack from floating point reg 1233 // and use LEA to preserve flags 1234 if( cbuf ) { 1235 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1236 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1237 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1238 emit_d8(*cbuf,0xF8); 1239 #ifndef PRODUCT 1240 } else if( !do_size ) { 1241 if( size != 0 ) st->print("\n\t"); 1242 st->print("LEA ESP,[ESP-8]"); 1243 #endif 1244 } 1245 size += 4; 1246 1247 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1248 1249 // Copy from the temp memory to the xmm reg. 1250 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1251 1252 if( cbuf ) { 1253 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1254 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1255 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1256 emit_d8(*cbuf,0x08); 1257 #ifndef PRODUCT 1258 } else if( !do_size ) { 1259 if( size != 0 ) st->print("\n\t"); 1260 st->print("LEA ESP,[ESP+8]"); 1261 #endif 1262 } 1263 size += 4; 1264 return size; 1265 } 1266 1267 assert( size > 0, "missed a case" ); 1268 1269 // -------------------------------------------------------------------- 1270 // Check for second bits still needing moving. 
1271 if( src_second == dst_second ) 1272 return size; // Self copy; no move 1273 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1274 1275 // Check for second word int-int move 1276 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1277 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1278 1279 // Check for second word integer store 1280 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1281 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1282 1283 // Check for second word integer load 1284 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1285 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1286 1287 1288 Unimplemented(); 1289 return 0; // Mute compiler 1290 } 1291 1292 #ifndef PRODUCT 1293 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1294 implementation( NULL, ra_, false, st ); 1295 } 1296 #endif 1297 1298 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1299 implementation( &cbuf, ra_, false, NULL ); 1300 } 1301 1302 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1303 return implementation( NULL, ra_, true, NULL ); 1304 } 1305 1306 1307 //============================================================================= 1308 #ifndef PRODUCT 1309 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1310 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1311 int reg = ra_->get_reg_first(this); 1312 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1313 } 1314 #endif 1315 1316 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1317 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1318 int reg = ra_->get_encode(this); 1319 if( offset >= 128 ) { 1320 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1321 emit_rm(cbuf, 0x2, reg, 
0x04); 1322 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1323 emit_d32(cbuf, offset); 1324 } 1325 else { 1326 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1327 emit_rm(cbuf, 0x1, reg, 0x04); 1328 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1329 emit_d8(cbuf, offset); 1330 } 1331 } 1332 1333 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1334 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1335 if( offset >= 128 ) { 1336 return 7; 1337 } 1338 else { 1339 return 4; 1340 } 1341 } 1342 1343 //============================================================================= 1344 #ifndef PRODUCT 1345 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1346 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1347 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1348 st->print_cr("\tNOP"); 1349 st->print_cr("\tNOP"); 1350 if( !OptoBreakpoint ) 1351 st->print_cr("\tNOP"); 1352 } 1353 #endif 1354 1355 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1356 MacroAssembler masm(&cbuf); 1357 #ifdef ASSERT 1358 uint insts_size = cbuf.insts_size(); 1359 #endif 1360 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1361 masm.jump_cc(Assembler::notEqual, 1362 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1363 /* WARNING these NOPs are critical so that verified entry point is properly 1364 aligned for patching by NativeJump::patch_verified_entry() */ 1365 int nops_cnt = 2; 1366 if( !OptoBreakpoint ) // Leave space for int3 1367 nops_cnt += 1; 1368 masm.nop(nops_cnt); 1369 1370 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1371 } 1372 1373 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1374 return OptoBreakpoint ? 
11 : 12; 1375 } 1376 1377 1378 //============================================================================= 1379 1380 int Matcher::regnum_to_fpu_offset(int regnum) { 1381 return regnum - 32; // The FP registers are in the second chunk 1382 } 1383 1384 // This is UltraSparc specific, true just means we have fast l2f conversion 1385 const bool Matcher::convL2FSupported(void) { 1386 return true; 1387 } 1388 1389 // Is this branch offset short enough that a short branch can be used? 1390 // 1391 // NOTE: If the platform does not provide any short branch variants, then 1392 // this method should return false for offset 0. 1393 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1394 // The passed offset is relative to address of the branch. 1395 // On 86 a branch displacement is calculated relative to address 1396 // of a next instruction. 1397 offset -= br_size; 1398 1399 // the short version of jmpConUCF2 contains multiple branches, 1400 // making the reach slightly less 1401 if (rule == jmpConUCF2_rule) 1402 return (-126 <= offset && offset <= 125); 1403 return (-128 <= offset && offset <= 127); 1404 } 1405 1406 const bool Matcher::isSimpleConstant64(jlong value) { 1407 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1408 return false; 1409 } 1410 1411 // The ecx parameter to rep stos for the ClearArray node is in dwords. 1412 const bool Matcher::init_array_count_is_in_bytes = false; 1413 1414 // Threshold size for cleararray. 1415 const int Matcher::init_array_short_size = 8 * BytesPerLong; 1416 1417 // Needs 2 CMOV's for longs. 1418 const int Matcher::long_cmove_cost() { return 1; } 1419 1420 // No CMOVF/CMOVD with SSE/SSE2 1421 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1422 1423 // Does the CPU require late expand (see block.cpp for description of late expand)? 
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Narrow oops are a 64-bit feature; this must not be reached on x86_32.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

// Narrow klass pointers are a 64-bit feature; must not be reached here.
bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Rewrite a memory operand that could fault at an unintended address into
// its *_win95_safe variant.  Walks the node's operand list to find the
// operand containing input 'idx', then replaces it based on its addressing
// mode; DIRECT/INDOFFSET32X and the LOAD_LONG forms need no change.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  while( idx >= skipped+num_edges ) { // Check for index past this operand
    skipped += num_edges;
    opcnt++;                      // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if(  reg == ECX_num   || reg == EDX_num   ) return true;
  if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
    (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL — no 64-bit hardware divide here.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL — no 64-bit hardware divide here.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.   CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.
MEMORY_INTER causes an 1598 // operand to generate four functions which return the Base Register, the 1599 // Index Register, the Scale Value, and the Offset Value of the operand when 1600 // queried. COND_INTER causes an operand to generate six functions which 1601 // return the encoding code (ie - encoding bits for the instruction) 1602 // associated with each basic boolean condition for a conditional instruction. 1603 // Instructions specify two basic values for encoding. They use the 1604 // ins_encode keyword to specify their encoding class (which must be one of 1605 // the class names specified in the encoding block), and they use the 1606 // opcode keyword to specify, in order, their primary, secondary, and 1607 // tertiary opcode. Only the opcode sections which a particular instruction 1608 // needs for encoding need to be specified. 1609 encode %{ 1610 // Build emit functions for each basic byte or larger field in the intel 1611 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1612 // code in the enc_class source block. Emit functions will live in the 1613 // main source block for now. 
In future, we can generalize this by 1614 // adding a syntax that specifies the sizes of fields in an order, 1615 // so that the adlc can build the emit functions automagically 1616 1617 // Emit primary opcode 1618 enc_class OpcP %{ 1619 emit_opcode(cbuf, $primary); 1620 %} 1621 1622 // Emit secondary opcode 1623 enc_class OpcS %{ 1624 emit_opcode(cbuf, $secondary); 1625 %} 1626 1627 // Emit opcode directly 1628 enc_class Opcode(immI d8) %{ 1629 emit_opcode(cbuf, $d8$$constant); 1630 %} 1631 1632 enc_class SizePrefix %{ 1633 emit_opcode(cbuf,0x66); 1634 %} 1635 1636 enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) 1637 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1638 %} 1639 1640 enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many) 1641 emit_opcode(cbuf,$opcode$$constant); 1642 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg); 1643 %} 1644 1645 enc_class mov_r32_imm0( rRegI dst ) %{ 1646 emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 1647 emit_d32 ( cbuf, 0x0 ); // imm32==0x0 1648 %} 1649 1650 enc_class cdq_enc %{ 1651 // Full implementation of Java idiv and irem; checks for 1652 // special case as described in JVM spec., p.243 & p.271. 
//
    //         normal case                          special case
    //
    // input : rax,: dividend                       min_int
    //         reg:  divisor                        -1
    //
    // output: rax,: quotient  (= rax, idiv reg)    min_int
    //         rdx:  remainder (= rax, irem reg)    0
    //
    // Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh  (imm8 0xFF sign-extends: cmp ecx,-1)
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02); }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low 32 bits of a long immediate op: opcode + r/m + imm8/imm32
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // High 32 bits of a long immediate op: uses the tertiary opcode in the r/m byte
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit value held in a register pair: bswap each half, then
  // exchange the halves.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  // Slow-path subtype check via MacroAssembler; result in EDI, ECX killed.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    // First emission records the size; later emissions must match it exactly.
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
__ ffree(0);
      } else if (rt == T_FLOAT) {
        // Move the x87 float result into xmm0 via a stack round-trip.
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        // Move the x87 double result into xmm0 via a stack round-trip.
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}


  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if (ra_->C->in_24_bit_fp_mode()) {
      MacroAssembler _masm(&cbuf);
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    if (ra_->C->max_vector_size() > 16) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      MacroAssembler _masm(&cbuf);
      __ vzeroupper();
    }
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // Pick the relocation type from the call kind: runtime stub,
    // optimized virtual, or plain static.
    if (!_method) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(), RELOC_IMM32 );
    } else if (_optimized_virtual) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
    } else {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     static_call_Relocation::spec(), RELOC_IMM32 );
    }
    if (_method) {  // Emit stub for static call.
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method);
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);             // Displacement

  %}

  // Following encoding is no longer used, but may be restored if calling
  // convention changes significantly.
  // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  // enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //   // int ic_reg     = Matcher::inline_cache_reg();
  //   // int ic_encode  = Matcher::_regEncode[ic_reg];
  //   // int imo_reg    = Matcher::interpreter_method_oop_reg();
  //   // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //   // // Interpreter expects method_oop in EBX, currently a callee-saved register,
  //   // // so we load it immediately before the call
  //   // emit_opcode(cbuf, 0x8B);  // MOV    imo_reg,ic_reg  # method_oop
  //   // emit_rm(cbuf, 0x03, imo_encode, ic_encode );  // R/M byte
  //
  //   // xor rbp,ebp
  //   emit_opcode(cbuf, 0x33);
  //   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //   // CALL to interpreter.
  //   cbuf.set_insts_mark();
  //   $$$emit8$primary;
  //   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //               runtime_call_Relocation::spec(), RELOC_IMM32 );
  // %}

  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Copy the low word of a long pair into an integer register.
  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // r/m byte pairing an integer register with the high half of a long pair.
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  // Emit a raw 32-bit immediate (no 8-bit short form)
  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // Emit the LOCK prefix only on multiprocessor systems.
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize ZF!=0 as a 0/1 boolean in 'res' (branch over the MOV res,1).
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Long shift by 1..31: double-shift (SHLD/SHRD, selected via $tertiary)
  // followed by a plain shift of the other half.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by 32..63: copy hi->lo, shift lo,
  // then sign-fill the high half with SAR 31.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical long shift by 32..63: move one half, shift it, zero the other.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();       // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();       // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free conditional add: p += (p < q) ? y : 0 via SUB/SBB/AND/ADD.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Variable long shift left: handle shift>=32 by moving lo->hi and
  // clearing lo, then SHLD/SHL by the (mod-32) count in CL.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Variable long logical shift right: mirror of shift_left_long.
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Variable long arithmetic shift right: sign-fills the high half
  // (SAR hi,31) for shifts >= 32.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes

  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!!  equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to
a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  // Load dst into FPR0; if src is not FPR1, rotate the stack so src
  // ends up just below the new TOS (fincstp / FXCH / fdecstp).
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Spill two XMM doubles through the stack onto the x87 stack (src1 then src0).
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Spill two XMM floats through the stack onto the x87 stack (src1 then src0).
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 TOS double into an XMM register via the stack.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Pop the x87 TOS float into an XMM register; d8 is the stack adjustment.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Push an XMM double onto the x87 stack via a freshly reserved stack slot.
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Like Push_SrcD but assumes the stack slot is already reserved.
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Move FPU status into EFLAGS; skip ahead when parity is clear (ordered).
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  // fnstsw_ax();
  // sahf();
  // movl(dst, nan_result);
  // jcc(Assembler::parity, exit);
  // movl(dst, less_result);
  // jcc(Assembler::below, exit);
  // movl(dst, equal_result);
  // jcc(Assembler::equal, exit);
  // movl(dst, greater_result);

  // less_result     =  1;
  // greater_result  = -1;
  // equal_result    = 0;
  // nan_result      = -1;

  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
2703 emit_d8 ( cbuf, 0x0C ); 2704 // movl(dst, equal_result); 2705 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2706 emit_d32( cbuf, 0 ); 2707 // jcc(Assembler::equal, exit); 2708 emit_opcode( cbuf, 0x74 ); 2709 emit_d8 ( cbuf, 0x05 ); 2710 // movl(dst, greater_result); 2711 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2712 emit_d32( cbuf, 1 ); 2713 %} 2714 2715 2716 // Compare the longs and set flags 2717 // BROKEN! Do Not use as-is 2718 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2719 // CMP $src1.hi,$src2.hi 2720 emit_opcode( cbuf, 0x3B ); 2721 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2722 // JNE,s done 2723 emit_opcode(cbuf,0x75); 2724 emit_d8(cbuf, 2 ); 2725 // CMP $src1.lo,$src2.lo 2726 emit_opcode( cbuf, 0x3B ); 2727 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2728 // done: 2729 %} 2730 2731 enc_class convert_int_long( regL dst, rRegI src ) %{ 2732 // mov $dst.lo,$src 2733 int dst_encoding = $dst$$reg; 2734 int src_encoding = $src$$reg; 2735 encode_Copy( cbuf, dst_encoding , src_encoding ); 2736 // mov $dst.hi,$src 2737 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2738 // sar $dst.hi,31 2739 emit_opcode( cbuf, 0xC1 ); 2740 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2741 emit_d8(cbuf, 0x1F ); 2742 %} 2743 2744 enc_class convert_long_double( eRegL src ) %{ 2745 // push $src.hi 2746 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2747 // push $src.lo 2748 emit_opcode(cbuf, 0x50+$src$$reg ); 2749 // fild 64-bits at [SP] 2750 emit_opcode(cbuf,0xdf); 2751 emit_d8(cbuf, 0x6C); 2752 emit_d8(cbuf, 0x24); 2753 emit_d8(cbuf, 0x00); 2754 // pop stack 2755 emit_opcode(cbuf, 0x83); // add SP, #8 2756 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2757 emit_d8(cbuf, 0x8); 2758 %} 2759 2760 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2761 // IMUL EDX:EAX,$src1 2762 emit_opcode( cbuf, 0xF7 ); 2763 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2764 // SAR 
EDX,$cnt-32 2765 int shift_count = ((int)$cnt$$constant) - 32; 2766 if (shift_count > 0) { 2767 emit_opcode(cbuf, 0xC1); 2768 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2769 emit_d8(cbuf, shift_count); 2770 } 2771 %} 2772 2773 // this version doesn't have add sp, 8 2774 enc_class convert_long_double2( eRegL src ) %{ 2775 // push $src.hi 2776 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2777 // push $src.lo 2778 emit_opcode(cbuf, 0x50+$src$$reg ); 2779 // fild 64-bits at [SP] 2780 emit_opcode(cbuf,0xdf); 2781 emit_d8(cbuf, 0x6C); 2782 emit_d8(cbuf, 0x24); 2783 emit_d8(cbuf, 0x00); 2784 %} 2785 2786 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2787 // Basic idea: long = (long)int * (long)int 2788 // IMUL EDX:EAX, src 2789 emit_opcode( cbuf, 0xF7 ); 2790 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2791 %} 2792 2793 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2794 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2795 // MUL EDX:EAX, src 2796 emit_opcode( cbuf, 0xF7 ); 2797 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2798 %} 2799 2800 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2801 // Basic idea: lo(result) = lo(x_lo * y_lo) 2802 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2803 // MOV $tmp,$src.lo 2804 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2805 // IMUL $tmp,EDX 2806 emit_opcode( cbuf, 0x0F ); 2807 emit_opcode( cbuf, 0xAF ); 2808 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2809 // MOV EDX,$src.hi 2810 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2811 // IMUL EDX,EAX 2812 emit_opcode( cbuf, 0x0F ); 2813 emit_opcode( cbuf, 0xAF ); 2814 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2815 // ADD $tmp,EDX 2816 emit_opcode( cbuf, 0x03 ); 2817 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2818 // MUL EDX:EAX,$src.lo 2819 emit_opcode( cbuf, 0xF7 ); 2820 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2821 // ADD EDX,ESI 2822 emit_opcode( 
cbuf, 0x03 ); 2823 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2824 %} 2825 2826 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2827 // Basic idea: lo(result) = lo(src * y_lo) 2828 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2829 // IMUL $tmp,EDX,$src 2830 emit_opcode( cbuf, 0x6B ); 2831 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2832 emit_d8( cbuf, (int)$src$$constant ); 2833 // MOV EDX,$src 2834 emit_opcode(cbuf, 0xB8 + EDX_enc); 2835 emit_d32( cbuf, (int)$src$$constant ); 2836 // MUL EDX:EAX,EDX 2837 emit_opcode( cbuf, 0xF7 ); 2838 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2839 // ADD EDX,ESI 2840 emit_opcode( cbuf, 0x03 ); 2841 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2842 %} 2843 2844 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2845 // PUSH src1.hi 2846 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2847 // PUSH src1.lo 2848 emit_opcode(cbuf, 0x50+$src1$$reg ); 2849 // PUSH src2.hi 2850 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2851 // PUSH src2.lo 2852 emit_opcode(cbuf, 0x50+$src2$$reg ); 2853 // CALL directly to the runtime 2854 cbuf.set_insts_mark(); 2855 emit_opcode(cbuf,0xE8); // Call into runtime 2856 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2857 // Restore stack 2858 emit_opcode(cbuf, 0x83); // add SP, #framesize 2859 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2860 emit_d8(cbuf, 4*4); 2861 %} 2862 2863 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2864 // PUSH src1.hi 2865 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2866 // PUSH src1.lo 2867 emit_opcode(cbuf, 0x50+$src1$$reg ); 2868 // PUSH src2.hi 2869 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2870 // PUSH src2.lo 2871 emit_opcode(cbuf, 0x50+$src2$$reg ); 2872 // CALL directly to the runtime 2873 cbuf.set_insts_mark(); 2874 emit_opcode(cbuf,0xE8); // Call into runtime 2875 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2876 // Restore stack 2877 emit_opcode(cbuf, 0x83); // add SP, #framesize 2878 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2879 emit_d8(cbuf, 4*4); 2880 %} 2881 2882 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2883 // MOV $tmp,$src.lo 2884 emit_opcode(cbuf, 0x8B); 2885 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2886 // OR $tmp,$src.hi 2887 emit_opcode(cbuf, 0x0B); 2888 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2889 %} 2890 2891 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2892 // CMP $src1.lo,$src2.lo 2893 emit_opcode( cbuf, 0x3B ); 2894 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2895 // JNE,s skip 2896 emit_cc(cbuf, 0x70, 0x5); 2897 emit_d8(cbuf,2); 2898 // CMP $src1.hi,$src2.hi 2899 emit_opcode( cbuf, 0x3B ); 2900 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2901 %} 2902 2903 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2904 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2905 emit_opcode( cbuf, 0x3B ); 2906 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2907 // MOV $tmp,$src1.hi 2908 emit_opcode( cbuf, 0x8B ); 2909 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2910 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2911 emit_opcode( cbuf, 0x1B ); 2912 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2913 %} 2914 2915 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2916 // XOR $tmp,$tmp 2917 emit_opcode(cbuf,0x33); // XOR 2918 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2919 // CMP $tmp,$src.lo 2920 emit_opcode( cbuf, 0x3B ); 2921 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2922 // SBB $tmp,$src.hi 2923 emit_opcode( cbuf, 0x1B ); 2924 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2925 %} 2926 2927 // Sniff, sniff... 
smells like Gnu Superoptimizer 2928 enc_class neg_long( eRegL dst ) %{ 2929 emit_opcode(cbuf,0xF7); // NEG hi 2930 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2931 emit_opcode(cbuf,0xF7); // NEG lo 2932 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2933 emit_opcode(cbuf,0x83); // SBB hi,0 2934 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2935 emit_d8 (cbuf,0 ); 2936 %} 2937 2938 enc_class enc_pop_rdx() %{ 2939 emit_opcode(cbuf,0x5A); 2940 %} 2941 2942 enc_class enc_rethrow() %{ 2943 cbuf.set_insts_mark(); 2944 emit_opcode(cbuf, 0xE9); // jmp entry 2945 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2946 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2947 %} 2948 2949 2950 // Convert a double to an int. Java semantics require we do complex 2951 // manglelations in the corner cases. So we set the rounding mode to 2952 // 'zero', store the darned double down as an int, and reset the 2953 // rounding mode to 'nearest'. The hardware throws an exception which 2954 // patches up the correct value directly to the stack. 2955 enc_class DPR2I_encoding( regDPR src ) %{ 2956 // Flip to round-to-zero mode. We attempted to allow invalid-op 2957 // exceptions here, so that a NAN or other corner-case value will 2958 // thrown an exception (but normal values get converted at full speed). 2959 // However, I2C adapters and other float-stack manglers leave pending 2960 // invalid-op exceptions hanging. We would have to clear them before 2961 // enabling them and that is more expensive than just testing for the 2962 // invalid value Intel stores down in the corner cases. 2963 emit_opcode(cbuf,0xD9); // FLDCW trunc 2964 emit_opcode(cbuf,0x2D); 2965 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 2966 // Allocate a word 2967 emit_opcode(cbuf,0x83); // SUB ESP,4 2968 emit_opcode(cbuf,0xEC); 2969 emit_d8(cbuf,0x04); 2970 // Encoding assumes a double has been pushed into FPR0. 
2971 // Store down the double as an int, popping the FPU stack 2972 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2973 emit_opcode(cbuf,0x1C); 2974 emit_d8(cbuf,0x24); 2975 // Restore the rounding mode; mask the exception 2976 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2977 emit_opcode(cbuf,0x2D); 2978 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2979 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 2980 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 2981 2982 // Load the converted int; adjust CPU stack 2983 emit_opcode(cbuf,0x58); // POP EAX 2984 emit_opcode(cbuf,0x3D); // CMP EAX,imm 2985 emit_d32 (cbuf,0x80000000); // 0x80000000 2986 emit_opcode(cbuf,0x75); // JNE around_slow_call 2987 emit_d8 (cbuf,0x07); // Size of slow_call 2988 // Push src onto stack slow-path 2989 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2990 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2991 // CALL directly to the runtime 2992 cbuf.set_insts_mark(); 2993 emit_opcode(cbuf,0xE8); // Call into runtime 2994 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2995 // Carry on here... 2996 %} 2997 2998 enc_class DPR2L_encoding( regDPR src ) %{ 2999 emit_opcode(cbuf,0xD9); // FLDCW trunc 3000 emit_opcode(cbuf,0x2D); 3001 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3002 // Allocate a word 3003 emit_opcode(cbuf,0x83); // SUB ESP,8 3004 emit_opcode(cbuf,0xEC); 3005 emit_d8(cbuf,0x08); 3006 // Encoding assumes a double has been pushed into FPR0. 3007 // Store down the double as a long, popping the FPU stack 3008 emit_opcode(cbuf,0xDF); // FISTP [ESP] 3009 emit_opcode(cbuf,0x3C); 3010 emit_d8(cbuf,0x24); 3011 // Restore the rounding mode; mask the exception 3012 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3013 emit_opcode(cbuf,0x2D); 3014 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3015 ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 3016 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3017 3018 // Load the converted int; adjust CPU stack 3019 emit_opcode(cbuf,0x58); // POP EAX 3020 emit_opcode(cbuf,0x5A); // POP EDX 3021 emit_opcode(cbuf,0x81); // CMP EDX,imm 3022 emit_d8 (cbuf,0xFA); // rdx 3023 emit_d32 (cbuf,0x80000000); // 0x80000000 3024 emit_opcode(cbuf,0x75); // JNE around_slow_call 3025 emit_d8 (cbuf,0x07+4); // Size of slow_call 3026 emit_opcode(cbuf,0x85); // TEST EAX,EAX 3027 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 3028 emit_opcode(cbuf,0x75); // JNE around_slow_call 3029 emit_d8 (cbuf,0x07); // Size of slow_call 3030 // Push src onto stack slow-path 3031 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3032 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3033 // CALL directly to the runtime 3034 cbuf.set_insts_mark(); 3035 emit_opcode(cbuf,0xE8); // Call into runtime 3036 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3037 // Carry on here... 
3038 %} 3039 3040 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 3041 // Operand was loaded from memory into fp ST (stack top) 3042 // FMUL ST,$src /* D8 C8+i */ 3043 emit_opcode(cbuf, 0xD8); 3044 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3045 %} 3046 3047 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3048 // FADDP ST,src2 /* D8 C0+i */ 3049 emit_opcode(cbuf, 0xD8); 3050 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3051 //could use FADDP src2,fpST /* DE C0+i */ 3052 %} 3053 3054 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3055 // FADDP src2,ST /* DE C0+i */ 3056 emit_opcode(cbuf, 0xDE); 3057 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3058 %} 3059 3060 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3061 // Operand has been loaded into fp ST (stack top) 3062 // FSUB ST,$src1 3063 emit_opcode(cbuf, 0xD8); 3064 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3065 3066 // FDIV 3067 emit_opcode(cbuf, 0xD8); 3068 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3069 %} 3070 3071 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3072 // Operand was loaded from memory into fp ST (stack top) 3073 // FADD ST,$src /* D8 C0+i */ 3074 emit_opcode(cbuf, 0xD8); 3075 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3076 3077 // FMUL ST,src2 /* D8 C*+i */ 3078 emit_opcode(cbuf, 0xD8); 3079 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3080 %} 3081 3082 3083 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3084 // Operand was loaded from memory into fp ST (stack top) 3085 // FADD ST,$src /* D8 C0+i */ 3086 emit_opcode(cbuf, 0xD8); 3087 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3088 3089 // FMULP src2,ST /* DE C8+i */ 3090 emit_opcode(cbuf, 0xDE); 3091 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3092 %} 3093 3094 // Atomically load the volatile long 3095 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3096 emit_opcode(cbuf,0xDF); 3097 int rm_byte_opcode = 0x05; 3098 int base = $mem$$base; 3099 int index = $mem$$index; 3100 int scale = $mem$$scale; 3101 int displace = $mem$$disp; 3102 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3103 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3104 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3105 %} 3106 3107 // Volatile Store Long. Must be atomic, so move it into 3108 // the FP TOS and then do a 64-bit FIST. Has to probe the 3109 // target address before the store (for null-ptr checks) 3110 // so the memory operand is used twice in the encoding. 3111 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3112 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3113 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3114 emit_opcode(cbuf,0xDF); 3115 int rm_byte_opcode = 0x07; 3116 int base = $mem$$base; 3117 int index = $mem$$index; 3118 int scale = $mem$$scale; 3119 int displace = $mem$$disp; 3120 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3121 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3122 %} 3123 3124 // Safepoint Poll. This polls the safepoint page, and causes an 3125 // exception if it is not readable. Unfortunately, it kills the condition code 3126 // in the process 3127 // We current use TESTL [spp],EDI 3128 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 3129 3130 enc_class Safepoint_Poll() %{ 3131 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); 3132 emit_opcode(cbuf,0x85); 3133 emit_rm (cbuf, 0x0, 0x7, 0x5); 3134 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 3135 %} 3136 %} 3137 3138 3139 //----------FRAME-------------------------------------------------------------- 3140 // Definition of frame structure and management information. 
3141 // 3142 // S T A C K L A Y O U T Allocators stack-slot number 3143 // | (to get allocators register number 3144 // G Owned by | | v add OptoReg::stack0()) 3145 // r CALLER | | 3146 // o | +--------+ pad to even-align allocators stack-slot 3147 // w V | pad0 | numbers; owned by CALLER 3148 // t -----------+--------+----> Matcher::_in_arg_limit, unaligned 3149 // h ^ | in | 5 3150 // | | args | 4 Holes in incoming args owned by SELF 3151 // | | | | 3 3152 // | | +--------+ 3153 // V | | old out| Empty on Intel, window on Sparc 3154 // | old |preserve| Must be even aligned. 3155 // | SP-+--------+----> Matcher::_old_SP, even aligned 3156 // | | in | 3 area for Intel ret address 3157 // Owned by |preserve| Empty on Sparc. 3158 // SELF +--------+ 3159 // | | pad2 | 2 pad to align old SP 3160 // | +--------+ 1 3161 // | | locks | 0 3162 // | +--------+----> OptoReg::stack0(), even aligned 3163 // | | pad1 | 11 pad to align new SP 3164 // | +--------+ 3165 // | | | 10 3166 // | | spills | 9 spills 3167 // V | | 8 (pad0 slot for callee) 3168 // -----------+--------+----> Matcher::_out_arg_limit, unaligned 3169 // ^ | out | 7 3170 // | | args | 6 Holes in outgoing args owned by CALLEE 3171 // Owned by +--------+ 3172 // CALLEE | new out| 6 Empty on Intel, window on Sparc 3173 // | new |preserve| Must be even-aligned. 3174 // | SP-+--------+----> Matcher::_new_SP, even aligned 3175 // | | | 3176 // 3177 // Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is 3178 // known from SELF's arguments and the Java calling convention. 3179 // Region 6-7 is determined per call site. 3180 // Note 2: If the calling convention leaves holes in the incoming argument 3181 // area, those holes are owned by SELF. Holes in the outgoing area 3182 // are owned by the CALLEE. Holes should not be nessecary in the 3183 // incoming area, as the Java calling convention is completely under 3184 // the control of the AD file. 
Doubles can be sorted and packed to 3185 // avoid holes. Holes in the outgoing arguments may be nessecary for 3186 // varargs C calling conventions. 3187 // Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is 3188 // even aligned with pad0 as needed. 3189 // Region 6 is even aligned. Region 6-7 is NOT even aligned; 3190 // region 6-11 is even aligned; it may be padded out more so that 3191 // the region from SP to FP meets the minimum stack alignment. 3192 3193 frame %{ 3194 // What direction does stack grow in (assumed to be same for C & Java) 3195 stack_direction(TOWARDS_LOW); 3196 3197 // These three registers define part of the calling convention 3198 // between compiled code and the interpreter. 3199 inline_cache_reg(EAX); // Inline Cache Register 3200 interpreter_method_oop_reg(EBX); // Method Oop Register when calling interpreter 3201 3202 // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] 3203 cisc_spilling_operand_name(indOffset32); 3204 3205 // Number of stack slots consumed by locking an object 3206 sync_stack_slots(1); 3207 3208 // Compiled code's Frame Pointer 3209 frame_pointer(ESP); 3210 // Interpreter stores its frame pointer in a register which is 3211 // stored to the stack by I2CAdaptors. 3212 // I2CAdaptors convert from interpreted java to compiled java. 3213 interpreter_frame_pointer(EBP); 3214 3215 // Stack alignment requirement 3216 // Alignment size in bytes (128-bit -> 16 bytes) 3217 stack_alignment(StackAlignmentInBytes); 3218 3219 // Number of stack slots between incoming argument block and the start of 3220 // a new frame. The PROLOG must add this many slots to the stack. The 3221 // EPILOG must remove this many slots. Intel needs one slot for 3222 // return address and one for rbp, (must save rbp) 3223 in_preserve_stack_slots(2+VerifyStackAtCalls); 3224 3225 // Number of outgoing stack slots killed above the out_preserve_stack_slots 3226 // for calls to C. 
Supports the var-args backing area for register parms. 3227 varargs_C_out_slots_killed(0); 3228 3229 // The after-PROLOG location of the return address. Location of 3230 // return address specifies a type (REG or STACK) and a number 3231 // representing the register number (i.e. - use a register name) or 3232 // stack slot. 3233 // Ret Addr is on stack in slot 0 if no locks or verification or alignment. 3234 // Otherwise, it is above the locks and verification slot and alignment word 3235 return_addr(STACK - 1 + 3236 round_to((Compile::current()->in_preserve_stack_slots() + 3237 Compile::current()->fixed_slots()), 3238 stack_alignment_in_slots())); 3239 3240 // Body of function which returns an integer array locating 3241 // arguments either in registers or in stack slots. Passed an array 3242 // of ideal registers called "sig" and a "length" count. Stack-slot 3243 // offsets are based on outgoing arguments, i.e. a CALLER setting up 3244 // arguments for a CALLEE. Incoming stack arguments are 3245 // automatically biased by the preserve_stack_slots field above. 3246 calling_convention %{ 3247 // No difference between ingoing/outgoing just pass false 3248 SharedRuntime::java_calling_convention(sig_bt, regs, length, false); 3249 %} 3250 3251 3252 // Body of function which returns an integer array locating 3253 // arguments either in registers or in stack slots. Passed an array 3254 // of ideal registers called "sig" and a "length" count. Stack-slot 3255 // offsets are based on outgoing arguments, i.e. a CALLER setting up 3256 // arguments for a CALLEE. Incoming stack arguments are 3257 // automatically biased by the preserve_stack_slots field above. 
3258 c_calling_convention %{ 3259 // This is obviously always outgoing 3260 (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); 3261 %} 3262 3263 // Location of C & interpreter return values 3264 c_return_value %{ 3265 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3266 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3267 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3268 3269 // in SSE2+ mode we want to keep the FPU stack clean so pretend 3270 // that C functions return float and double results in XMM0. 3271 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3272 return OptoRegPair(XMM0b_num,XMM0_num); 3273 if( ideal_reg == Op_RegF && UseSSE>=2 ) 3274 return OptoRegPair(OptoReg::Bad,XMM0_num); 3275 3276 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3277 %} 3278 3279 // Location of return values 3280 return_value %{ 3281 assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); 3282 static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; 3283 static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; 3284 if( ideal_reg == Op_RegD && UseSSE>=2 ) 3285 return OptoRegPair(XMM0b_num,XMM0_num); 3286 if( ideal_reg == Op_RegF && UseSSE>=1 ) 3287 return OptoRegPair(OptoReg::Bad,XMM0_num); 3288 return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); 3289 %} 3290 3291 %} 3292 3293 //----------ATTRIBUTES--------------------------------------------------------- 3294 //----------Operand Attributes------------------------------------------------- 3295 op_attrib op_cost(0); // Required cost attribute 3296 3297 //----------Instruction Attributes--------------------------------------------- 3298 ins_attrib ins_cost(100); // Required cost attribute 3299 ins_attrib ins_size(8); // Required 
size attribute (in bits) 3300 ins_attrib ins_short_branch(0); // Required flag: is this instruction a 3301 // non-matching short branch variant of some 3302 // long branch? 3303 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) 3304 // specifies the alignment that some part of the instruction (not 3305 // necessarily the start) requires. If > 1, a compute_padding() 3306 // function must be provided for the instruction 3307 3308 //----------OPERANDS----------------------------------------------------------- 3309 // Operand definitions must precede instruction definitions for correct parsing 3310 // in the ADLC because operands constitute user defined types which are used in 3311 // instruction definitions. 3312 3313 //----------Simple Operands---------------------------------------------------- 3314 // Immediate Operands 3315 // Integer Immediate 3316 operand immI() %{ 3317 match(ConI); 3318 3319 op_cost(10); 3320 format %{ %} 3321 interface(CONST_INTER); 3322 %} 3323 3324 // Constant for test vs zero 3325 operand immI0() %{ 3326 predicate(n->get_int() == 0); 3327 match(ConI); 3328 3329 op_cost(0); 3330 format %{ %} 3331 interface(CONST_INTER); 3332 %} 3333 3334 // Constant for increment 3335 operand immI1() %{ 3336 predicate(n->get_int() == 1); 3337 match(ConI); 3338 3339 op_cost(0); 3340 format %{ %} 3341 interface(CONST_INTER); 3342 %} 3343 3344 // Constant for decrement 3345 operand immI_M1() %{ 3346 predicate(n->get_int() == -1); 3347 match(ConI); 3348 3349 op_cost(0); 3350 format %{ %} 3351 interface(CONST_INTER); 3352 %} 3353 3354 // Valid scale values for addressing modes 3355 operand immI2() %{ 3356 predicate(0 <= n->get_int() && (n->get_int() <= 3)); 3357 match(ConI); 3358 3359 format %{ %} 3360 interface(CONST_INTER); 3361 %} 3362 3363 operand immI8() %{ 3364 predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); 3365 match(ConI); 3366 3367 op_cost(5); 3368 format %{ %} 3369 interface(CONST_INTER); 3370 %} 3371 3372 
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value fits in a signed 32-bit word
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.
// Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" with out a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use it's address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used as to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are variable-length
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation
pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2); 4928 src : S5(read); 4929 mem : S3(read); 4930 DECODE : S0; // any decoder for FPU PUSH 4931 D0 : S1; // big decoder only 4932 FPU : S4; 4933 MEM : S3; // any mem 4934 %} 4935 4936 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4937 instruction_count(3); 4938 src1 : S3(read); 4939 src2 : S3(read); 4940 mem : S3(read); 4941 DECODE : S0(2); // any decoder for FPU PUSH 4942 D0 : S1; // big decoder only 4943 FPU : S4; 4944 MEM : S3; // any mem 4945 %} 4946 4947 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4948 instruction_count(3); 4949 src1 : S3(read); 4950 src2 : S3(read); 4951 mem : S4(read); 4952 DECODE : S0; // any decoder for FPU PUSH 4953 D0 : S0(2); // big decoder only 4954 FPU : S4; 4955 MEM : S3(2); // any mem 4956 %} 4957 4958 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4959 instruction_count(2); 4960 src1 : S3(read); 4961 dst : S4(read); 4962 D0 : S0(2); // big decoder only 4963 MEM : S3(2); // any mem 4964 %} 4965 4966 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4967 instruction_count(3); 4968 src1 : S3(read); 4969 src2 : S3(read); 4970 dst : S4(read); 4971 D0 : S0(3); // big decoder only 4972 FPU : S4; 4973 MEM : S3(3); // any mem 4974 %} 4975 4976 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4977 instruction_count(3); 4978 src1 : S4(read); 4979 mem : S4(read); 4980 DECODE : S0; // any decoder for FPU PUSH 4981 D0 : S0(2); // big decoder only 4982 FPU : S4; 4983 MEM : S3(2); // any mem 4984 %} 4985 4986 // Float load constant 4987 pipe_class fpu_reg_con(regDPR dst) %{ 4988 instruction_count(2); 4989 dst : S5(write); 4990 D0 : S0; // big decoder only for the load 4991 DECODE : S1; // any decoder for FPU POP 4992 FPU : S4; 4993 MEM : S3; // any mem 4994 %} 4995 4996 // Float load constant 4997 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 4998 instruction_count(3); 4999 dst : S5(write); 5000 src : S3(read); 5001 D0 : S0; // big decoder only for 
the load 5002 DECODE : S1(2); // any decoder for FPU POP 5003 FPU : S4; 5004 MEM : S3; // any mem 5005 %} 5006 5007 // UnConditional branch 5008 pipe_class pipe_jmp( label labl ) %{ 5009 single_instruction; 5010 BR : S3; 5011 %} 5012 5013 // Conditional branch 5014 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5015 single_instruction; 5016 cr : S1(read); 5017 BR : S3; 5018 %} 5019 5020 // Allocation idiom 5021 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5022 instruction_count(1); force_serialization; 5023 fixed_latency(6); 5024 heap_ptr : S3(read); 5025 DECODE : S0(3); 5026 D0 : S2; 5027 MEM : S3; 5028 ALU : S3(2); 5029 dst : S5(write); 5030 BR : S5; 5031 %} 5032 5033 // Generic big/slow expanded idiom 5034 pipe_class pipe_slow( ) %{ 5035 instruction_count(10); multiple_bundles; force_serialization; 5036 fixed_latency(100); 5037 D0 : S0(2); 5038 MEM : S3(2); 5039 %} 5040 5041 // The real do-nothing guy 5042 pipe_class empty( ) %{ 5043 instruction_count(0); 5044 %} 5045 5046 // Define the class for the Nop node 5047 define %{ 5048 MachNop = empty; 5049 %} 5050 5051 %} 5052 5053 //----------INSTRUCTIONS------------------------------------------------------- 5054 // 5055 // match -- States which machine-independent subtree may be replaced 5056 // by this instruction. 5057 // ins_cost -- The estimated cost of this instruction is used by instruction 5058 // selection to identify a minimum cost tree of machine 5059 // instructions that matches a tree of machine-independent 5060 // instructions. 5061 // format -- A string providing the disassembly for this instruction. 5062 // The value of an instruction's operand may be inserted 5063 // by referring to it with a '$' prefix. 5064 // opcode -- Three instruction opcodes may be provided. These are referred 5065 // to within an encode class as $primary, $secondary, and $tertiary 5066 // respectively. 
//               The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  // Swap bytes within each 32-bit half, then exchange the halves.
  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG  $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  // BSWAP the full 32 bits, then shift the reversed 16-bit value
  // down from the high half (zero-extended).
  format %{ "BSWAP $dst\n\t"
            "SHR   $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  // Same as the unsigned variant, but SAR sign-extends the result.
  format %{ "BSWAP $dst\n\t"
            "SAR   $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  // BSR yields the index of the highest set bit, so the answer is
  // 31 - index.  BSR leaves dst undefined on zero input (ZF set),
  // hence the JNZ/MOV -1 fixup so that zero input yields 32.
  format %{ "BSR   $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ   skip\n\t"
            "MOV   $dst, -1\n"
      "skip:\n\t"
            "NEG   $dst\n\t"
            "ADD   $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  // LZCNT sets CF when its source is all zeros; JNC therefore means
  // the high word had a set bit and its count is the final answer.
  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC   done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD   $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  // Two-word BSR: try the high word first; on zero fall back to the
  // low word (+32 bias handled by the 63 - index arithmetic below).
  // Zero input overall yields 64 via the MOV -1 fixup.
  format %{ "BSR   $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ    msw_is_zero\n\t"
            "ADD   $dst, 32\n\t"
            "JMP   not_zero\n"
      "msw_is_zero:\n\t"
            "BSR   $dst, $src.lo\n\t"
            "JNZ   not_zero\n\t"
            "MOV   $dst, -1\n"
      "not_zero:\n\t"
            "NEG   $dst\n\t"
            "ADD   $dst, 63\n" %}
 ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  // BSF leaves dst undefined on zero input (ZF set); fix up to 32.
  format %{ "BSF   $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ   done\n\t"
            "MOV   $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  // TZCNT sets CF when its source is all zeros; JNC therefore means
  // the low word was non-zero and its count is the final answer.
  format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC    done\n\t"
            "TZCNT  $dst, $src.hi\n\t"
            "ADD    $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ tzcntl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  // Two-word BSF: low word first; on zero scan the high word and add
  // 32.  Zero input overall takes both fixups: 32 + 32 = 64.
  format %{ "BSF   $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ   done\n\t"
            "BSF   $dst, $src.hi\n\t"
            "JNZ   msw_not_zero\n\t"
            "MOV   $dst, 32\n"
      "msw_not_zero:\n\t"
            "ADD   $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  // 64-bit popcount = popcount(lo) + popcount(hi).
  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  // The two 32-bit halves are read directly from memory; disp+4
  // addresses the high word of the little-endian long.
  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  // Only the low 8 bits of the mask are relevant after a zero-extended
  // byte load, hence right_n_bits(8).
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  // The 0xFF mask folds into a single zero-extended byte load.
  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  // A 31-bit mask guarantees a non-negative result, so the high word
  // can simply be zeroed instead of sign-extended.
  format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  // Non-atomic two-word load; guarded out for volatile accesses.
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
            "MOV    $dst.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
5742 instruct loadL_volatile(stackSlotL dst, memory mem) %{ 5743 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 5744 match(Set dst (LoadL mem)); 5745 5746 ins_cost(200); 5747 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 5748 "FISTp $dst" %} 5749 ins_encode(enc_loadL_volatile(mem,dst)); 5750 ins_pipe( fpu_reg_mem ); 5751 %} 5752 5753 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 5754 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5755 match(Set dst (LoadL mem)); 5756 effect(TEMP tmp); 5757 ins_cost(180); 5758 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5759 "MOVSD $dst,$tmp" %} 5760 ins_encode %{ 5761 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5762 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 5763 %} 5764 ins_pipe( pipe_slow ); 5765 %} 5766 5767 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 5768 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5769 match(Set dst (LoadL mem)); 5770 effect(TEMP tmp); 5771 ins_cost(160); 5772 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5773 "MOVD $dst.lo,$tmp\n\t" 5774 "PSRLQ $tmp,32\n\t" 5775 "MOVD $dst.hi,$tmp" %} 5776 ins_encode %{ 5777 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5778 __ movdl($dst$$Register, $tmp$$XMMRegister); 5779 __ psrlq($tmp$$XMMRegister, 32); 5780 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 5781 %} 5782 ins_pipe( pipe_slow ); 5783 %} 5784 5785 // Load Range 5786 instruct loadRange(rRegI dst, memory mem) %{ 5787 match(Set dst (LoadRange mem)); 5788 5789 ins_cost(125); 5790 format %{ "MOV $dst,$mem" %} 5791 opcode(0x8B); 5792 ins_encode( OpcP, RegMem(dst,mem)); 5793 ins_pipe( ialu_reg_mem ); 5794 %} 5795 5796 5797 // Load Pointer 5798 instruct loadP(eRegP dst, memory mem) %{ 5799 match(Set dst (LoadP mem)); 5800 5801 ins_cost(125); 5802 format %{ "MOV $dst,$mem" %} 5803 opcode(0x8B); 5804 ins_encode( OpcP, RegMem(dst,mem)); 5805 ins_pipe( 
ialu_reg_mem ); 5806 %} 5807 5808 // Load Klass Pointer 5809 instruct loadKlass(eRegP dst, memory mem) %{ 5810 match(Set dst (LoadKlass mem)); 5811 5812 ins_cost(125); 5813 format %{ "MOV $dst,$mem" %} 5814 opcode(0x8B); 5815 ins_encode( OpcP, RegMem(dst,mem)); 5816 ins_pipe( ialu_reg_mem ); 5817 %} 5818 5819 // Load Double 5820 instruct loadDPR(regDPR dst, memory mem) %{ 5821 predicate(UseSSE<=1); 5822 match(Set dst (LoadD mem)); 5823 5824 ins_cost(150); 5825 format %{ "FLD_D ST,$mem\n\t" 5826 "FSTP $dst" %} 5827 opcode(0xDD); /* DD /0 */ 5828 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5829 Pop_Reg_DPR(dst) ); 5830 ins_pipe( fpu_reg_mem ); 5831 %} 5832 5833 // Load Double to XMM 5834 instruct loadD(regD dst, memory mem) %{ 5835 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 5836 match(Set dst (LoadD mem)); 5837 ins_cost(145); 5838 format %{ "MOVSD $dst,$mem" %} 5839 ins_encode %{ 5840 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5841 %} 5842 ins_pipe( pipe_slow ); 5843 %} 5844 5845 instruct loadD_partial(regD dst, memory mem) %{ 5846 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 5847 match(Set dst (LoadD mem)); 5848 ins_cost(145); 5849 format %{ "MOVLPD $dst,$mem" %} 5850 ins_encode %{ 5851 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5852 %} 5853 ins_pipe( pipe_slow ); 5854 %} 5855 5856 // Load to XMM register (single-precision floating point) 5857 // MOVSS instruction 5858 instruct loadF(regF dst, memory mem) %{ 5859 predicate(UseSSE>=1); 5860 match(Set dst (LoadF mem)); 5861 ins_cost(145); 5862 format %{ "MOVSS $dst,$mem" %} 5863 ins_encode %{ 5864 __ movflt ($dst$$XMMRegister, $mem$$Address); 5865 %} 5866 ins_pipe( pipe_slow ); 5867 %} 5868 5869 // Load Float 5870 instruct loadFPR(regFPR dst, memory mem) %{ 5871 predicate(UseSSE==0); 5872 match(Set dst (LoadF mem)); 5873 5874 ins_cost(150); 5875 format %{ "FLD_S ST,$mem\n\t" 5876 "FSTP $dst" %} 5877 opcode(0xD9); /* D9 /0 */ 5878 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5879 Pop_Reg_FPR(dst) ); 5880 
ins_pipe( fpu_reg_mem ); 5881 %} 5882 5883 // Load Effective Address 5884 instruct leaP8(eRegP dst, indOffset8 mem) %{ 5885 match(Set dst mem); 5886 5887 ins_cost(110); 5888 format %{ "LEA $dst,$mem" %} 5889 opcode(0x8D); 5890 ins_encode( OpcP, RegMem(dst,mem)); 5891 ins_pipe( ialu_reg_reg_fat ); 5892 %} 5893 5894 instruct leaP32(eRegP dst, indOffset32 mem) %{ 5895 match(Set dst mem); 5896 5897 ins_cost(110); 5898 format %{ "LEA $dst,$mem" %} 5899 opcode(0x8D); 5900 ins_encode( OpcP, RegMem(dst,mem)); 5901 ins_pipe( ialu_reg_reg_fat ); 5902 %} 5903 5904 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ 5905 match(Set dst mem); 5906 5907 ins_cost(110); 5908 format %{ "LEA $dst,$mem" %} 5909 opcode(0x8D); 5910 ins_encode( OpcP, RegMem(dst,mem)); 5911 ins_pipe( ialu_reg_reg_fat ); 5912 %} 5913 5914 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ 5915 match(Set dst mem); 5916 5917 ins_cost(110); 5918 format %{ "LEA $dst,$mem" %} 5919 opcode(0x8D); 5920 ins_encode( OpcP, RegMem(dst,mem)); 5921 ins_pipe( ialu_reg_reg_fat ); 5922 %} 5923 5924 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ 5925 match(Set dst mem); 5926 5927 ins_cost(110); 5928 format %{ "LEA $dst,$mem" %} 5929 opcode(0x8D); 5930 ins_encode( OpcP, RegMem(dst,mem)); 5931 ins_pipe( ialu_reg_reg_fat ); 5932 %} 5933 5934 // Load Constant 5935 instruct loadConI(rRegI dst, immI src) %{ 5936 match(Set dst src); 5937 5938 format %{ "MOV $dst,$src" %} 5939 ins_encode( LdImmI(dst, src) ); 5940 ins_pipe( ialu_reg_fat ); 5941 %} 5942 5943 // Load Constant zero 5944 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{ 5945 match(Set dst src); 5946 effect(KILL cr); 5947 5948 ins_cost(50); 5949 format %{ "XOR $dst,$dst" %} 5950 opcode(0x33); /* + rd */ 5951 ins_encode( OpcP, RegReg( dst, dst ) ); 5952 ins_pipe( ialu_reg ); 5953 %} 5954 5955 instruct loadConP(eRegP dst, immP src) %{ 5956 match(Set dst src); 5957 5958 format %{ "MOV $dst,$src" %} 5959 opcode(0xB8); /* + rd */ 5960 
ins_encode( LdImmP(dst, src) ); 5961 ins_pipe( ialu_reg_fat ); 5962 %} 5963 5964 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ 5965 match(Set dst src); 5966 effect(KILL cr); 5967 ins_cost(200); 5968 format %{ "MOV $dst.lo,$src.lo\n\t" 5969 "MOV $dst.hi,$src.hi" %} 5970 opcode(0xB8); 5971 ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); 5972 ins_pipe( ialu_reg_long_fat ); 5973 %} 5974 5975 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ 5976 match(Set dst src); 5977 effect(KILL cr); 5978 ins_cost(150); 5979 format %{ "XOR $dst.lo,$dst.lo\n\t" 5980 "XOR $dst.hi,$dst.hi" %} 5981 opcode(0x33,0x33); 5982 ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); 5983 ins_pipe( ialu_reg_long ); 5984 %} 5985 5986 // The instruction usage is guarded by predicate in operand immFPR(). 5987 instruct loadConFPR(regFPR dst, immFPR con) %{ 5988 match(Set dst con); 5989 ins_cost(125); 5990 format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" 5991 "FSTP $dst" %} 5992 ins_encode %{ 5993 __ fld_s($constantaddress($con)); 5994 __ fstp_d($dst$$reg); 5995 %} 5996 ins_pipe(fpu_reg_con); 5997 %} 5998 5999 // The instruction usage is guarded by predicate in operand immFPR0(). 6000 instruct loadConFPR0(regFPR dst, immFPR0 con) %{ 6001 match(Set dst con); 6002 ins_cost(125); 6003 format %{ "FLDZ ST\n\t" 6004 "FSTP $dst" %} 6005 ins_encode %{ 6006 __ fldz(); 6007 __ fstp_d($dst$$reg); 6008 %} 6009 ins_pipe(fpu_reg_con); 6010 %} 6011 6012 // The instruction usage is guarded by predicate in operand immFPR1(). 6013 instruct loadConFPR1(regFPR dst, immFPR1 con) %{ 6014 match(Set dst con); 6015 ins_cost(125); 6016 format %{ "FLD1 ST\n\t" 6017 "FSTP $dst" %} 6018 ins_encode %{ 6019 __ fld1(); 6020 __ fstp_d($dst$$reg); 6021 %} 6022 ins_pipe(fpu_reg_con); 6023 %} 6024 6025 // The instruction usage is guarded by predicate in operand immF(). 
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
"FSTP $dst" %} 6164 opcode(0xDD); /* DD /0, FLD m64real */ 6165 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6166 Pop_Reg_DPR(dst) ); 6167 ins_pipe( fpu_reg_mem ); 6168 %} 6169 6170 // Prefetch instructions for allocation. 6171 // Must be safe to execute with invalid address (cannot fault). 6172 6173 instruct prefetchAlloc0( memory mem ) %{ 6174 predicate(UseSSE==0 && AllocatePrefetchInstr!=3); 6175 match(PrefetchAllocation mem); 6176 ins_cost(0); 6177 size(0); 6178 format %{ "Prefetch allocation (non-SSE is empty encoding)" %} 6179 ins_encode(); 6180 ins_pipe(empty); 6181 %} 6182 6183 instruct prefetchAlloc( memory mem ) %{ 6184 predicate(AllocatePrefetchInstr==3); 6185 match( PrefetchAllocation mem ); 6186 ins_cost(100); 6187 6188 format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %} 6189 ins_encode %{ 6190 __ prefetchw($mem$$Address); 6191 %} 6192 ins_pipe(ialu_mem); 6193 %} 6194 6195 instruct prefetchAllocNTA( memory mem ) %{ 6196 predicate(UseSSE>=1 && AllocatePrefetchInstr==0); 6197 match(PrefetchAllocation mem); 6198 ins_cost(100); 6199 6200 format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %} 6201 ins_encode %{ 6202 __ prefetchnta($mem$$Address); 6203 %} 6204 ins_pipe(ialu_mem); 6205 %} 6206 6207 instruct prefetchAllocT0( memory mem ) %{ 6208 predicate(UseSSE>=1 && AllocatePrefetchInstr==1); 6209 match(PrefetchAllocation mem); 6210 ins_cost(100); 6211 6212 format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %} 6213 ins_encode %{ 6214 __ prefetcht0($mem$$Address); 6215 %} 6216 ins_pipe(ialu_mem); 6217 %} 6218 6219 instruct prefetchAllocT2( memory mem ) %{ 6220 predicate(UseSSE>=1 && AllocatePrefetchInstr==2); 6221 match(PrefetchAllocation mem); 6222 ins_cost(100); 6223 6224 format %{ "PREFETCHT2 $mem\t! 
Prefetch allocation into L2 cache for write" %} 6225 ins_encode %{ 6226 __ prefetcht2($mem$$Address); 6227 %} 6228 ins_pipe(ialu_mem); 6229 %} 6230 6231 //----------Store Instructions------------------------------------------------- 6232 6233 // Store Byte 6234 instruct storeB(memory mem, xRegI src) %{ 6235 match(Set mem (StoreB mem src)); 6236 6237 ins_cost(125); 6238 format %{ "MOV8 $mem,$src" %} 6239 opcode(0x88); 6240 ins_encode( OpcP, RegMem( src, mem ) ); 6241 ins_pipe( ialu_mem_reg ); 6242 %} 6243 6244 // Store Char/Short 6245 instruct storeC(memory mem, rRegI src) %{ 6246 match(Set mem (StoreC mem src)); 6247 6248 ins_cost(125); 6249 format %{ "MOV16 $mem,$src" %} 6250 opcode(0x89, 0x66); 6251 ins_encode( OpcS, OpcP, RegMem( src, mem ) ); 6252 ins_pipe( ialu_mem_reg ); 6253 %} 6254 6255 // Store Integer 6256 instruct storeI(memory mem, rRegI src) %{ 6257 match(Set mem (StoreI mem src)); 6258 6259 ins_cost(125); 6260 format %{ "MOV $mem,$src" %} 6261 opcode(0x89); 6262 ins_encode( OpcP, RegMem( src, mem ) ); 6263 ins_pipe( ialu_mem_reg ); 6264 %} 6265 6266 // Store Long 6267 instruct storeL(long_memory mem, eRegL src) %{ 6268 predicate(!((StoreLNode*)n)->require_atomic_access()); 6269 match(Set mem (StoreL mem src)); 6270 6271 ins_cost(200); 6272 format %{ "MOV $mem,$src.lo\n\t" 6273 "MOV $mem+4,$src.hi" %} 6274 opcode(0x89, 0x89); 6275 ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) ); 6276 ins_pipe( ialu_mem_long_reg ); 6277 %} 6278 6279 // Store Long to Integer 6280 instruct storeL2I(memory mem, eRegL src) %{ 6281 match(Set mem (StoreI mem (ConvL2I src))); 6282 6283 format %{ "MOV $mem,$src.lo\t# long -> int" %} 6284 ins_encode %{ 6285 __ movl($mem$$Address, $src$$Register); 6286 %} 6287 ins_pipe(ialu_mem_reg); 6288 %} 6289 6290 // Volatile Store Long. Must be atomic, so move it into 6291 // the FP TOS and then do a 64-bit FIST. 
Has to probe the 6292 // target address before the store (for null-ptr checks) 6293 // so the memory operand is used twice in the encoding. 6294 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{ 6295 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access()); 6296 match(Set mem (StoreL mem src)); 6297 effect( KILL cr ); 6298 ins_cost(400); 6299 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6300 "FILD $src\n\t" 6301 "FISTp $mem\t # 64-bit atomic volatile long store" %} 6302 opcode(0x3B); 6303 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); 6304 ins_pipe( fpu_reg_mem ); 6305 %} 6306 6307 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ 6308 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6309 match(Set mem (StoreL mem src)); 6310 effect( TEMP tmp, KILL cr ); 6311 ins_cost(380); 6312 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6313 "MOVSD $tmp,$src\n\t" 6314 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6315 ins_encode %{ 6316 __ cmpl(rax, $mem$$Address); 6317 __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp)); 6318 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6319 %} 6320 ins_pipe( pipe_slow ); 6321 %} 6322 6323 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ 6324 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6325 match(Set mem (StoreL mem src)); 6326 effect( TEMP tmp2 , TEMP tmp, KILL cr ); 6327 ins_cost(360); 6328 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6329 "MOVD $tmp,$src.lo\n\t" 6330 "MOVD $tmp2,$src.hi\n\t" 6331 "PUNPCKLDQ $tmp,$tmp2\n\t" 6332 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6333 ins_encode %{ 6334 __ cmpl(rax, $mem$$Address); 6335 __ movdl($tmp$$XMMRegister, $src$$Register); 6336 __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 6337 __ punpckldq($tmp$$XMMRegister, 
$tmp2$$XMMRegister); 6338 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6339 %} 6340 ins_pipe( pipe_slow ); 6341 %} 6342 6343 // Store Pointer; for storing unknown oops and raw pointers 6344 instruct storeP(memory mem, anyRegP src) %{ 6345 match(Set mem (StoreP mem src)); 6346 6347 ins_cost(125); 6348 format %{ "MOV $mem,$src" %} 6349 opcode(0x89); 6350 ins_encode( OpcP, RegMem( src, mem ) ); 6351 ins_pipe( ialu_mem_reg ); 6352 %} 6353 6354 // Store Integer Immediate 6355 instruct storeImmI(memory mem, immI src) %{ 6356 match(Set mem (StoreI mem src)); 6357 6358 ins_cost(150); 6359 format %{ "MOV $mem,$src" %} 6360 opcode(0xC7); /* C7 /0 */ 6361 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6362 ins_pipe( ialu_mem_imm ); 6363 %} 6364 6365 // Store Short/Char Immediate 6366 instruct storeImmI16(memory mem, immI16 src) %{ 6367 predicate(UseStoreImmI16); 6368 match(Set mem (StoreC mem src)); 6369 6370 ins_cost(150); 6371 format %{ "MOV16 $mem,$src" %} 6372 opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */ 6373 ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src )); 6374 ins_pipe( ialu_mem_imm ); 6375 %} 6376 6377 // Store Pointer Immediate; null pointers or constant oops that do not 6378 // need card-mark barriers. 
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87 top-of-stack form)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD);               /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float (x87 top-of-stack form, no SSE)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9);               /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move (branch-based emulation when CMOV is unavailable)
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI
(Binary cop cr) (Binary dst src))); 6679 ins_cost(200); 6680 format %{ "J$cop,us skip\t# unsigned cmove\n\t" 6681 "MOV $dst,$src\n" 6682 "skip:" %} 6683 ins_encode %{ 6684 Label Lskip; 6685 // Invert sense of branch from sense of CMOV 6686 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6687 __ movl($dst$$Register, $src$$Register); 6688 __ bind(Lskip); 6689 %} 6690 ins_pipe( pipe_cmov_reg ); 6691 %} 6692 6693 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ 6694 predicate(VM_Version::supports_cmov() ); 6695 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6696 ins_cost(200); 6697 format %{ "CMOV$cop $dst,$src" %} 6698 opcode(0x0F,0x40); 6699 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6700 ins_pipe( pipe_cmov_reg ); 6701 %} 6702 6703 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ 6704 predicate(VM_Version::supports_cmov() ); 6705 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6706 ins_cost(200); 6707 format %{ "CMOV$cop $dst,$src" %} 6708 opcode(0x0F,0x40); 6709 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6710 ins_pipe( pipe_cmov_reg ); 6711 %} 6712 6713 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ 6714 predicate(VM_Version::supports_cmov() ); 6715 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6716 ins_cost(200); 6717 expand %{ 6718 cmovI_regU(cop, cr, dst, src); 6719 %} 6720 %} 6721 6722 // Conditional move 6723 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{ 6724 predicate(VM_Version::supports_cmov() ); 6725 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6726 ins_cost(250); 6727 format %{ "CMOV$cop $dst,$src" %} 6728 opcode(0x0F,0x40); 6729 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6730 ins_pipe( pipe_cmov_mem ); 6731 %} 6732 6733 // Conditional move 6734 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ 6735 predicate(VM_Version::supports_cmov() ); 6736 
match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6737 ins_cost(250); 6738 format %{ "CMOV$cop $dst,$src" %} 6739 opcode(0x0F,0x40); 6740 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6741 ins_pipe( pipe_cmov_mem ); 6742 %} 6743 6744 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ 6745 predicate(VM_Version::supports_cmov() ); 6746 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6747 ins_cost(250); 6748 expand %{ 6749 cmovI_memU(cop, cr, dst, src); 6750 %} 6751 %} 6752 6753 // Conditional move 6754 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6755 predicate(VM_Version::supports_cmov() ); 6756 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6757 ins_cost(200); 6758 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6759 opcode(0x0F,0x40); 6760 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6761 ins_pipe( pipe_cmov_reg ); 6762 %} 6763 6764 // Conditional move (non-P6 version) 6765 // Note: a CMoveP is generated for stubs and native wrappers 6766 // regardless of whether we are on a P6, so we 6767 // emulate a cmov here 6768 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6769 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6770 ins_cost(300); 6771 format %{ "Jn$cop skip\n\t" 6772 "MOV $dst,$src\t# pointer\n" 6773 "skip:" %} 6774 opcode(0x8b); 6775 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); 6776 ins_pipe( pipe_cmov_reg ); 6777 %} 6778 6779 // Conditional move 6780 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ 6781 predicate(VM_Version::supports_cmov() ); 6782 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6783 ins_cost(200); 6784 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6785 opcode(0x0F,0x40); 6786 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6787 ins_pipe( pipe_cmov_reg ); 6788 %} 6789 6790 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ 6791 
predicate(VM_Version::supports_cmov() ); 6792 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6793 ins_cost(200); 6794 expand %{ 6795 cmovP_regU(cop, cr, dst, src); 6796 %} 6797 %} 6798 6799 // DISABLED: Requires the ADLC to emit a bottom_type call that 6800 // correctly meets the two pointer arguments; one is an incoming 6801 // register but the other is a memory operand. ALSO appears to 6802 // be buggy with implicit null checks. 6803 // 6804 //// Conditional move 6805 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{ 6806 // predicate(VM_Version::supports_cmov() ); 6807 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6808 // ins_cost(250); 6809 // format %{ "CMOV$cop $dst,$src\t# ptr" %} 6810 // opcode(0x0F,0x40); 6811 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6812 // ins_pipe( pipe_cmov_mem ); 6813 //%} 6814 // 6815 //// Conditional move 6816 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{ 6817 // predicate(VM_Version::supports_cmov() ); 6818 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6819 // ins_cost(250); 6820 // format %{ "CMOV$cop $dst,$src\t# ptr" %} 6821 // opcode(0x0F,0x40); 6822 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6823 // ins_pipe( pipe_cmov_mem ); 6824 //%} 6825 6826 // Conditional move 6827 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ 6828 predicate(UseSSE<=1); 6829 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6830 ins_cost(200); 6831 format %{ "FCMOV$cop $dst,$src\t# double" %} 6832 opcode(0xDA); 6833 ins_encode( enc_cmov_dpr(cop,src) ); 6834 ins_pipe( pipe_cmovDPR_reg ); 6835 %} 6836 6837 // Conditional move 6838 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ 6839 predicate(UseSSE==0); 6840 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6841 ins_cost(200); 6842 format %{ "FCMOV$cop $dst,$src\t# float" %} 6843 opcode(0xDA); 
6844 ins_encode( enc_cmov_dpr(cop,src) ); 6845 ins_pipe( pipe_cmovDPR_reg ); 6846 %} 6847 6848 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6849 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ 6850 predicate(UseSSE<=1); 6851 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6852 ins_cost(200); 6853 format %{ "Jn$cop skip\n\t" 6854 "MOV $dst,$src\t# double\n" 6855 "skip:" %} 6856 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6857 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); 6858 ins_pipe( pipe_cmovDPR_reg ); 6859 %} 6860 6861 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6862 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ 6863 predicate(UseSSE==0); 6864 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6865 ins_cost(200); 6866 format %{ "Jn$cop skip\n\t" 6867 "MOV $dst,$src\t# float\n" 6868 "skip:" %} 6869 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6870 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); 6871 ins_pipe( pipe_cmovDPR_reg ); 6872 %} 6873 6874 // No CMOVE with SSE/SSE2 6875 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 6876 predicate (UseSSE>=1); 6877 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6878 ins_cost(200); 6879 format %{ "Jn$cop skip\n\t" 6880 "MOVSS $dst,$src\t# float\n" 6881 "skip:" %} 6882 ins_encode %{ 6883 Label skip; 6884 // Invert sense of branch from sense of CMOV 6885 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6886 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6887 __ bind(skip); 6888 %} 6889 ins_pipe( pipe_slow ); 6890 %} 6891 6892 // No CMOVE with SSE/SSE2 6893 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 6894 predicate (UseSSE>=2); 6895 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6896 ins_cost(200); 6897 format %{ "Jn$cop skip\n\t" 6898 "MOVSD $dst,$src\t# 
float\n" 6899 "skip:" %} 6900 ins_encode %{ 6901 Label skip; 6902 // Invert sense of branch from sense of CMOV 6903 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6904 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6905 __ bind(skip); 6906 %} 6907 ins_pipe( pipe_slow ); 6908 %} 6909 6910 // unsigned version 6911 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 6912 predicate (UseSSE>=1); 6913 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6914 ins_cost(200); 6915 format %{ "Jn$cop skip\n\t" 6916 "MOVSS $dst,$src\t# float\n" 6917 "skip:" %} 6918 ins_encode %{ 6919 Label skip; 6920 // Invert sense of branch from sense of CMOV 6921 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6922 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6923 __ bind(skip); 6924 %} 6925 ins_pipe( pipe_slow ); 6926 %} 6927 6928 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 6929 predicate (UseSSE>=1); 6930 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6931 ins_cost(200); 6932 expand %{ 6933 fcmovF_regU(cop, cr, dst, src); 6934 %} 6935 %} 6936 6937 // unsigned version 6938 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 6939 predicate (UseSSE>=2); 6940 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6941 ins_cost(200); 6942 format %{ "Jn$cop skip\n\t" 6943 "MOVSD $dst,$src\t# float\n" 6944 "skip:" %} 6945 ins_encode %{ 6946 Label skip; 6947 // Invert sense of branch from sense of CMOV 6948 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6949 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6950 __ bind(skip); 6951 %} 6952 ins_pipe( pipe_slow ); 6953 %} 6954 6955 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 6956 predicate (UseSSE>=2); 6957 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6958 ins_cost(200); 6959 expand %{ 6960 fcmovD_regU(cop, cr, dst, src); 6961 %} 6962 %} 6963 6964 instruct cmovL_reg(cmpOp cop, 
eFlagsReg cr, eRegL dst, eRegL src) %{ 6965 predicate(VM_Version::supports_cmov() ); 6966 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6967 ins_cost(200); 6968 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 6969 "CMOV$cop $dst.hi,$src.hi" %} 6970 opcode(0x0F,0x40); 6971 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 6972 ins_pipe( pipe_cmov_reg_long ); 6973 %} 6974 6975 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ 6976 predicate(VM_Version::supports_cmov() ); 6977 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6978 ins_cost(200); 6979 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 6980 "CMOV$cop $dst.hi,$src.hi" %} 6981 opcode(0x0F,0x40); 6982 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 6983 ins_pipe( pipe_cmov_reg_long ); 6984 %} 6985 6986 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ 6987 predicate(VM_Version::supports_cmov() ); 6988 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6989 ins_cost(200); 6990 expand %{ 6991 cmovL_regU(cop, cr, dst, src); 6992 %} 6993 %} 6994 6995 //----------Arithmetic Instructions-------------------------------------------- 6996 //----------Addition Instructions---------------------------------------------- 6997 6998 // Integer Addition Instructions 6999 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7000 match(Set dst (AddI dst src)); 7001 effect(KILL cr); 7002 7003 size(2); 7004 format %{ "ADD $dst,$src" %} 7005 opcode(0x03); 7006 ins_encode( OpcP, RegReg( dst, src) ); 7007 ins_pipe( ialu_reg_reg ); 7008 %} 7009 7010 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7011 match(Set dst (AddI dst src)); 7012 effect(KILL cr); 7013 7014 format %{ "ADD $dst,$src" %} 7015 opcode(0x81, 0x00); /* /0 id */ 7016 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7017 ins_pipe( ialu_reg ); 7018 %} 7019 7020 instruct incI_eReg(rRegI dst, immI1 src, 
eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  opcode(0x40); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: no flags clobbered, register + immediate.
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer add via LEA: no flags clobbered.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Decrement register by one (DEC), matched as AddI with -1.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Pointer plus register offset.
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer plus immediate offset.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add memory operand into register.
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src"
%}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Add register into memory (read-modify-write).
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Increment memory by one (add of constant 1 matched to INC).
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Decrement memory by one (add of constant -1 matched to DEC).
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// CheckCastPP is a no-op at the machine level: zero size, empty encoding.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastPP is likewise a compile-time-only node: empty encoding.
instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

// CastII: compile-time-only, no code emitted.
instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}


// Load-locked - same as a regular pointer load when used with compare-swap
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  // LOCK CMPXCHG r/m32, r32 (0F B1 /r)
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    // cmpxchg8 instruction because the instruction uses
    // rcx as the high order word of the new value to store but
    // our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS: EDX:EAX holds the expected value, ECX:EBX the new value;
// res receives the boolean success result. Requires CMPXCHG8B support.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS: EAX holds the expected value, ECX the new value.
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
ins_pipe( pipe_cmpxchg );
%}

// Integer CAS: EAX holds the expected value, ECX the new value.
instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Atomic add where the old value is not used: plain locked ADD suffices.
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add: XADD leaves the old value in newval's register.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange of an int; XCHG with a memory operand is implicitly locked.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic exchange of a pointer.
instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract immediate from register (sign-extended 8-bit or 32-bit form).
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract memory operand from register.
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Subtract register from memory (read-modify-write).
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negate register: 0 - dst matched to NEG.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct
mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F); // two-byte opcode 0F AF /r
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low word (EAX) only.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Only fires when the long multiplicand is a constant that fits in 32 bits.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply
// by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Only fires when the long multiplicand is a constant that fits in 32 bits.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F); // two-byte opcode 0F AF /r
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1
) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned 32x32 -> 64-bit multiply (operands zero-extended via the mask).
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct
mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  // hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr)
%{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  // Special-case min_jint / -1 to avoid the #DE overflow trap of IDIV.
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  // Long division is done out-of-line via the runtime helper.
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL
src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  // Long remainder is done out-of-line via the runtime helper.
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
// convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrough for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ?
con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst
(StoreI dst (AndI (LoadI dst) src))); 8007 effect(KILL cr); 8008 8009 ins_cost(150); 8010 format %{ "AND $dst,$src" %} 8011 opcode(0x21); /* Opcode 21 /r */ 8012 ins_encode( OpcP, RegMem( src, dst ) ); 8013 ins_pipe( ialu_mem_reg ); 8014 %} 8015 8016 // And Memory with Immediate 8017 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8018 match(Set dst (StoreI dst (AndI (LoadI dst) src))); 8019 effect(KILL cr); 8020 8021 ins_cost(125); 8022 format %{ "AND $dst,$src" %} 8023 opcode(0x81, 0x4); /* Opcode 81 /4 id */ 8024 // ins_encode( MemImm( dst, src) ); 8025 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8026 ins_pipe( ialu_mem_imm ); 8027 %} 8028 8029 // BMI1 instructions 8030 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ 8031 match(Set dst (AndI (XorI src1 minus_1) src2)); 8032 predicate(UseBMI1Instructions); 8033 effect(KILL cr); 8034 8035 format %{ "ANDNL $dst, $src1, $src2" %} 8036 8037 ins_encode %{ 8038 __ andnl($dst$$Register, $src1$$Register, $src2$$Register); 8039 %} 8040 ins_pipe(ialu_reg); 8041 %} 8042 8043 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ 8044 match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) )); 8045 predicate(UseBMI1Instructions); 8046 effect(KILL cr); 8047 8048 ins_cost(125); 8049 format %{ "ANDNL $dst, $src1, $src2" %} 8050 8051 ins_encode %{ 8052 __ andnl($dst$$Register, $src1$$Register, $src2$$Address); 8053 %} 8054 ins_pipe(ialu_reg_mem); 8055 %} 8056 8057 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{ 8058 match(Set dst (AndI (SubI imm_zero src) src)); 8059 predicate(UseBMI1Instructions); 8060 effect(KILL cr); 8061 8062 format %{ "BLSIL $dst, $src" %} 8063 8064 ins_encode %{ 8065 __ blsil($dst$$Register, $src$$Register); 8066 %} 8067 ins_pipe(ialu_reg); 8068 %} 8069 8070 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{ 8071 
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); 8072 predicate(UseBMI1Instructions); 8073 effect(KILL cr); 8074 8075 ins_cost(125); 8076 format %{ "BLSIL $dst, $src" %} 8077 8078 ins_encode %{ 8079 __ blsil($dst$$Register, $src$$Address); 8080 %} 8081 ins_pipe(ialu_reg_mem); 8082 %} 8083 8084 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8085 %{ 8086 match(Set dst (XorI (AddI src minus_1) src)); 8087 predicate(UseBMI1Instructions); 8088 effect(KILL cr); 8089 8090 format %{ "BLSMSKL $dst, $src" %} 8091 8092 ins_encode %{ 8093 __ blsmskl($dst$$Register, $src$$Register); 8094 %} 8095 8096 ins_pipe(ialu_reg); 8097 %} 8098 8099 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8100 %{ 8101 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8102 predicate(UseBMI1Instructions); 8103 effect(KILL cr); 8104 8105 ins_cost(125); 8106 format %{ "BLSMSKL $dst, $src" %} 8107 8108 ins_encode %{ 8109 __ blsmskl($dst$$Register, $src$$Address); 8110 %} 8111 8112 ins_pipe(ialu_reg_mem); 8113 %} 8114 8115 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8116 %{ 8117 match(Set dst (AndI (AddI src minus_1) src) ); 8118 predicate(UseBMI1Instructions); 8119 effect(KILL cr); 8120 8121 format %{ "BLSRL $dst, $src" %} 8122 8123 ins_encode %{ 8124 __ blsrl($dst$$Register, $src$$Register); 8125 %} 8126 8127 ins_pipe(ialu_reg); 8128 %} 8129 8130 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8131 %{ 8132 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8133 predicate(UseBMI1Instructions); 8134 effect(KILL cr); 8135 8136 ins_cost(125); 8137 format %{ "BLSRL $dst, $src" %} 8138 8139 ins_encode %{ 8140 __ blsrl($dst$$Register, $src$$Address); 8141 %} 8142 8143 ins_pipe(ialu_reg_mem); 8144 %} 8145 8146 // Or Instructions 8147 // Or Register with Register 8148 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8149 match(Set dst 
(OrI dst src)); 8150 effect(KILL cr); 8151 8152 size(2); 8153 format %{ "OR $dst,$src" %} 8154 opcode(0x0B); 8155 ins_encode( OpcP, RegReg( dst, src) ); 8156 ins_pipe( ialu_reg_reg ); 8157 %} 8158 8159 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8160 match(Set dst (OrI dst (CastP2X src))); 8161 effect(KILL cr); 8162 8163 size(2); 8164 format %{ "OR $dst,$src" %} 8165 opcode(0x0B); 8166 ins_encode( OpcP, RegReg( dst, src) ); 8167 ins_pipe( ialu_reg_reg ); 8168 %} 8169 8170 8171 // Or Register with Immediate 8172 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8173 match(Set dst (OrI dst src)); 8174 effect(KILL cr); 8175 8176 format %{ "OR $dst,$src" %} 8177 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8178 // ins_encode( RegImm( dst, src) ); 8179 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8180 ins_pipe( ialu_reg ); 8181 %} 8182 8183 // Or Register with Memory 8184 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8185 match(Set dst (OrI dst (LoadI src))); 8186 effect(KILL cr); 8187 8188 ins_cost(125); 8189 format %{ "OR $dst,$src" %} 8190 opcode(0x0B); 8191 ins_encode( OpcP, RegMem( dst, src) ); 8192 ins_pipe( ialu_reg_mem ); 8193 %} 8194 8195 // Or Memory with Register 8196 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8197 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8198 effect(KILL cr); 8199 8200 ins_cost(150); 8201 format %{ "OR $dst,$src" %} 8202 opcode(0x09); /* Opcode 09 /r */ 8203 ins_encode( OpcP, RegMem( src, dst ) ); 8204 ins_pipe( ialu_mem_reg ); 8205 %} 8206 8207 // Or Memory with Immediate 8208 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8209 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8210 effect(KILL cr); 8211 8212 ins_cost(125); 8213 format %{ "OR $dst,$src" %} 8214 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8215 // ins_encode( MemImm( dst, src) ); 8216 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8217 ins_pipe( ialu_mem_imm ); 
%}

// ROL/ROR
// Rotate instructions have no ideal-graph node of their own; they are
// pattern-matched from (OrI (LShiftI ...) (URShiftI ...)) shapes below and
// funneled through these encode-only expand helpers.
// ROL expand
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

// Variable rotate: hardware requires the count in CL, hence eCXRegI shift
// and the non-ECX constraint (ncxRegI) on dst.
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Only a rotate when the two shift counts sum to 0 mod 32.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

// Variable rotate-right: count must live in CL (see rolI_eReg_CL).
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Only a rotate when the two shift counts sum to 0 mod 32.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32,
eFlagsReg cr) %{ 8346 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8347 8348 expand %{ 8349 rorI_eReg_CL(dst, shift, cr); 8350 %} 8351 %} 8352 8353 // Xor Instructions 8354 // Xor Register with Register 8355 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8356 match(Set dst (XorI dst src)); 8357 effect(KILL cr); 8358 8359 size(2); 8360 format %{ "XOR $dst,$src" %} 8361 opcode(0x33); 8362 ins_encode( OpcP, RegReg( dst, src) ); 8363 ins_pipe( ialu_reg_reg ); 8364 %} 8365 8366 // Xor Register with Immediate -1 8367 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8368 match(Set dst (XorI dst imm)); 8369 8370 size(2); 8371 format %{ "NOT $dst" %} 8372 ins_encode %{ 8373 __ notl($dst$$Register); 8374 %} 8375 ins_pipe( ialu_reg ); 8376 %} 8377 8378 // Xor Register with Immediate 8379 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8380 match(Set dst (XorI dst src)); 8381 effect(KILL cr); 8382 8383 format %{ "XOR $dst,$src" %} 8384 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8385 // ins_encode( RegImm( dst, src) ); 8386 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8387 ins_pipe( ialu_reg ); 8388 %} 8389 8390 // Xor Register with Memory 8391 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8392 match(Set dst (XorI dst (LoadI src))); 8393 effect(KILL cr); 8394 8395 ins_cost(125); 8396 format %{ "XOR $dst,$src" %} 8397 opcode(0x33); 8398 ins_encode( OpcP, RegMem(dst, src) ); 8399 ins_pipe( ialu_reg_mem ); 8400 %} 8401 8402 // Xor Memory with Register 8403 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8404 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8405 effect(KILL cr); 8406 8407 ins_cost(150); 8408 format %{ "XOR $dst,$src" %} 8409 opcode(0x31); /* Opcode 31 /r */ 8410 ins_encode( OpcP, RegMem( src, dst ) ); 8411 ins_pipe( ialu_mem_reg ); 8412 %} 8413 8414 // Xor Memory with Immediate 8415 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8416 match(Set dst (StoreI 
dst (XorI (LoadI dst) src))); 8417 effect(KILL cr); 8418 8419 ins_cost(125); 8420 format %{ "XOR $dst,$src" %} 8421 opcode(0x81,0x6); /* Opcode 81 /6 id */ 8422 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8423 ins_pipe( ialu_mem_imm ); 8424 %} 8425 8426 //----------Convert Int to Boolean--------------------------------------------- 8427 8428 instruct movI_nocopy(rRegI dst, rRegI src) %{ 8429 effect( DEF dst, USE src ); 8430 format %{ "MOV $dst,$src" %} 8431 ins_encode( enc_Copy( dst, src) ); 8432 ins_pipe( ialu_reg_reg ); 8433 %} 8434 8435 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8436 effect( USE_DEF dst, USE src, KILL cr ); 8437 8438 size(4); 8439 format %{ "NEG $dst\n\t" 8440 "ADC $dst,$src" %} 8441 ins_encode( neg_reg(dst), 8442 OpcRegReg(0x13,dst,src) ); 8443 ins_pipe( ialu_reg_reg_long ); 8444 %} 8445 8446 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8447 match(Set dst (Conv2B src)); 8448 8449 expand %{ 8450 movI_nocopy(dst,src); 8451 ci2b(dst,src,cr); 8452 %} 8453 %} 8454 8455 instruct movP_nocopy(rRegI dst, eRegP src) %{ 8456 effect( DEF dst, USE src ); 8457 format %{ "MOV $dst,$src" %} 8458 ins_encode( enc_Copy( dst, src) ); 8459 ins_pipe( ialu_reg_reg ); 8460 %} 8461 8462 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8463 effect( USE_DEF dst, USE src, KILL cr ); 8464 format %{ "NEG $dst\n\t" 8465 "ADC $dst,$src" %} 8466 ins_encode( neg_reg(dst), 8467 OpcRegReg(0x13,dst,src) ); 8468 ins_pipe( ialu_reg_reg_long ); 8469 %} 8470 8471 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8472 match(Set dst (Conv2B src)); 8473 8474 expand %{ 8475 movP_nocopy(dst,src); 8476 cp2b(dst,src,cr); 8477 %} 8478 %} 8479 8480 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ 8481 match(Set dst (CmpLTMask p q)); 8482 effect(KILL cr); 8483 ins_cost(400); 8484 8485 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 8486 format %{ "XOR $dst,$dst\n\t" 8487 "CMP 
$p,$q\n\t" 8488 "SETlt $dst\n\t" 8489 "NEG $dst" %} 8490 ins_encode %{ 8491 Register Rp = $p$$Register; 8492 Register Rq = $q$$Register; 8493 Register Rd = $dst$$Register; 8494 Label done; 8495 __ xorl(Rd, Rd); 8496 __ cmpl(Rp, Rq); 8497 __ setb(Assembler::less, Rd); 8498 __ negl(Rd); 8499 %} 8500 8501 ins_pipe(pipe_slow); 8502 %} 8503 8504 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{ 8505 match(Set dst (CmpLTMask dst zero)); 8506 effect(DEF dst, KILL cr); 8507 ins_cost(100); 8508 8509 format %{ "SAR $dst,31\t# cmpLTMask0" %} 8510 ins_encode %{ 8511 __ sarl($dst$$Register, 31); 8512 %} 8513 ins_pipe(ialu_reg); 8514 %} 8515 8516 /* better to save a register than avoid a branch */ 8517 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8518 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); 8519 effect(KILL cr); 8520 ins_cost(400); 8521 format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" 8522 "JGE done\n\t" 8523 "ADD $p,$y\n" 8524 "done: " %} 8525 ins_encode %{ 8526 Register Rp = $p$$Register; 8527 Register Rq = $q$$Register; 8528 Register Ry = $y$$Register; 8529 Label done; 8530 __ subl(Rp, Rq); 8531 __ jccb(Assembler::greaterEqual, done); 8532 __ addl(Rp, Ry); 8533 __ bind(done); 8534 %} 8535 8536 ins_pipe(pipe_cmplt); 8537 %} 8538 8539 /* better to save a register than avoid a branch */ 8540 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8541 match(Set y (AndI (CmpLTMask p q) y)); 8542 effect(KILL cr); 8543 8544 ins_cost(300); 8545 8546 format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" 8547 "JLT done\n\t" 8548 "XORL $y, $y\n" 8549 "done: " %} 8550 ins_encode %{ 8551 Register Rp = $p$$Register; 8552 Register Rq = $q$$Register; 8553 Register Ry = $y$$Register; 8554 Label done; 8555 __ cmpl(Rp, Rq); 8556 __ jccb(Assembler::less, done); 8557 __ xorl(Ry, Ry); 8558 __ bind(done); 8559 %} 8560 8561 ins_pipe(pipe_cmplt); 8562 %} 8563 8564 /* If I enable this, I encourage spilling in the inner loop of compress. 
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------
// These match the Overflow* ideal nodes: they produce only condition codes
// (the overflow flag), which a following branch/cmov consumes.

// Overflow check via a real ADD; op1 is destroyed (USE_KILL), so the matcher
// pins it to EAX and treats it as clobbered.
instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Same as above with an immediate addend.
instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Subtract-overflow check: CMP sets the same flags as SUB without
// writing a result, so no register is killed here.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Immediate flavor of the subtract-overflow check.
instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Negation-overflow check, matched from (OverflowSubI 0 op2); NEG destroys
// op2, hence USE_KILL.
instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Multiply-overflow check via a real IMUL; destroys op1.
instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Immediate multiply-overflow check: the three-operand IMUL writes into a
// scratch register (TEMP tmp), so op1 survives.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// 64-bit values live in register pairs on this 32-bit port; long arithmetic
// is emitted as a low-half op followed by a carry-propagating high-half op.

// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);  // 0x03 = ADD, 0x13 = ADC
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
8691 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8692 match(Set dst (SubL dst src)); 8693 effect(KILL cr); 8694 ins_cost(200); 8695 format %{ "SUB $dst.lo,$src.lo\n\t" 8696 "SBB $dst.hi,$src.hi" %} 8697 opcode(0x2B, 0x1B); 8698 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8699 ins_pipe( ialu_reg_reg_long ); 8700 %} 8701 8702 // Subtract Long Register with Immediate 8703 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8704 match(Set dst (SubL dst src)); 8705 effect(KILL cr); 8706 format %{ "SUB $dst.lo,$src.lo\n\t" 8707 "SBB $dst.hi,$src.hi" %} 8708 opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ 8709 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8710 ins_pipe( ialu_reg_long ); 8711 %} 8712 8713 // Subtract Long Register with Memory 8714 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8715 match(Set dst (SubL dst (LoadL mem))); 8716 effect(KILL cr); 8717 ins_cost(125); 8718 format %{ "SUB $dst.lo,$mem\n\t" 8719 "SBB $dst.hi,$mem+4" %} 8720 opcode(0x2B, 0x1B); 8721 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8722 ins_pipe( ialu_reg_long_mem ); 8723 %} 8724 8725 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ 8726 match(Set dst (SubL zero dst)); 8727 effect(KILL cr); 8728 ins_cost(300); 8729 format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} 8730 ins_encode( neg_long(dst) ); 8731 ins_pipe( ialu_reg_reg_long ); 8732 %} 8733 8734 // And Long Register with Register 8735 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8736 match(Set dst (AndL dst src)); 8737 effect(KILL cr); 8738 format %{ "AND $dst.lo,$src.lo\n\t" 8739 "AND $dst.hi,$src.hi" %} 8740 opcode(0x23,0x23); 8741 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8742 ins_pipe( ialu_reg_reg_long ); 8743 %} 8744 8745 // And Long Register with Immediate 8746 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8747 match(Set dst (AndL dst src)); 8748 effect(KILL 
cr); 8749 format %{ "AND $dst.lo,$src.lo\n\t" 8750 "AND $dst.hi,$src.hi" %} 8751 opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ 8752 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8753 ins_pipe( ialu_reg_long ); 8754 %} 8755 8756 // And Long Register with Memory 8757 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8758 match(Set dst (AndL dst (LoadL mem))); 8759 effect(KILL cr); 8760 ins_cost(125); 8761 format %{ "AND $dst.lo,$mem\n\t" 8762 "AND $dst.hi,$mem+4" %} 8763 opcode(0x23, 0x23); 8764 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8765 ins_pipe( ialu_reg_long_mem ); 8766 %} 8767 8768 // BMI1 instructions 8769 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ 8770 match(Set dst (AndL (XorL src1 minus_1) src2)); 8771 predicate(UseBMI1Instructions); 8772 effect(KILL cr, TEMP dst); 8773 8774 format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" 8775 "ANDNL $dst.hi, $src1.hi, $src2.hi" 8776 %} 8777 8778 ins_encode %{ 8779 Register Rdst = $dst$$Register; 8780 Register Rsrc1 = $src1$$Register; 8781 Register Rsrc2 = $src2$$Register; 8782 __ andnl(Rdst, Rsrc1, Rsrc2); 8783 __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); 8784 %} 8785 ins_pipe(ialu_reg_reg_long); 8786 %} 8787 8788 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ 8789 match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); 8790 predicate(UseBMI1Instructions); 8791 effect(KILL cr, TEMP dst); 8792 8793 ins_cost(125); 8794 format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" 8795 "ANDNL $dst.hi, $src1.hi, $src2+4" 8796 %} 8797 8798 ins_encode %{ 8799 Register Rdst = $dst$$Register; 8800 Register Rsrc1 = $src1$$Register; 8801 Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); 8802 8803 __ andnl(Rdst, Rsrc1, $src2$$Address); 8804 __ andnl(HIGH_FROM_LOW(Rdst), 
HIGH_FROM_LOW(Rsrc1), src2_hi); 8805 %} 8806 ins_pipe(ialu_reg_mem); 8807 %} 8808 8809 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ 8810 match(Set dst (AndL (SubL imm_zero src) src)); 8811 predicate(UseBMI1Instructions); 8812 effect(KILL cr, TEMP dst); 8813 8814 format %{ "MOVL $dst.hi, 0\n\t" 8815 "BLSIL $dst.lo, $src.lo\n\t" 8816 "JNZ done\n\t" 8817 "BLSIL $dst.hi, $src.hi\n" 8818 "done:" 8819 %} 8820 8821 ins_encode %{ 8822 Label done; 8823 Register Rdst = $dst$$Register; 8824 Register Rsrc = $src$$Register; 8825 __ movl(HIGH_FROM_LOW(Rdst), 0); 8826 __ blsil(Rdst, Rsrc); 8827 __ jccb(Assembler::notZero, done); 8828 __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8829 __ bind(done); 8830 %} 8831 ins_pipe(ialu_reg); 8832 %} 8833 8834 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ 8835 match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); 8836 predicate(UseBMI1Instructions); 8837 effect(KILL cr, TEMP dst); 8838 8839 ins_cost(125); 8840 format %{ "MOVL $dst.hi, 0\n\t" 8841 "BLSIL $dst.lo, $src\n\t" 8842 "JNZ done\n\t" 8843 "BLSIL $dst.hi, $src+4\n" 8844 "done:" 8845 %} 8846 8847 ins_encode %{ 8848 Label done; 8849 Register Rdst = $dst$$Register; 8850 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8851 8852 __ movl(HIGH_FROM_LOW(Rdst), 0); 8853 __ blsil(Rdst, $src$$Address); 8854 __ jccb(Assembler::notZero, done); 8855 __ blsil(HIGH_FROM_LOW(Rdst), src_hi); 8856 __ bind(done); 8857 %} 8858 ins_pipe(ialu_reg_mem); 8859 %} 8860 8861 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8862 %{ 8863 match(Set dst (XorL (AddL src minus_1) src)); 8864 predicate(UseBMI1Instructions); 8865 effect(KILL cr, TEMP dst); 8866 8867 format %{ "MOVL $dst.hi, 0\n\t" 8868 "BLSMSKL $dst.lo, $src.lo\n\t" 8869 "JNC done\n\t" 8870 "BLSMSKL $dst.hi, $src.hi\n" 8871 "done:" 8872 %} 8873 8874 ins_encode %{ 8875 Label done; 
8876 Register Rdst = $dst$$Register; 8877 Register Rsrc = $src$$Register; 8878 __ movl(HIGH_FROM_LOW(Rdst), 0); 8879 __ blsmskl(Rdst, Rsrc); 8880 __ jccb(Assembler::carryClear, done); 8881 __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8882 __ bind(done); 8883 %} 8884 8885 ins_pipe(ialu_reg); 8886 %} 8887 8888 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8889 %{ 8890 match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); 8891 predicate(UseBMI1Instructions); 8892 effect(KILL cr, TEMP dst); 8893 8894 ins_cost(125); 8895 format %{ "MOVL $dst.hi, 0\n\t" 8896 "BLSMSKL $dst.lo, $src\n\t" 8897 "JNC done\n\t" 8898 "BLSMSKL $dst.hi, $src+4\n" 8899 "done:" 8900 %} 8901 8902 ins_encode %{ 8903 Label done; 8904 Register Rdst = $dst$$Register; 8905 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8906 8907 __ movl(HIGH_FROM_LOW(Rdst), 0); 8908 __ blsmskl(Rdst, $src$$Address); 8909 __ jccb(Assembler::carryClear, done); 8910 __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); 8911 __ bind(done); 8912 %} 8913 8914 ins_pipe(ialu_reg_mem); 8915 %} 8916 8917 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) 8918 %{ 8919 match(Set dst (AndL (AddL src minus_1) src) ); 8920 predicate(UseBMI1Instructions); 8921 effect(KILL cr, TEMP dst); 8922 8923 format %{ "MOVL $dst.hi, $src.hi\n\t" 8924 "BLSRL $dst.lo, $src.lo\n\t" 8925 "JNC done\n\t" 8926 "BLSRL $dst.hi, $src.hi\n" 8927 "done:" 8928 %} 8929 8930 ins_encode %{ 8931 Label done; 8932 Register Rdst = $dst$$Register; 8933 Register Rsrc = $src$$Register; 8934 __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8935 __ blsrl(Rdst, Rsrc); 8936 __ jccb(Assembler::carryClear, done); 8937 __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); 8938 __ bind(done); 8939 %} 8940 8941 ins_pipe(ialu_reg); 8942 %} 8943 8944 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) 8945 %{ 8946 match(Set dst (AndL 
(AddL (LoadL src) minus_1) (LoadL src) )); 8947 predicate(UseBMI1Instructions); 8948 effect(KILL cr, TEMP dst); 8949 8950 ins_cost(125); 8951 format %{ "MOVL $dst.hi, $src+4\n\t" 8952 "BLSRL $dst.lo, $src\n\t" 8953 "JNC done\n\t" 8954 "BLSRL $dst.hi, $src+4\n" 8955 "done:" 8956 %} 8957 8958 ins_encode %{ 8959 Label done; 8960 Register Rdst = $dst$$Register; 8961 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 8962 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 8963 __ blsrl(Rdst, $src$$Address); 8964 __ jccb(Assembler::carryClear, done); 8965 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 8966 __ bind(done); 8967 %} 8968 8969 ins_pipe(ialu_reg_mem); 8970 %} 8971 8972 // Or Long Register with Register 8973 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8974 match(Set dst (OrL dst src)); 8975 effect(KILL cr); 8976 format %{ "OR $dst.lo,$src.lo\n\t" 8977 "OR $dst.hi,$src.hi" %} 8978 opcode(0x0B,0x0B); 8979 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 8980 ins_pipe( ialu_reg_reg_long ); 8981 %} 8982 8983 // Or Long Register with Immediate 8984 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8985 match(Set dst (OrL dst src)); 8986 effect(KILL cr); 8987 format %{ "OR $dst.lo,$src.lo\n\t" 8988 "OR $dst.hi,$src.hi" %} 8989 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 8990 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8991 ins_pipe( ialu_reg_long ); 8992 %} 8993 8994 // Or Long Register with Memory 8995 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8996 match(Set dst (OrL dst (LoadL mem))); 8997 effect(KILL cr); 8998 ins_cost(125); 8999 format %{ "OR $dst.lo,$mem\n\t" 9000 "OR $dst.hi,$mem+4" %} 9001 opcode(0x0B,0x0B); 9002 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9003 ins_pipe( ialu_reg_long_mem ); 9004 %} 9005 9006 // Xor Long Register with Register 9007 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9008 
match(Set dst (XorL dst src)); 9009 effect(KILL cr); 9010 format %{ "XOR $dst.lo,$src.lo\n\t" 9011 "XOR $dst.hi,$src.hi" %} 9012 opcode(0x33,0x33); 9013 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9014 ins_pipe( ialu_reg_reg_long ); 9015 %} 9016 9017 // Xor Long Register with Immediate -1 9018 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9019 match(Set dst (XorL dst imm)); 9020 format %{ "NOT $dst.lo\n\t" 9021 "NOT $dst.hi" %} 9022 ins_encode %{ 9023 __ notl($dst$$Register); 9024 __ notl(HIGH_FROM_LOW($dst$$Register)); 9025 %} 9026 ins_pipe( ialu_reg_long ); 9027 %} 9028 9029 // Xor Long Register with Immediate 9030 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9031 match(Set dst (XorL dst src)); 9032 effect(KILL cr); 9033 format %{ "XOR $dst.lo,$src.lo\n\t" 9034 "XOR $dst.hi,$src.hi" %} 9035 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9036 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9037 ins_pipe( ialu_reg_long ); 9038 %} 9039 9040 // Xor Long Register with Memory 9041 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9042 match(Set dst (XorL dst (LoadL mem))); 9043 effect(KILL cr); 9044 ins_cost(125); 9045 format %{ "XOR $dst.lo,$mem\n\t" 9046 "XOR $dst.hi,$mem+4" %} 9047 opcode(0x33,0x33); 9048 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9049 ins_pipe( ialu_reg_long_mem ); 9050 %} 9051 9052 // Shift Left Long by 1 9053 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9054 predicate(UseNewLongLShift); 9055 match(Set dst (LShiftL dst cnt)); 9056 effect(KILL cr); 9057 ins_cost(100); 9058 format %{ "ADD $dst.lo,$dst.lo\n\t" 9059 "ADC $dst.hi,$dst.hi" %} 9060 ins_encode %{ 9061 __ addl($dst$$Register,$dst$$Register); 9062 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9063 %} 9064 ins_pipe( ialu_reg_long ); 9065 %} 9066 9067 // Shift Left Long by 2 9068 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9069 
predicate(UseNewLongLShift); 9070 match(Set dst (LShiftL dst cnt)); 9071 effect(KILL cr); 9072 ins_cost(100); 9073 format %{ "ADD $dst.lo,$dst.lo\n\t" 9074 "ADC $dst.hi,$dst.hi\n\t" 9075 "ADD $dst.lo,$dst.lo\n\t" 9076 "ADC $dst.hi,$dst.hi" %} 9077 ins_encode %{ 9078 __ addl($dst$$Register,$dst$$Register); 9079 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9080 __ addl($dst$$Register,$dst$$Register); 9081 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9082 %} 9083 ins_pipe( ialu_reg_long ); 9084 %} 9085 9086 // Shift Left Long by 3 9087 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9088 predicate(UseNewLongLShift); 9089 match(Set dst (LShiftL dst cnt)); 9090 effect(KILL cr); 9091 ins_cost(100); 9092 format %{ "ADD $dst.lo,$dst.lo\n\t" 9093 "ADC $dst.hi,$dst.hi\n\t" 9094 "ADD $dst.lo,$dst.lo\n\t" 9095 "ADC $dst.hi,$dst.hi\n\t" 9096 "ADD $dst.lo,$dst.lo\n\t" 9097 "ADC $dst.hi,$dst.hi" %} 9098 ins_encode %{ 9099 __ addl($dst$$Register,$dst$$Register); 9100 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9101 __ addl($dst$$Register,$dst$$Register); 9102 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9103 __ addl($dst$$Register,$dst$$Register); 9104 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9105 %} 9106 ins_pipe( ialu_reg_long ); 9107 %} 9108 9109 // Shift Left Long by 1-31 9110 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9111 match(Set dst (LShiftL dst cnt)); 9112 effect(KILL cr); 9113 ins_cost(200); 9114 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9115 "SHL $dst.lo,$cnt" %} 9116 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9117 ins_encode( move_long_small_shift(dst,cnt) ); 9118 ins_pipe( ialu_reg_long ); 9119 %} 9120 9121 // Shift Left Long by 32-63 9122 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9123 match(Set dst (LShiftL dst cnt)); 9124 effect(KILL cr); 9125 ins_cost(300); 9126 
format %{ "MOV $dst.hi,$dst.lo\n" 9127 "\tSHL $dst.hi,$cnt-32\n" 9128 "\tXOR $dst.lo,$dst.lo" %} 9129 opcode(0xC1, 0x4); /* C1 /4 ib */ 9130 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9131 ins_pipe( ialu_reg_long ); 9132 %} 9133 9134 // Shift Left Long by variable 9135 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9136 match(Set dst (LShiftL dst shift)); 9137 effect(KILL cr); 9138 ins_cost(500+200); 9139 size(17); 9140 format %{ "TEST $shift,32\n\t" 9141 "JEQ,s small\n\t" 9142 "MOV $dst.hi,$dst.lo\n\t" 9143 "XOR $dst.lo,$dst.lo\n" 9144 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9145 "SHL $dst.lo,$shift" %} 9146 ins_encode( shift_left_long( dst, shift ) ); 9147 ins_pipe( pipe_slow ); 9148 %} 9149 9150 // Shift Right Long by 1-31 9151 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9152 match(Set dst (URShiftL dst cnt)); 9153 effect(KILL cr); 9154 ins_cost(200); 9155 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9156 "SHR $dst.hi,$cnt" %} 9157 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9158 ins_encode( move_long_small_shift(dst,cnt) ); 9159 ins_pipe( ialu_reg_long ); 9160 %} 9161 9162 // Shift Right Long by 32-63 9163 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9164 match(Set dst (URShiftL dst cnt)); 9165 effect(KILL cr); 9166 ins_cost(300); 9167 format %{ "MOV $dst.lo,$dst.hi\n" 9168 "\tSHR $dst.lo,$cnt-32\n" 9169 "\tXOR $dst.hi,$dst.hi" %} 9170 opcode(0xC1, 0x5); /* C1 /5 ib */ 9171 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9172 ins_pipe( ialu_reg_long ); 9173 %} 9174 9175 // Shift Right Long by variable 9176 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9177 match(Set dst (URShiftL dst shift)); 9178 effect(KILL cr); 9179 ins_cost(600); 9180 size(17); 9181 format %{ "TEST $shift,32\n\t" 9182 "JEQ,s small\n\t" 9183 "MOV $dst.lo,$dst.hi\n\t" 9184 "XOR $dst.hi,$dst.hi\n" 9185 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9186 "SHR $dst.hi,$shift" %} 9187 ins_encode( 
shift_right_long( dst, shift ) ); 9188 ins_pipe( pipe_slow ); 9189 %} 9190 9191 // Shift Right Long by 1-31 9192 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9193 match(Set dst (RShiftL dst cnt)); 9194 effect(KILL cr); 9195 ins_cost(200); 9196 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9197 "SAR $dst.hi,$cnt" %} 9198 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9199 ins_encode( move_long_small_shift(dst,cnt) ); 9200 ins_pipe( ialu_reg_long ); 9201 %} 9202 9203 // Shift Right Long by 32-63 9204 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9205 match(Set dst (RShiftL dst cnt)); 9206 effect(KILL cr); 9207 ins_cost(300); 9208 format %{ "MOV $dst.lo,$dst.hi\n" 9209 "\tSAR $dst.lo,$cnt-32\n" 9210 "\tSAR $dst.hi,31" %} 9211 opcode(0xC1, 0x7); /* C1 /7 ib */ 9212 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9213 ins_pipe( ialu_reg_long ); 9214 %} 9215 9216 // Shift Right arithmetic Long by variable 9217 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9218 match(Set dst (RShiftL dst shift)); 9219 effect(KILL cr); 9220 ins_cost(600); 9221 size(18); 9222 format %{ "TEST $shift,32\n\t" 9223 "JEQ,s small\n\t" 9224 "MOV $dst.lo,$dst.hi\n\t" 9225 "SAR $dst.hi,31\n" 9226 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9227 "SAR $dst.hi,$shift" %} 9228 ins_encode( shift_right_arith_long( dst, shift ) ); 9229 ins_pipe( pipe_slow ); 9230 %} 9231 9232 9233 //----------Double Instructions------------------------------------------------ 9234 // Double Math 9235 9236 // Compare & branch 9237 9238 // P6 version of float compare, sets condition codes in EFLAGS 9239 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9240 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9241 match(Set cr (CmpD src1 src2)); 9242 effect(KILL rax); 9243 ins_cost(150); 9244 format %{ "FLD $src1\n\t" 9245 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9246 "JNP exit\n\t" 9247 "MOV ah,1 // saw a NaN, set CF\n\t" 9248 
"SAHF\n" 9249 "exit:\tNOP // avoid branch to branch" %} 9250 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9251 ins_encode( Push_Reg_DPR(src1), 9252 OpcP, RegOpc(src2), 9253 cmpF_P6_fixup ); 9254 ins_pipe( pipe_slow ); 9255 %} 9256 9257 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9258 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9259 match(Set cr (CmpD src1 src2)); 9260 ins_cost(150); 9261 format %{ "FLD $src1\n\t" 9262 "FUCOMIP ST,$src2 // P6 instruction" %} 9263 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9264 ins_encode( Push_Reg_DPR(src1), 9265 OpcP, RegOpc(src2)); 9266 ins_pipe( pipe_slow ); 9267 %} 9268 9269 // Compare & branch 9270 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9271 predicate(UseSSE<=1); 9272 match(Set cr (CmpD src1 src2)); 9273 effect(KILL rax); 9274 ins_cost(200); 9275 format %{ "FLD $src1\n\t" 9276 "FCOMp $src2\n\t" 9277 "FNSTSW AX\n\t" 9278 "TEST AX,0x400\n\t" 9279 "JZ,s flags\n\t" 9280 "MOV AH,1\t# unordered treat as LT\n" 9281 "flags:\tSAHF" %} 9282 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9283 ins_encode( Push_Reg_DPR(src1), 9284 OpcP, RegOpc(src2), 9285 fpu_flags); 9286 ins_pipe( pipe_slow ); 9287 %} 9288 9289 // Compare vs zero into -1,0,1 9290 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9291 predicate(UseSSE<=1); 9292 match(Set dst (CmpD3 src1 zero)); 9293 effect(KILL cr, KILL rax); 9294 ins_cost(280); 9295 format %{ "FTSTD $dst,$src1" %} 9296 opcode(0xE4, 0xD9); 9297 ins_encode( Push_Reg_DPR(src1), 9298 OpcS, OpcP, PopFPU, 9299 CmpF_Result(dst)); 9300 ins_pipe( pipe_slow ); 9301 %} 9302 9303 // Compare into -1,0,1 9304 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9305 predicate(UseSSE<=1); 9306 match(Set dst (CmpD3 src1 src2)); 9307 effect(KILL cr, KILL rax); 9308 ins_cost(300); 9309 format %{ "FCMPD $dst,$src1,$src2" %} 9310 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9311 ins_encode( 
Push_Reg_DPR(src1), 9312 OpcP, RegOpc(src2), 9313 CmpF_Result(dst)); 9314 ins_pipe( pipe_slow ); 9315 %} 9316 9317 // float compare and set condition codes in EFLAGS by XMM regs 9318 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9319 predicate(UseSSE>=2); 9320 match(Set cr (CmpD src1 src2)); 9321 ins_cost(145); 9322 format %{ "UCOMISD $src1,$src2\n\t" 9323 "JNP,s exit\n\t" 9324 "PUSHF\t# saw NaN, set CF\n\t" 9325 "AND [rsp], #0xffffff2b\n\t" 9326 "POPF\n" 9327 "exit:" %} 9328 ins_encode %{ 9329 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9330 emit_cmpfp_fixup(_masm); 9331 %} 9332 ins_pipe( pipe_slow ); 9333 %} 9334 9335 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9336 predicate(UseSSE>=2); 9337 match(Set cr (CmpD src1 src2)); 9338 ins_cost(100); 9339 format %{ "UCOMISD $src1,$src2" %} 9340 ins_encode %{ 9341 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9342 %} 9343 ins_pipe( pipe_slow ); 9344 %} 9345 9346 // float compare and set condition codes in EFLAGS by XMM regs 9347 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9348 predicate(UseSSE>=2); 9349 match(Set cr (CmpD src1 (LoadD src2))); 9350 ins_cost(145); 9351 format %{ "UCOMISD $src1,$src2\n\t" 9352 "JNP,s exit\n\t" 9353 "PUSHF\t# saw NaN, set CF\n\t" 9354 "AND [rsp], #0xffffff2b\n\t" 9355 "POPF\n" 9356 "exit:" %} 9357 ins_encode %{ 9358 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9359 emit_cmpfp_fixup(_masm); 9360 %} 9361 ins_pipe( pipe_slow ); 9362 %} 9363 9364 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9365 predicate(UseSSE>=2); 9366 match(Set cr (CmpD src1 (LoadD src2))); 9367 ins_cost(100); 9368 format %{ "UCOMISD $src1,$src2" %} 9369 ins_encode %{ 9370 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9371 %} 9372 ins_pipe( pipe_slow ); 9373 %} 9374 9375 // Compare into -1,0,1 in XMM 9376 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9377 predicate(UseSSE>=2); 9378 match(Set dst (CmpD3 src1 src2)); 
9379 effect(KILL cr); 9380 ins_cost(255); 9381 format %{ "UCOMISD $src1, $src2\n\t" 9382 "MOV $dst, #-1\n\t" 9383 "JP,s done\n\t" 9384 "JB,s done\n\t" 9385 "SETNE $dst\n\t" 9386 "MOVZB $dst, $dst\n" 9387 "done:" %} 9388 ins_encode %{ 9389 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9390 emit_cmpfp3(_masm, $dst$$Register); 9391 %} 9392 ins_pipe( pipe_slow ); 9393 %} 9394 9395 // Compare into -1,0,1 in XMM and memory 9396 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9397 predicate(UseSSE>=2); 9398 match(Set dst (CmpD3 src1 (LoadD src2))); 9399 effect(KILL cr); 9400 ins_cost(275); 9401 format %{ "UCOMISD $src1, $src2\n\t" 9402 "MOV $dst, #-1\n\t" 9403 "JP,s done\n\t" 9404 "JB,s done\n\t" 9405 "SETNE $dst\n\t" 9406 "MOVZB $dst, $dst\n" 9407 "done:" %} 9408 ins_encode %{ 9409 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9410 emit_cmpfp3(_masm, $dst$$Register); 9411 %} 9412 ins_pipe( pipe_slow ); 9413 %} 9414 9415 9416 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9417 predicate (UseSSE <=1); 9418 match(Set dst (SubD dst src)); 9419 9420 format %{ "FLD $src\n\t" 9421 "DSUBp $dst,ST" %} 9422 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9423 ins_cost(150); 9424 ins_encode( Push_Reg_DPR(src), 9425 OpcP, RegOpc(dst) ); 9426 ins_pipe( fpu_reg_reg ); 9427 %} 9428 9429 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9430 predicate (UseSSE <=1); 9431 match(Set dst (RoundDouble (SubD src1 src2))); 9432 ins_cost(250); 9433 9434 format %{ "FLD $src2\n\t" 9435 "DSUB ST,$src1\n\t" 9436 "FSTP_D $dst\t# D-round" %} 9437 opcode(0xD8, 0x5); 9438 ins_encode( Push_Reg_DPR(src2), 9439 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9440 ins_pipe( fpu_mem_reg_reg ); 9441 %} 9442 9443 9444 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9445 predicate (UseSSE <=1); 9446 match(Set dst (SubD dst (LoadD src))); 9447 ins_cost(150); 9448 9449 format %{ "FLD $src\n\t" 9450 "DSUBp $dst,ST" %} 9451 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9452 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9453 OpcP, RegOpc(dst) ); 9454 ins_pipe( fpu_reg_mem ); 9455 %} 9456 9457 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9458 predicate (UseSSE<=1); 9459 match(Set dst (AbsD src)); 9460 ins_cost(100); 9461 format %{ "FABS" %} 9462 opcode(0xE1, 0xD9); 9463 ins_encode( OpcS, OpcP ); 9464 ins_pipe( fpu_reg_reg ); 9465 %} 9466 9467 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9468 predicate(UseSSE<=1); 9469 match(Set dst (NegD src)); 9470 ins_cost(100); 9471 format %{ "FCHS" %} 9472 opcode(0xE0, 0xD9); 9473 ins_encode( OpcS, OpcP ); 9474 ins_pipe( fpu_reg_reg ); 9475 %} 9476 9477 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9478 predicate(UseSSE<=1); 9479 match(Set dst (AddD dst src)); 9480 format %{ "FLD $src\n\t" 9481 "DADD $dst,ST" %} 9482 size(4); 9483 ins_cost(150); 9484 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9485 ins_encode( Push_Reg_DPR(src), 9486 OpcP, RegOpc(dst) ); 9487 ins_pipe( fpu_reg_reg ); 9488 %} 9489 9490 9491 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9492 predicate(UseSSE<=1); 9493 match(Set dst (RoundDouble (AddD src1 src2))); 9494 ins_cost(250); 9495 9496 format %{ "FLD $src2\n\t" 9497 "DADD ST,$src1\n\t" 9498 "FSTP_D $dst\t# D-round" %} 9499 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9500 ins_encode( Push_Reg_DPR(src2), 9501 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9502 ins_pipe( fpu_mem_reg_reg ); 9503 %} 9504 9505 9506 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9507 predicate(UseSSE<=1); 9508 match(Set dst (AddD dst (LoadD src))); 9509 ins_cost(150); 9510 9511 format %{ "FLD $src\n\t" 9512 "DADDp $dst,ST" %} 9513 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9514 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9515 OpcP, RegOpc(dst) ); 9516 ins_pipe( fpu_reg_mem ); 9517 %} 9518 9519 // add-to-memory 9520 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9521 predicate(UseSSE<=1); 9522 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9523 ins_cost(150); 9524 9525 format %{ "FLD_D $dst\n\t" 9526 "DADD ST,$src\n\t" 9527 "FST_D $dst" %} 9528 opcode(0xDD, 0x0); 9529 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9530 Opcode(0xD8), RegOpc(src), 9531 set_instruction_start, 9532 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9533 ins_pipe( fpu_reg_mem ); 9534 %} 9535 9536 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9537 predicate(UseSSE<=1); 9538 match(Set dst (AddD dst con)); 9539 ins_cost(125); 9540 format %{ "FLD1\n\t" 9541 "DADDp $dst,ST" %} 9542 ins_encode %{ 9543 __ fld1(); 9544 __ faddp($dst$$reg); 9545 %} 9546 ins_pipe(fpu_reg); 9547 %} 9548 9549 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9550 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9551 match(Set dst (AddD dst con)); 9552 ins_cost(200); 9553 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9554 "DADDp $dst,ST" %} 9555 ins_encode %{ 9556 __ fld_d($constantaddress($con)); 9557 __ faddp($dst$$reg); 9558 %} 9559 ins_pipe(fpu_reg_mem); 9560 %} 9561 9562 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9563 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9564 match(Set dst (RoundDouble (AddD src con))); 9565 ins_cost(200); 9566 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9567 "DADD ST,$src\n\t" 9568 "FSTP_D $dst\t# D-round" %} 9569 ins_encode %{ 9570 __ fld_d($constantaddress($con)); 9571 __ fadd($src$$reg); 9572 __ fstp_d(Address(rsp, $dst$$disp)); 9573 %} 9574 ins_pipe(fpu_mem_reg_con); 9575 %} 9576 9577 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9578 predicate(UseSSE<=1); 9579 match(Set dst (MulD dst src)); 9580 format %{ "FLD $src\n\t" 9581 "DMULp $dst,ST" %} 9582 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9583 ins_cost(150); 9584 ins_encode( Push_Reg_DPR(src), 9585 OpcP, RegOpc(dst) ); 9586 ins_pipe( 
fpu_reg_reg ); 9587 %} 9588 9589 // Strict FP instruction biases argument before multiply then 9590 // biases result to avoid double rounding of subnormals. 9591 // 9592 // scale arg1 by multiplying arg1 by 2^(-15360) 9593 // load arg2 9594 // multiply scaled arg1 by arg2 9595 // rescale product by 2^(15360) 9596 // 9597 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9598 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9599 match(Set dst (MulD dst src)); 9600 ins_cost(1); // Select this instruction for all strict FP double multiplies 9601 9602 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9603 "DMULp $dst,ST\n\t" 9604 "FLD $src\n\t" 9605 "DMULp $dst,ST\n\t" 9606 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9607 "DMULp $dst,ST\n\t" %} 9608 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9609 ins_encode( strictfp_bias1(dst), 9610 Push_Reg_DPR(src), 9611 OpcP, RegOpc(dst), 9612 strictfp_bias2(dst) ); 9613 ins_pipe( fpu_reg_reg ); 9614 %} 9615 9616 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9617 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9618 match(Set dst (MulD dst con)); 9619 ins_cost(200); 9620 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9621 "DMULp $dst,ST" %} 9622 ins_encode %{ 9623 __ fld_d($constantaddress($con)); 9624 __ fmulp($dst$$reg); 9625 %} 9626 ins_pipe(fpu_reg_mem); 9627 %} 9628 9629 9630 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9631 predicate( UseSSE<=1 ); 9632 match(Set dst (MulD dst (LoadD src))); 9633 ins_cost(200); 9634 format %{ "FLD_D $src\n\t" 9635 "DMULp $dst,ST" %} 9636 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9637 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9638 OpcP, RegOpc(dst) ); 9639 ins_pipe( fpu_reg_mem ); 9640 %} 9641 9642 // 9643 // Cisc-alternate to reg-reg multiply 9644 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9645 predicate( UseSSE<=1 ); 9646 match(Set dst (MulD src (LoadD mem))); 9647 ins_cost(250); 9648 format %{ "FLD_D $mem\n\t" 9649 "DMUL ST,$src\n\t" 9650 "FSTP_D $dst" %} 9651 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9652 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9653 OpcReg_FPR(src), 9654 Pop_Reg_DPR(dst) ); 9655 ins_pipe( fpu_reg_reg_mem ); 9656 %} 9657 9658 9659 // MACRO3 -- addDPR a mulDPR 9660 // This instruction is a '2-address' instruction in that the result goes 9661 // back to src2. This eliminates a move from the macro; possibly the 9662 // register allocator will have to add it back (and maybe not). 9663 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9664 predicate( UseSSE<=1 ); 9665 match(Set src2 (AddD (MulD src0 src1) src2)); 9666 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9667 "DMUL ST,$src1\n\t" 9668 "DADDp $src2,ST" %} 9669 ins_cost(250); 9670 opcode(0xDD); /* LoadD DD /0 */ 9671 ins_encode( Push_Reg_FPR(src0), 9672 FMul_ST_reg(src1), 9673 FAddP_reg_ST(src2) ); 9674 ins_pipe( fpu_reg_reg_reg ); 9675 %} 9676 9677 9678 // MACRO3 -- subDPR a mulDPR 9679 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9680 predicate( UseSSE<=1 ); 9681 match(Set src2 (SubD (MulD src0 src1) src2)); 9682 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9683 "DMUL ST,$src1\n\t" 9684 "DSUBRp $src2,ST" %} 9685 ins_cost(250); 9686 ins_encode( Push_Reg_FPR(src0), 9687 FMul_ST_reg(src1), 9688 Opcode(0xDE), Opc_plus(0xE0,src2)); 9689 ins_pipe( fpu_reg_reg_reg ); 9690 %} 9691 9692 9693 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9694 predicate( UseSSE<=1 ); 9695 match(Set dst (DivD dst src)); 9696 9697 format %{ "FLD $src\n\t" 9698 "FDIVp $dst,ST" %} 9699 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9700 ins_cost(150); 9701 ins_encode( Push_Reg_DPR(src), 9702 OpcP, RegOpc(dst) ); 9703 ins_pipe( fpu_reg_reg ); 9704 %} 9705 9706 // Strict FP instruction biases argument before division then 9707 // biases 
result, to avoid double rounding of subnormals. 9708 // 9709 // scale dividend by multiplying dividend by 2^(-15360) 9710 // load divisor 9711 // divide scaled dividend by divisor 9712 // rescale quotient by 2^(15360) 9713 // 9714 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9715 predicate (UseSSE<=1); 9716 match(Set dst (DivD dst src)); 9717 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9718 ins_cost(01); 9719 9720 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9721 "DMULp $dst,ST\n\t" 9722 "FLD $src\n\t" 9723 "FDIVp $dst,ST\n\t" 9724 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9725 "DMULp $dst,ST\n\t" %} 9726 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9727 ins_encode( strictfp_bias1(dst), 9728 Push_Reg_DPR(src), 9729 OpcP, RegOpc(dst), 9730 strictfp_bias2(dst) ); 9731 ins_pipe( fpu_reg_reg ); 9732 %} 9733 9734 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9735 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 9736 match(Set dst (RoundDouble (DivD src1 src2))); 9737 9738 format %{ "FLD $src1\n\t" 9739 "FDIV ST,$src2\n\t" 9740 "FSTP_D $dst\t# D-round" %} 9741 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 9742 ins_encode( Push_Reg_DPR(src1), 9743 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); 9744 ins_pipe( fpu_mem_reg_reg ); 9745 %} 9746 9747 9748 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 9749 predicate(UseSSE<=1); 9750 match(Set dst (ModD dst src)); 9751 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 9752 9753 format %{ "DMOD $dst,$src" %} 9754 ins_cost(250); 9755 ins_encode(Push_Reg_Mod_DPR(dst, src), 9756 emitModDPR(), 9757 Push_Result_Mod_DPR(src), 9758 Pop_Reg_DPR(dst)); 9759 ins_pipe( pipe_slow ); 9760 %} 9761 9762 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 9763 predicate(UseSSE>=2); 9764 match(Set dst (ModD src0 src1)); 
9765 effect(KILL rax, KILL cr); 9766 9767 format %{ "SUB ESP,8\t # DMOD\n" 9768 "\tMOVSD [ESP+0],$src1\n" 9769 "\tFLD_D [ESP+0]\n" 9770 "\tMOVSD [ESP+0],$src0\n" 9771 "\tFLD_D [ESP+0]\n" 9772 "loop:\tFPREM\n" 9773 "\tFWAIT\n" 9774 "\tFNSTSW AX\n" 9775 "\tSAHF\n" 9776 "\tJP loop\n" 9777 "\tFSTP_D [ESP+0]\n" 9778 "\tMOVSD $dst,[ESP+0]\n" 9779 "\tADD ESP,8\n" 9780 "\tFSTP ST0\t # Restore FPU Stack" 9781 %} 9782 ins_cost(250); 9783 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9784 ins_pipe( pipe_slow ); 9785 %} 9786 9787 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{ 9788 predicate (UseSSE<=1); 9789 match(Set dst (SinD src)); 9790 ins_cost(1800); 9791 format %{ "DSIN $dst" %} 9792 opcode(0xD9, 0xFE); 9793 ins_encode( OpcP, OpcS ); 9794 ins_pipe( pipe_slow ); 9795 %} 9796 9797 instruct sinD_reg(regD dst, eFlagsReg cr) %{ 9798 predicate (UseSSE>=2); 9799 match(Set dst (SinD dst)); 9800 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9801 ins_cost(1800); 9802 format %{ "DSIN $dst" %} 9803 opcode(0xD9, 0xFE); 9804 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9805 ins_pipe( pipe_slow ); 9806 %} 9807 9808 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{ 9809 predicate (UseSSE<=1); 9810 match(Set dst (CosD src)); 9811 ins_cost(1800); 9812 format %{ "DCOS $dst" %} 9813 opcode(0xD9, 0xFF); 9814 ins_encode( OpcP, OpcS ); 9815 ins_pipe( pipe_slow ); 9816 %} 9817 9818 instruct cosD_reg(regD dst, eFlagsReg cr) %{ 9819 predicate (UseSSE>=2); 9820 match(Set dst (CosD dst)); 9821 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9822 ins_cost(1800); 9823 format %{ "DCOS $dst" %} 9824 opcode(0xD9, 0xFF); 9825 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9826 ins_pipe( pipe_slow ); 9827 %} 9828 9829 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ 9830 predicate (UseSSE<=1); 9831 match(Set dst(TanD src)); 9832 format %{ "DTAN $dst" %} 9833 ins_encode( Opcode(0xD9), Opcode(0xF2), // 
fptan 9834 Opcode(0xDD), Opcode(0xD8)); // fstp st 9835 ins_pipe( pipe_slow ); 9836 %} 9837 9838 instruct tanD_reg(regD dst, eFlagsReg cr) %{ 9839 predicate (UseSSE>=2); 9840 match(Set dst(TanD dst)); 9841 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9842 format %{ "DTAN $dst" %} 9843 ins_encode( Push_SrcD(dst), 9844 Opcode(0xD9), Opcode(0xF2), // fptan 9845 Opcode(0xDD), Opcode(0xD8), // fstp st 9846 Push_ResultD(dst) ); 9847 ins_pipe( pipe_slow ); 9848 %} 9849 9850 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9851 predicate (UseSSE<=1); 9852 match(Set dst(AtanD dst src)); 9853 format %{ "DATA $dst,$src" %} 9854 opcode(0xD9, 0xF3); 9855 ins_encode( Push_Reg_DPR(src), 9856 OpcP, OpcS, RegOpc(dst) ); 9857 ins_pipe( pipe_slow ); 9858 %} 9859 9860 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9861 predicate (UseSSE>=2); 9862 match(Set dst(AtanD dst src)); 9863 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9864 format %{ "DATA $dst,$src" %} 9865 opcode(0xD9, 0xF3); 9866 ins_encode( Push_SrcD(src), 9867 OpcP, OpcS, Push_ResultD(dst) ); 9868 ins_pipe( pipe_slow ); 9869 %} 9870 9871 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9872 predicate (UseSSE<=1); 9873 match(Set dst (SqrtD src)); 9874 format %{ "DSQRT $dst,$src" %} 9875 opcode(0xFA, 0xD9); 9876 ins_encode( Push_Reg_DPR(src), 9877 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9878 ins_pipe( pipe_slow ); 9879 %} 9880 9881 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9882 predicate (UseSSE<=1); 9883 match(Set Y (PowD X Y)); // Raise X to the Yth power 9884 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9885 format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %} 9886 ins_encode %{ 9887 __ subptr(rsp, 8); 9888 __ fld_s($X$$reg - 1); 9889 __ fast_pow(); 9890 __ addptr(rsp, 8); 9891 %} 9892 ins_pipe( pipe_slow ); 9893 %} 9894 9895 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, 
eFlagsReg cr) %{ 9896 predicate (UseSSE>=2); 9897 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 9898 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9899 format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %} 9900 ins_encode %{ 9901 __ subptr(rsp, 8); 9902 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 9903 __ fld_d(Address(rsp, 0)); 9904 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 9905 __ fld_d(Address(rsp, 0)); 9906 __ fast_pow(); 9907 __ fstp_d(Address(rsp, 0)); 9908 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 9909 __ addptr(rsp, 8); 9910 %} 9911 ins_pipe( pipe_slow ); 9912 %} 9913 9914 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ 9915 predicate (UseSSE<=1); 9916 // The source Double operand on FPU stack 9917 match(Set dst (Log10D src)); 9918 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9919 // fxch ; swap ST(0) with ST(1) 9920 // fyl2x ; compute log_10(2) * log_2(x) 9921 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9922 "FXCH \n\t" 9923 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9924 %} 9925 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9926 Opcode(0xD9), Opcode(0xC9), // fxch 9927 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9928 9929 ins_pipe( pipe_slow ); 9930 %} 9931 9932 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ 9933 predicate (UseSSE>=2); 9934 effect(KILL cr); 9935 match(Set dst (Log10D src)); 9936 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9937 // fyl2x ; compute log_10(2) * log_2(x) 9938 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9939 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9940 %} 9941 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9942 Push_SrcD(src), 9943 Opcode(0xD9), Opcode(0xF1), // fyl2x 9944 Push_ResultD(dst)); 9945 9946 ins_pipe( pipe_slow ); 9947 %} 9948 9949 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{ 9950 predicate (UseSSE<=1); 9951 // The source Double operand on FPU stack 9952 match(Set dst (LogD src)); 9953 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit 
number 9954 // fxch ; swap ST(0) with ST(1) 9955 // fyl2x ; compute log_e(2) * log_2(x) 9956 format %{ "FLDLN2 \t\t\t#Log_e\n\t" 9957 "FXCH \n\t" 9958 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 9959 %} 9960 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 9961 Opcode(0xD9), Opcode(0xC9), // fxch 9962 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9963 9964 ins_pipe( pipe_slow ); 9965 %} 9966 9967 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{ 9968 predicate (UseSSE>=2); 9969 effect(KILL cr); 9970 // The source and result Double operands in XMM registers 9971 match(Set dst (LogD src)); 9972 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 9973 // fyl2x ; compute log_e(2) * log_2(x) 9974 format %{ "FLDLN2 \t\t\t#Log_e\n\t" 9975 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 9976 %} 9977 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 9978 Push_SrcD(src), 9979 Opcode(0xD9), Opcode(0xF1), // fyl2x 9980 Push_ResultD(dst)); 9981 ins_pipe( pipe_slow ); 9982 %} 9983 9984 //-------------Float Instructions------------------------------- 9985 // Float Math 9986 9987 // Code for float compare: 9988 // fcompp(); 9989 // fwait(); fnstsw_ax(); 9990 // sahf(); 9991 // movl(dst, unordered_result); 9992 // jcc(Assembler::parity, exit); 9993 // movl(dst, less_result); 9994 // jcc(Assembler::below, exit); 9995 // movl(dst, equal_result); 9996 // jcc(Assembler::equal, exit); 9997 // movl(dst, greater_result); 9998 // exit: 9999 10000 // P6 version of float compare, sets condition codes in EFLAGS 10001 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10002 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10003 match(Set cr (CmpF src1 src2)); 10004 effect(KILL rax); 10005 ins_cost(150); 10006 format %{ "FLD $src1\n\t" 10007 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10008 "JNP exit\n\t" 10009 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10010 "SAHF\n" 10011 "exit:\tNOP // avoid branch to branch" %} 10012 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10013 
ins_encode( Push_Reg_DPR(src1), 10014 OpcP, RegOpc(src2), 10015 cmpF_P6_fixup ); 10016 ins_pipe( pipe_slow ); 10017 %} 10018 10019 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10020 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10021 match(Set cr (CmpF src1 src2)); 10022 ins_cost(100); 10023 format %{ "FLD $src1\n\t" 10024 "FUCOMIP ST,$src2 // P6 instruction" %} 10025 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10026 ins_encode( Push_Reg_DPR(src1), 10027 OpcP, RegOpc(src2)); 10028 ins_pipe( pipe_slow ); 10029 %} 10030 10031 10032 // Compare & branch 10033 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10034 predicate(UseSSE == 0); 10035 match(Set cr (CmpF src1 src2)); 10036 effect(KILL rax); 10037 ins_cost(200); 10038 format %{ "FLD $src1\n\t" 10039 "FCOMp $src2\n\t" 10040 "FNSTSW AX\n\t" 10041 "TEST AX,0x400\n\t" 10042 "JZ,s flags\n\t" 10043 "MOV AH,1\t# unordered treat as LT\n" 10044 "flags:\tSAHF" %} 10045 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10046 ins_encode( Push_Reg_DPR(src1), 10047 OpcP, RegOpc(src2), 10048 fpu_flags); 10049 ins_pipe( pipe_slow ); 10050 %} 10051 10052 // Compare vs zero into -1,0,1 10053 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10054 predicate(UseSSE == 0); 10055 match(Set dst (CmpF3 src1 zero)); 10056 effect(KILL cr, KILL rax); 10057 ins_cost(280); 10058 format %{ "FTSTF $dst,$src1" %} 10059 opcode(0xE4, 0xD9); 10060 ins_encode( Push_Reg_DPR(src1), 10061 OpcS, OpcP, PopFPU, 10062 CmpF_Result(dst)); 10063 ins_pipe( pipe_slow ); 10064 %} 10065 10066 // Compare into -1,0,1 10067 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10068 predicate(UseSSE == 0); 10069 match(Set dst (CmpF3 src1 src2)); 10070 effect(KILL cr, KILL rax); 10071 ins_cost(300); 10072 format %{ "FCMPF $dst,$src1,$src2" %} 10073 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10074 ins_encode( Push_Reg_DPR(src1), 10075 OpcP, 
RegOpc(src2), 10076 CmpF_Result(dst)); 10077 ins_pipe( pipe_slow ); 10078 %} 10079 10080 // float compare and set condition codes in EFLAGS by XMM regs 10081 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10082 predicate(UseSSE>=1); 10083 match(Set cr (CmpF src1 src2)); 10084 ins_cost(145); 10085 format %{ "UCOMISS $src1,$src2\n\t" 10086 "JNP,s exit\n\t" 10087 "PUSHF\t# saw NaN, set CF\n\t" 10088 "AND [rsp], #0xffffff2b\n\t" 10089 "POPF\n" 10090 "exit:" %} 10091 ins_encode %{ 10092 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10093 emit_cmpfp_fixup(_masm); 10094 %} 10095 ins_pipe( pipe_slow ); 10096 %} 10097 10098 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10099 predicate(UseSSE>=1); 10100 match(Set cr (CmpF src1 src2)); 10101 ins_cost(100); 10102 format %{ "UCOMISS $src1,$src2" %} 10103 ins_encode %{ 10104 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10105 %} 10106 ins_pipe( pipe_slow ); 10107 %} 10108 10109 // float compare and set condition codes in EFLAGS by XMM regs 10110 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10111 predicate(UseSSE>=1); 10112 match(Set cr (CmpF src1 (LoadF src2))); 10113 ins_cost(165); 10114 format %{ "UCOMISS $src1,$src2\n\t" 10115 "JNP,s exit\n\t" 10116 "PUSHF\t# saw NaN, set CF\n\t" 10117 "AND [rsp], #0xffffff2b\n\t" 10118 "POPF\n" 10119 "exit:" %} 10120 ins_encode %{ 10121 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10122 emit_cmpfp_fixup(_masm); 10123 %} 10124 ins_pipe( pipe_slow ); 10125 %} 10126 10127 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10128 predicate(UseSSE>=1); 10129 match(Set cr (CmpF src1 (LoadF src2))); 10130 ins_cost(100); 10131 format %{ "UCOMISS $src1,$src2" %} 10132 ins_encode %{ 10133 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10134 %} 10135 ins_pipe( pipe_slow ); 10136 %} 10137 10138 // Compare into -1,0,1 in XMM 10139 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10140 predicate(UseSSE>=1); 10141 
match(Set dst (CmpF3 src1 src2)); 10142 effect(KILL cr); 10143 ins_cost(255); 10144 format %{ "UCOMISS $src1, $src2\n\t" 10145 "MOV $dst, #-1\n\t" 10146 "JP,s done\n\t" 10147 "JB,s done\n\t" 10148 "SETNE $dst\n\t" 10149 "MOVZB $dst, $dst\n" 10150 "done:" %} 10151 ins_encode %{ 10152 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10153 emit_cmpfp3(_masm, $dst$$Register); 10154 %} 10155 ins_pipe( pipe_slow ); 10156 %} 10157 10158 // Compare into -1,0,1 in XMM and memory 10159 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10160 predicate(UseSSE>=1); 10161 match(Set dst (CmpF3 src1 (LoadF src2))); 10162 effect(KILL cr); 10163 ins_cost(275); 10164 format %{ "UCOMISS $src1, $src2\n\t" 10165 "MOV $dst, #-1\n\t" 10166 "JP,s done\n\t" 10167 "JB,s done\n\t" 10168 "SETNE $dst\n\t" 10169 "MOVZB $dst, $dst\n" 10170 "done:" %} 10171 ins_encode %{ 10172 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10173 emit_cmpfp3(_masm, $dst$$Register); 10174 %} 10175 ins_pipe( pipe_slow ); 10176 %} 10177 10178 // Spill to obtain 24-bit precision 10179 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10180 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10181 match(Set dst (SubF src1 src2)); 10182 10183 format %{ "FSUB $dst,$src1 - $src2" %} 10184 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10185 ins_encode( Push_Reg_FPR(src1), 10186 OpcReg_FPR(src2), 10187 Pop_Mem_FPR(dst) ); 10188 ins_pipe( fpu_mem_reg_reg ); 10189 %} 10190 // 10191 // This instruction does not round to 24-bits 10192 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10193 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10194 match(Set dst (SubF dst src)); 10195 10196 format %{ "FSUB $dst,$src" %} 10197 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10198 ins_encode( Push_Reg_FPR(src), 10199 OpcP, RegOpc(dst) ); 10200 ins_pipe( fpu_reg_reg ); 10201 %} 10202 10203 // Spill to obtain 24-bit precision 10204 instruct 
addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10205 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10206 match(Set dst (AddF src1 src2)); 10207 10208 format %{ "FADD $dst,$src1,$src2" %} 10209 opcode(0xD8, 0x0); /* D8 C0+i */ 10210 ins_encode( Push_Reg_FPR(src2), 10211 OpcReg_FPR(src1), 10212 Pop_Mem_FPR(dst) ); 10213 ins_pipe( fpu_mem_reg_reg ); 10214 %} 10215 // 10216 // This instruction does not round to 24-bits 10217 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10218 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10219 match(Set dst (AddF dst src)); 10220 10221 format %{ "FLD $src\n\t" 10222 "FADDp $dst,ST" %} 10223 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10224 ins_encode( Push_Reg_FPR(src), 10225 OpcP, RegOpc(dst) ); 10226 ins_pipe( fpu_reg_reg ); 10227 %} 10228 10229 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10230 predicate(UseSSE==0); 10231 match(Set dst (AbsF src)); 10232 ins_cost(100); 10233 format %{ "FABS" %} 10234 opcode(0xE1, 0xD9); 10235 ins_encode( OpcS, OpcP ); 10236 ins_pipe( fpu_reg_reg ); 10237 %} 10238 10239 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10240 predicate(UseSSE==0); 10241 match(Set dst (NegF src)); 10242 ins_cost(100); 10243 format %{ "FCHS" %} 10244 opcode(0xE0, 0xD9); 10245 ins_encode( OpcS, OpcP ); 10246 ins_pipe( fpu_reg_reg ); 10247 %} 10248 10249 // Cisc-alternate to addFPR_reg 10250 // Spill to obtain 24-bit precision 10251 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10252 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10253 match(Set dst (AddF src1 (LoadF src2))); 10254 10255 format %{ "FLD $src2\n\t" 10256 "FADD ST,$src1\n\t" 10257 "FSTP_S $dst" %} 10258 opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ 10259 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10260 OpcReg_FPR(src1), 10261 Pop_Mem_FPR(dst) ); 10262 ins_pipe( fpu_mem_reg_mem ); 10263 %} 10264 // 10265 // Cisc-alternate to addFPR_reg 10266 
// This instruction does not round to 24-bits
// In-place x87 add with a subsumed load: dst := dst + [src].
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
// Add with the FIRST operand in memory; result spilled to force float rounding.
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
// Both operands from memory, second one subsumed from an explicit LoadF.
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
// Memory-memory add (both inputs already spilled by the allocator).
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary),
              RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
// Add a float constant (materialized in the constant table); store to a
// stack slot rounds the result to 24 bits.
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
// Same as above but the result stays in an FPU register (FSTP ST(i)).
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Result is popped into a register (Pop_Reg_FPR => FSTP ST(i)), not stored
// single-precision; the format below reflects that.
// (Fixed: format previously printed "FSTP_S $dst", which misdescribed the
// emitted FSTP ST(i) and contradicted the no-24-bit-rounding contract.)
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
// Memory-memory multiply.
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
// Multiply by a constant-table float; memory store rounds to 24 bits.
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// In-place divide: dst := dst / src.
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Spill to obtain 24-bit precision
// Float remainder via the shared double-precision FPREM helper.
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: bounce both operands through the stack onto the x87
// unit (SSE has no FPREM equivalent), loop on FPREM until C2 clears, then
// move the result back to an XMM register.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted.  Please keep it that way!

// Round an x87 double down to float precision via a single-precision store.
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round an x87 (80-bit) value down to double precision via a 64-bit store.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// x87 source, XMM destination: store single through the stack, then MOVSS.
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      // Source is not at top-of-stack: load ST(i) and store-pop.
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      // Source already in ST(0): non-popping store keeps the stack balanced.
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Float-to-double widening is exact; a register-register pop suffices.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// XMM float source, x87 double destination: pass the value via the stack.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
// 0x80000000 is the hardware's "integer indefinite" overflow/NaN marker;
// seeing it routes to the d2i_wrapper stub for exact Java semantics.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // EDX:EAX == 0x80000000:00000000 is the indefinite-integer marker;
    // fall into the d2l_wrapper stub for correct overflow/NaN handling.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
// SSE float -> int: CVTTSI truncates; the 0x80000000 indefinite-integer
// result diverts to the d2i_wrapper stub (works for float inputs too).
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: reload the float onto the x87 stack and call the stub.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// x87 float -> long; result delivered in EDX:EAX (eADXRegL).
instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
// SSE float -> long: bounce through the x87 unit in truncating rounding
// mode; 0x80000000:00000000 in EDX:EAX marks overflow/NaN and takes the
// d2l_wrapper slow path.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Slow path: reload the float and call the stub for exact semantics.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// int (spilled to stack) -> x87 double via FILD; conversion is exact.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// int (GPR) -> SSE double via CVTSI2SD.
instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// int load subsumed into the conversion.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Alternative i2d staying entirely in the XMM domain (MOVD + CVTDQ2PD),
// selected by the UseXmmI2D flag.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// x87 i2d with the int load subsumed (FILD straight from memory).
instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// The predicate peeks at the matched ideal subtree: the input is the result
// of (AndI x 255), i.e. an unsigned byte, so FILD is exact and no 24-bit
// rounding spill is required even in 24-bit mode.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Alternative i2f staying in the XMM domain (MOVD + CVTDQ2PS), selected by
// the UseXmmI2F flag.
instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to a register pair: copy to both halves, then arithmetic
// shift of the high word by 31 replicates the sign bit.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// long -> x87 double: push both halves, FILD the 64-bit int, F-round via
// the stack-slot store (FILD on a 64-bit source is exact for double... the
// final FSTP_D performs the D-round).
instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> SSE double: x87 FILD does the conversion, result moved to XMM
// through the stack.
instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

// long -> SSE float, same x87 round-trip with a single-precision store.
instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

// long -> x87 float; the FSTP_S to the stack slot performs the F-round.
instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// long -> int: just copy the low word.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

// Reinterpret (no conversion) float bits on the stack as an int register.
instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

// Reinterpret: store an x87 float to an int stack slot.
instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Reinterpret: store an XMM float to an int stack slot.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret: move float bits XMM -> GPR directly with MOVD (cheapest form).
instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Reinterpret int bits as a float stack slot (definition continues past
// this chunk; kept verbatim).
instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# 
MoveI2F_reg_stack" %} 11272 ins_encode %{ 11273 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11274 %} 11275 ins_pipe( ialu_mem_reg ); 11276 %} 11277 11278 11279 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11280 predicate(UseSSE==0); 11281 match(Set dst (MoveI2F src)); 11282 effect(DEF dst, USE src); 11283 11284 ins_cost(125); 11285 format %{ "FLD_S $src\n\t" 11286 "FSTP $dst\t# MoveI2F_stack_reg" %} 11287 opcode(0xD9); /* D9 /0, FLD m32real */ 11288 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11289 Pop_Reg_FPR(dst) ); 11290 ins_pipe( fpu_reg_mem ); 11291 %} 11292 11293 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11294 predicate(UseSSE>=1); 11295 match(Set dst (MoveI2F src)); 11296 effect( DEF dst, USE src ); 11297 11298 ins_cost(95); 11299 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11300 ins_encode %{ 11301 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11302 %} 11303 ins_pipe( pipe_slow ); 11304 %} 11305 11306 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11307 predicate(UseSSE>=2); 11308 match(Set dst (MoveI2F src)); 11309 effect( DEF dst, USE src ); 11310 11311 ins_cost(85); 11312 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11313 ins_encode %{ 11314 __ movdl($dst$$XMMRegister, $src$$Register); 11315 %} 11316 ins_pipe( pipe_slow ); 11317 %} 11318 11319 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11320 match(Set dst (MoveD2L src)); 11321 effect(DEF dst, USE src); 11322 11323 ins_cost(250); 11324 format %{ "MOV $dst.lo,$src\n\t" 11325 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11326 opcode(0x8B, 0x8B); 11327 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11328 ins_pipe( ialu_mem_long_reg ); 11329 %} 11330 11331 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11332 predicate(UseSSE<=1); 11333 match(Set dst (MoveD2L src)); 11334 effect(DEF dst, USE src); 11335 11336 ins_cost(125); 11337 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11338 
ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11339 ins_pipe( fpu_mem_reg ); 11340 %} 11341 11342 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11343 predicate(UseSSE>=2); 11344 match(Set dst (MoveD2L src)); 11345 effect(DEF dst, USE src); 11346 ins_cost(95); 11347 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11348 ins_encode %{ 11349 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11350 %} 11351 ins_pipe( pipe_slow ); 11352 %} 11353 11354 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11355 predicate(UseSSE>=2); 11356 match(Set dst (MoveD2L src)); 11357 effect(DEF dst, USE src, TEMP tmp); 11358 ins_cost(85); 11359 format %{ "MOVD $dst.lo,$src\n\t" 11360 "PSHUFLW $tmp,$src,0x4E\n\t" 11361 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11362 ins_encode %{ 11363 __ movdl($dst$$Register, $src$$XMMRegister); 11364 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11365 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11366 %} 11367 ins_pipe( pipe_slow ); 11368 %} 11369 11370 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11371 match(Set dst (MoveL2D src)); 11372 effect(DEF dst, USE src); 11373 11374 ins_cost(200); 11375 format %{ "MOV $dst,$src.lo\n\t" 11376 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11377 opcode(0x89, 0x89); 11378 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11379 ins_pipe( ialu_mem_long_reg ); 11380 %} 11381 11382 11383 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11384 predicate(UseSSE<=1); 11385 match(Set dst (MoveL2D src)); 11386 effect(DEF dst, USE src); 11387 ins_cost(125); 11388 11389 format %{ "FLD_D $src\n\t" 11390 "FSTP $dst\t# MoveL2D_stack_reg" %} 11391 opcode(0xDD); /* DD /0, FLD m64real */ 11392 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11393 Pop_Reg_DPR(dst) ); 11394 ins_pipe( fpu_reg_mem ); 11395 %} 11396 11397 11398 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11399 predicate(UseSSE>=2 && 
UseXmmLoadAndClearUpper); 11400 match(Set dst (MoveL2D src)); 11401 effect(DEF dst, USE src); 11402 11403 ins_cost(95); 11404 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11405 ins_encode %{ 11406 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11407 %} 11408 ins_pipe( pipe_slow ); 11409 %} 11410 11411 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11412 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11413 match(Set dst (MoveL2D src)); 11414 effect(DEF dst, USE src); 11415 11416 ins_cost(95); 11417 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11418 ins_encode %{ 11419 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11420 %} 11421 ins_pipe( pipe_slow ); 11422 %} 11423 11424 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11425 predicate(UseSSE>=2); 11426 match(Set dst (MoveL2D src)); 11427 effect(TEMP dst, USE src, TEMP tmp); 11428 ins_cost(85); 11429 format %{ "MOVD $dst,$src.lo\n\t" 11430 "MOVD $tmp,$src.hi\n\t" 11431 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11432 ins_encode %{ 11433 __ movdl($dst$$XMMRegister, $src$$Register); 11434 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11435 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11436 %} 11437 ins_pipe( pipe_slow ); 11438 %} 11439 11440 11441 // ======================================================================= 11442 // fast clearing of an array 11443 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11444 predicate(!UseFastStosb); 11445 match(Set dummy (ClearArray cnt base)); 11446 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11447 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11448 "SHL ECX,1\t# Convert doublewords to words\n\t" 11449 "REP STOS\t# store EAX into [EDI++] while ECX--" %} 11450 ins_encode %{ 11451 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11452 %} 11453 ins_pipe( pipe_slow ); 11454 %} 11455 11456 instruct 
rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11457 predicate(UseFastStosb); 11458 match(Set dummy (ClearArray cnt base)); 11459 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11460 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11461 "SHL ECX,3\t# Convert doublewords to bytes\n\t" 11462 "REP STOSB\t# store EAX into [EDI++] while ECX--" %} 11463 ins_encode %{ 11464 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11465 %} 11466 ins_pipe( pipe_slow ); 11467 %} 11468 11469 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11470 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11471 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11472 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11473 11474 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11475 ins_encode %{ 11476 __ string_compare($str1$$Register, $str2$$Register, 11477 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11478 $tmp1$$XMMRegister); 11479 %} 11480 ins_pipe( pipe_slow ); 11481 %} 11482 11483 // fast string equals 11484 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11485 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11486 match(Set result (StrEquals (Binary str1 str2) cnt)); 11487 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11488 11489 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11490 ins_encode %{ 11491 __ char_arrays_equals(false, $str1$$Register, $str2$$Register, 11492 $cnt$$Register, $result$$Register, $tmp3$$Register, 11493 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11494 %} 11495 ins_pipe( pipe_slow ); 11496 %} 11497 11498 // fast search of substring with known size. 
11499 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11500 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11501 predicate(UseSSE42Intrinsics); 11502 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11503 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11504 11505 format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11506 ins_encode %{ 11507 int icnt2 = (int)$int_cnt2$$constant; 11508 if (icnt2 >= 8) { 11509 // IndexOf for constant substrings with size >= 8 elements 11510 // which don't need to be loaded through stack. 11511 __ string_indexofC8($str1$$Register, $str2$$Register, 11512 $cnt1$$Register, $cnt2$$Register, 11513 icnt2, $result$$Register, 11514 $vec$$XMMRegister, $tmp$$Register); 11515 } else { 11516 // Small strings are loaded through stack if they cross page boundary. 11517 __ string_indexof($str1$$Register, $str2$$Register, 11518 $cnt1$$Register, $cnt2$$Register, 11519 icnt2, $result$$Register, 11520 $vec$$XMMRegister, $tmp$$Register); 11521 } 11522 %} 11523 ins_pipe( pipe_slow ); 11524 %} 11525 11526 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11527 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11528 predicate(UseSSE42Intrinsics); 11529 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11530 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11531 11532 format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11533 ins_encode %{ 11534 __ string_indexof($str1$$Register, $str2$$Register, 11535 $cnt1$$Register, $cnt2$$Register, 11536 (-1), $result$$Register, 11537 $vec$$XMMRegister, $tmp$$Register); 11538 %} 11539 ins_pipe( pipe_slow ); 11540 %} 11541 11542 // fast array equals 11543 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 
11544 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11545 %{ 11546 match(Set result (AryEq ary1 ary2)); 11547 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11548 //ins_cost(300); 11549 11550 format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11551 ins_encode %{ 11552 __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register, 11553 $tmp3$$Register, $result$$Register, $tmp4$$Register, 11554 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11555 %} 11556 ins_pipe( pipe_slow ); 11557 %} 11558 11559 // encode char[] to byte[] in ISO_8859_1 11560 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 11561 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11562 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11563 match(Set result (EncodeISOArray src (Binary dst len))); 11564 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11565 11566 format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 11567 ins_encode %{ 11568 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 11569 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11570 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11571 %} 11572 ins_pipe( pipe_slow ); 11573 %} 11574 11575 11576 //----------Control Flow Instructions------------------------------------------ 11577 // Signed compare Instructions 11578 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ 11579 match(Set cr (CmpI op1 op2)); 11580 effect( DEF cr, USE op1, USE op2 ); 11581 format %{ "CMP $op1,$op2" %} 11582 opcode(0x3B); /* Opcode 3B /r */ 11583 ins_encode( OpcP, RegReg( op1, op2) ); 11584 ins_pipe( ialu_cr_reg_reg ); 11585 %} 11586 11587 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 11588 match(Set cr (CmpI op1 op2)); 11589 effect( DEF cr, USE op1 ); 11590 format %{ "CMP $op1,$op2" 
%} 11591 opcode(0x81,0x07); /* Opcode 81 /7 */ 11592 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 11593 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11594 ins_pipe( ialu_cr_reg_imm ); 11595 %} 11596 11597 // Cisc-spilled version of cmpI_eReg 11598 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 11599 match(Set cr (CmpI op1 (LoadI op2))); 11600 11601 format %{ "CMP $op1,$op2" %} 11602 ins_cost(500); 11603 opcode(0x3B); /* Opcode 3B /r */ 11604 ins_encode( OpcP, RegMem( op1, op2) ); 11605 ins_pipe( ialu_cr_reg_mem ); 11606 %} 11607 11608 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{ 11609 match(Set cr (CmpI src zero)); 11610 effect( DEF cr, USE src ); 11611 11612 format %{ "TEST $src,$src" %} 11613 opcode(0x85); 11614 ins_encode( OpcP, RegReg( src, src ) ); 11615 ins_pipe( ialu_cr_reg_imm ); 11616 %} 11617 11618 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{ 11619 match(Set cr (CmpI (AndI src con) zero)); 11620 11621 format %{ "TEST $src,$con" %} 11622 opcode(0xF7,0x00); 11623 ins_encode( OpcP, RegOpc(src), Con32(con) ); 11624 ins_pipe( ialu_cr_reg_imm ); 11625 %} 11626 11627 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{ 11628 match(Set cr (CmpI (AndI src mem) zero)); 11629 11630 format %{ "TEST $src,$mem" %} 11631 opcode(0x85); 11632 ins_encode( OpcP, RegMem( src, mem ) ); 11633 ins_pipe( ialu_cr_reg_mem ); 11634 %} 11635 11636 // Unsigned compare Instructions; really, same as signed except they 11637 // produce an eFlagsRegU instead of eFlagsReg. 
11638 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ 11639 match(Set cr (CmpU op1 op2)); 11640 11641 format %{ "CMPu $op1,$op2" %} 11642 opcode(0x3B); /* Opcode 3B /r */ 11643 ins_encode( OpcP, RegReg( op1, op2) ); 11644 ins_pipe( ialu_cr_reg_reg ); 11645 %} 11646 11647 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ 11648 match(Set cr (CmpU op1 op2)); 11649 11650 format %{ "CMPu $op1,$op2" %} 11651 opcode(0x81,0x07); /* Opcode 81 /7 */ 11652 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11653 ins_pipe( ialu_cr_reg_imm ); 11654 %} 11655 11656 // // Cisc-spilled version of cmpU_eReg 11657 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ 11658 match(Set cr (CmpU op1 (LoadI op2))); 11659 11660 format %{ "CMPu $op1,$op2" %} 11661 ins_cost(500); 11662 opcode(0x3B); /* Opcode 3B /r */ 11663 ins_encode( OpcP, RegMem( op1, op2) ); 11664 ins_pipe( ialu_cr_reg_mem ); 11665 %} 11666 11667 // // Cisc-spilled version of cmpU_eReg 11668 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ 11669 // match(Set cr (CmpU (LoadI op1) op2)); 11670 // 11671 // format %{ "CMPu $op1,$op2" %} 11672 // ins_cost(500); 11673 // opcode(0x39); /* Opcode 39 /r */ 11674 // ins_encode( OpcP, RegMem( op1, op2) ); 11675 //%} 11676 11677 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{ 11678 match(Set cr (CmpU src zero)); 11679 11680 format %{ "TESTu $src,$src" %} 11681 opcode(0x85); 11682 ins_encode( OpcP, RegReg( src, src ) ); 11683 ins_pipe( ialu_cr_reg_imm ); 11684 %} 11685 11686 // Unsigned pointer compare Instructions 11687 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{ 11688 match(Set cr (CmpP op1 op2)); 11689 11690 format %{ "CMPu $op1,$op2" %} 11691 opcode(0x3B); /* Opcode 3B /r */ 11692 ins_encode( OpcP, RegReg( op1, op2) ); 11693 ins_pipe( ialu_cr_reg_reg ); 11694 %} 11695 11696 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{ 11697 match(Set cr (CmpP op1 op2)); 11698 11699 format %{ 
"CMPu $op1,$op2" %} 11700 opcode(0x81,0x07); /* Opcode 81 /7 */ 11701 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11702 ins_pipe( ialu_cr_reg_imm ); 11703 %} 11704 11705 // // Cisc-spilled version of cmpP_eReg 11706 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{ 11707 match(Set cr (CmpP op1 (LoadP op2))); 11708 11709 format %{ "CMPu $op1,$op2" %} 11710 ins_cost(500); 11711 opcode(0x3B); /* Opcode 3B /r */ 11712 ins_encode( OpcP, RegMem( op1, op2) ); 11713 ins_pipe( ialu_cr_reg_mem ); 11714 %} 11715 11716 // // Cisc-spilled version of cmpP_eReg 11717 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{ 11718 // match(Set cr (CmpP (LoadP op1) op2)); 11719 // 11720 // format %{ "CMPu $op1,$op2" %} 11721 // ins_cost(500); 11722 // opcode(0x39); /* Opcode 39 /r */ 11723 // ins_encode( OpcP, RegMem( op1, op2) ); 11724 //%} 11725 11726 // Compare raw pointer (used in out-of-heap check). 11727 // Only works because non-oop pointers must be raw pointers 11728 // and raw pointers have no anti-dependencies. 11729 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{ 11730 predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none ); 11731 match(Set cr (CmpP op1 (LoadP op2))); 11732 11733 format %{ "CMPu $op1,$op2" %} 11734 opcode(0x3B); /* Opcode 3B /r */ 11735 ins_encode( OpcP, RegMem( op1, op2) ); 11736 ins_pipe( ialu_cr_reg_mem ); 11737 %} 11738 11739 // 11740 // This will generate a signed flags result. This should be ok 11741 // since any compare to a zero should be eq/neq. 11742 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{ 11743 match(Set cr (CmpP src zero)); 11744 11745 format %{ "TEST $src,$src" %} 11746 opcode(0x85); 11747 ins_encode( OpcP, RegReg( src, src ) ); 11748 ins_pipe( ialu_cr_reg_imm ); 11749 %} 11750 11751 // Cisc-spilled version of testP_reg 11752 // This will generate a signed flags result. This should be ok 11753 // since any compare to a zero should be eq/neq. 
11754 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{ 11755 match(Set cr (CmpP (LoadP op) zero)); 11756 11757 format %{ "TEST $op,0xFFFFFFFF" %} 11758 ins_cost(500); 11759 opcode(0xF7); /* Opcode F7 /0 */ 11760 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); 11761 ins_pipe( ialu_cr_reg_imm ); 11762 %} 11763 11764 // Yanked all unsigned pointer compare operations. 11765 // Pointer compares are done with CmpP which is already unsigned. 11766 11767 //----------Max and Min-------------------------------------------------------- 11768 // Min Instructions 11769 //// 11770 // *** Min and Max using the conditional move are slower than the 11771 // *** branch version on a Pentium III. 11772 // // Conditional move for min 11773 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11774 // effect( USE_DEF op2, USE op1, USE cr ); 11775 // format %{ "CMOVlt $op2,$op1\t! min" %} 11776 // opcode(0x4C,0x0F); 11777 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11778 // ins_pipe( pipe_cmov_reg ); 11779 //%} 11780 // 11781 //// Min Register with Register (P6 version) 11782 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11783 // predicate(VM_Version::supports_cmov() ); 11784 // match(Set op2 (MinI op1 op2)); 11785 // ins_cost(200); 11786 // expand %{ 11787 // eFlagsReg cr; 11788 // compI_eReg(cr,op1,op2); 11789 // cmovI_reg_lt(op2,op1,cr); 11790 // %} 11791 //%} 11792 11793 // Min Register with Register (generic version) 11794 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11795 match(Set dst (MinI dst src)); 11796 effect(KILL flags); 11797 ins_cost(300); 11798 11799 format %{ "MIN $dst,$src" %} 11800 opcode(0xCC); 11801 ins_encode( min_enc(dst,src) ); 11802 ins_pipe( pipe_slow ); 11803 %} 11804 11805 // Max Register with Register 11806 // *** Min and Max using the conditional move are slower than the 11807 // *** branch version on a Pentium III. 
11808 // // Conditional move for max 11809 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11810 // effect( USE_DEF op2, USE op1, USE cr ); 11811 // format %{ "CMOVgt $op2,$op1\t! max" %} 11812 // opcode(0x4F,0x0F); 11813 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11814 // ins_pipe( pipe_cmov_reg ); 11815 //%} 11816 // 11817 // // Max Register with Register (P6 version) 11818 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11819 // predicate(VM_Version::supports_cmov() ); 11820 // match(Set op2 (MaxI op1 op2)); 11821 // ins_cost(200); 11822 // expand %{ 11823 // eFlagsReg cr; 11824 // compI_eReg(cr,op1,op2); 11825 // cmovI_reg_gt(op2,op1,cr); 11826 // %} 11827 //%} 11828 11829 // Max Register with Register (generic version) 11830 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11831 match(Set dst (MaxI dst src)); 11832 effect(KILL flags); 11833 ins_cost(300); 11834 11835 format %{ "MAX $dst,$src" %} 11836 opcode(0xCC); 11837 ins_encode( max_enc(dst,src) ); 11838 ins_pipe( pipe_slow ); 11839 %} 11840 11841 // ============================================================================ 11842 // Counted Loop limit node which represents exact final iterator value. 11843 // Note: the resulting value should fit into integer range since 11844 // counted loops have limit check on overflow. 11845 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ 11846 match(Set limit (LoopLimit (Binary init limit) stride)); 11847 effect(TEMP limit_hi, TEMP tmp, KILL flags); 11848 ins_cost(300); 11849 11850 format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} 11851 ins_encode %{ 11852 int strd = (int)$stride$$constant; 11853 assert(strd != 1 && strd != -1, "sanity"); 11854 int m1 = (strd > 0) ? 
1 : -1; 11855 // Convert limit to long (EAX:EDX) 11856 __ cdql(); 11857 // Convert init to long (init:tmp) 11858 __ movl($tmp$$Register, $init$$Register); 11859 __ sarl($tmp$$Register, 31); 11860 // $limit - $init 11861 __ subl($limit$$Register, $init$$Register); 11862 __ sbbl($limit_hi$$Register, $tmp$$Register); 11863 // + ($stride - 1) 11864 if (strd > 0) { 11865 __ addl($limit$$Register, (strd - 1)); 11866 __ adcl($limit_hi$$Register, 0); 11867 __ movl($tmp$$Register, strd); 11868 } else { 11869 __ addl($limit$$Register, (strd + 1)); 11870 __ adcl($limit_hi$$Register, -1); 11871 __ lneg($limit_hi$$Register, $limit$$Register); 11872 __ movl($tmp$$Register, -strd); 11873 } 11874 // signed division: (EAX:EDX) / pos_stride 11875 __ idivl($tmp$$Register); 11876 if (strd < 0) { 11877 // restore sign 11878 __ negl($tmp$$Register); 11879 } 11880 // (EAX) * stride 11881 __ mull($tmp$$Register); 11882 // + init (ignore upper bits) 11883 __ addl($limit$$Register, $init$$Register); 11884 %} 11885 ins_pipe( pipe_slow ); 11886 %} 11887 11888 // ============================================================================ 11889 // Branch Instructions 11890 // Jump Table 11891 instruct jumpXtnd(rRegI switch_val) %{ 11892 match(Jump switch_val); 11893 ins_cost(350); 11894 format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} 11895 ins_encode %{ 11896 // Jump to Address(table_base + switch_reg) 11897 Address index(noreg, $switch_val$$Register, Address::times_1); 11898 __ jump(ArrayAddress($constantaddress, index)); 11899 %} 11900 ins_pipe(pipe_jmp); 11901 %} 11902 11903 // Jump Direct - Label defines a relative address from JMP+1 11904 instruct jmpDir(label labl) %{ 11905 match(Goto); 11906 effect(USE labl); 11907 11908 ins_cost(300); 11909 format %{ "JMP $labl" %} 11910 size(5); 11911 ins_encode %{ 11912 Label* L = $labl$$label; 11913 __ jmp(*L, false); // Always long jump 11914 %} 11915 ins_pipe( pipe_jmp ); 11916 %} 11917 11918 // Jump Direct Conditional - Label defines a 
relative address from Jcc+1 11919 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{ 11920 match(If cop cr); 11921 effect(USE labl); 11922 11923 ins_cost(300); 11924 format %{ "J$cop $labl" %} 11925 size(6); 11926 ins_encode %{ 11927 Label* L = $labl$$label; 11928 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 11929 %} 11930 ins_pipe( pipe_jcc ); 11931 %} 11932 11933 // Jump Direct Conditional - Label defines a relative address from Jcc+1 11934 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{ 11935 match(CountedLoopEnd cop cr); 11936 effect(USE labl); 11937 11938 ins_cost(300); 11939 format %{ "J$cop $labl\t# Loop end" %} 11940 size(6); 11941 ins_encode %{ 11942 Label* L = $labl$$label; 11943 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 11944 %} 11945 ins_pipe( pipe_jcc ); 11946 %} 11947 11948 // Jump Direct Conditional - Label defines a relative address from Jcc+1 11949 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 11950 match(CountedLoopEnd cop cmp); 11951 effect(USE labl); 11952 11953 ins_cost(300); 11954 format %{ "J$cop,u $labl\t# Loop end" %} 11955 size(6); 11956 ins_encode %{ 11957 Label* L = $labl$$label; 11958 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 11959 %} 11960 ins_pipe( pipe_jcc ); 11961 %} 11962 11963 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 11964 match(CountedLoopEnd cop cmp); 11965 effect(USE labl); 11966 11967 ins_cost(200); 11968 format %{ "J$cop,u $labl\t# Loop end" %} 11969 size(6); 11970 ins_encode %{ 11971 Label* L = $labl$$label; 11972 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 11973 %} 11974 ins_pipe( pipe_jcc ); 11975 %} 11976 11977 // Jump Direct Conditional - using unsigned comparison 11978 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ 11979 match(If cop cmp); 11980 effect(USE labl); 11981 11982 ins_cost(300); 11983 format %{ 
"J$cop,u $labl" %} 11984 size(6); 11985 ins_encode %{ 11986 Label* L = $labl$$label; 11987 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 11988 %} 11989 ins_pipe(pipe_jcc); 11990 %} 11991 11992 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ 11993 match(If cop cmp); 11994 effect(USE labl); 11995 11996 ins_cost(200); 11997 format %{ "J$cop,u $labl" %} 11998 size(6); 11999 ins_encode %{ 12000 Label* L = $labl$$label; 12001 __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump 12002 %} 12003 ins_pipe(pipe_jcc); 12004 %} 12005 12006 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ 12007 match(If cop cmp); 12008 effect(USE labl); 12009 12010 ins_cost(200); 12011 format %{ $$template 12012 if ($cop$$cmpcode == Assembler::notEqual) { 12013 $$emit$$"JP,u $labl\n\t" 12014 $$emit$$"J$cop,u $labl" 12015 } else { 12016 $$emit$$"JP,u done\n\t" 12017 $$emit$$"J$cop,u $labl\n\t" 12018 $$emit$$"done:" 12019 } 12020 %} 12021 ins_encode %{ 12022 Label* l = $labl$$label; 12023 if ($cop$$cmpcode == Assembler::notEqual) { 12024 __ jcc(Assembler::parity, *l, false); 12025 __ jcc(Assembler::notEqual, *l, false); 12026 } else if ($cop$$cmpcode == Assembler::equal) { 12027 Label done; 12028 __ jccb(Assembler::parity, done); 12029 __ jcc(Assembler::equal, *l, false); 12030 __ bind(done); 12031 } else { 12032 ShouldNotReachHere(); 12033 } 12034 %} 12035 ins_pipe(pipe_jcc); 12036 %} 12037 12038 // ============================================================================ 12039 // The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass 12040 // array for an instance of the superklass. Set a hidden internal cache on a 12041 // hit (cache is checked with exposed code in gen_subtype_check()). Return 12042 // NZ for a miss or zero for a hit. The encoding ALSO sets flags. 
12043 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{ 12044 match(Set result (PartialSubtypeCheck sub super)); 12045 effect( KILL rcx, KILL cr ); 12046 12047 ins_cost(1100); // slightly larger than the next version 12048 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12049 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12050 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12051 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12052 "JNE,s miss\t\t# Missed: EDI not-zero\n\t" 12053 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" 12054 "XOR $result,$result\t\t Hit: EDI zero\n\t" 12055 "miss:\t" %} 12056 12057 opcode(0x1); // Force a XOR of EDI 12058 ins_encode( enc_PartialSubtypeCheck() ); 12059 ins_pipe( pipe_slow ); 12060 %} 12061 12062 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{ 12063 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 12064 effect( KILL rcx, KILL result ); 12065 12066 ins_cost(1000); 12067 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12068 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12069 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12070 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12071 "JNE,s miss\t\t# Missed: flags NZ\n\t" 12072 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" 12073 "miss:\t" %} 12074 12075 opcode(0x0); // No need to XOR EDI 12076 ins_encode( enc_PartialSubtypeCheck() ); 12077 ins_pipe( pipe_slow ); 12078 %} 12079 12080 // ============================================================================ 12081 // Branch Instructions -- short offset versions 12082 // 12083 // These instructions are used to replace jumps of a long offset (the default 12084 
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  // 2 bytes: short-form opcode + rel8 displacement (jmpb)
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Same as jmpLoopEndU_short but takes eFlagsRegUCF (unordered-compare flags).
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Same as jmpConU_short but takes eFlagsRegUCF (unordered-compare flags).
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Two-jump form for cmpOpUCF2: the parity flag distinguishes the unordered
// case.  For 'ne' an unordered result (PF set) also takes the branch; for
// 'eq' an unordered result skips the branch via the local 'done' label.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Produce -1/0/+1 in $dst for a signed 64-bit comparison: signed compare
// of the high words decides first; ties fall through to an unsigned
// compare (JB) of the low words.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    // $dst starts at 0; INC makes it +1, DEC makes it -1.
    __ xorptr($dst$$Register, $dst$$Register);
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Against zero only the sign of the high word matters, so a single TEST
// of the high word suffices.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// CMP of the low words sets the carry; SBB of the high words then leaves
// the sign/overflow flags valid for a signed LT/GE decision.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  // Two CMOVs, one per 32-bit half of the long.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the BoolTest disjunction below is parenthesized so that the UseSSE
// guard gates BOTH arms; '&&' binds tighter than '||', so without the
// parentheses this x87 rule could match a 'ge' test even when UseSSE>=2
// (the integer cmov rules above already parenthesize this way).
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// A long is zero iff the OR of its two halves is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compare the low halves; only if they are equal is the high-half compare
// needed to decide EQ/NE.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// CMOVE a long under an EQ/NE long-compare result: one CMOV per 32-bit half.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the BoolTest disjunction below is parenthesized so the UseSSE guard
// gates BOTH arms; '&&' binds tighter than '||', so without the parentheses
// this x87 rule could match an 'ne' test even when UseSSE>=2 (the integer
// cmov rules above already parenthesize this way).
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE: the BoolTest disjunction below is parenthesized so the UseSSE guard
// gates BOTH arms; '&&' binds tighter than '||', so without the parentheses
// this x87 rule could match a 'gt' test even when UseSSE>=2 (the integer
// cmov rules above already parenthesize this way).
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // EAX is pre-loaded with a sentinel oop (-1) before the dynamic call.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that touches no floating-point state: no FPU bookkeeping needed.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // The POP discards the return address before the indirect jump.
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// RTM (transactional) variant of fast-lock; selected when use_rtm() is true.
// Needs extra temps ($cx1,$cx2) and passes the RTM counters and the method's
// MethodData to MacroAssembler::fast_lock.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast-lock; mutually exclusive with cmpFastLockRTM via the predicate.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or. By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$. Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic. It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
// Materializes the current thread pointer via MacroAssembler::get_thread().
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the
//   destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Live rule: a loadI (instruction 0, the root) immediately preceded by
// a storeI (instruction 1) that stored the same register (1.src == 0.dst)
// to the same location (1.mem == 0.mem) makes the reload redundant; the
// pair is replaced by the store alone.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.