1 // 2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // archtecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 
//
// SOC = Save-On-Call: The register allocator assumes that these registers
//                     can be used without saving upon entry to the method,
//                     but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS = Always-Save: The register allocator assumes that these registers
//                   must be saved before using them upon entry to the
//                   method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

// reg_def NAME(save-type, C-convention-save-type, ideal type, encoding, VMReg).
// The encoding column (3,1,6,...) is the x86 hardware register number that
// ends up in the emitted opcode bytes.
reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// ESP is No-Save in both conventions: it is managed explicitly by
// prolog/epilog code, never handed to the allocator.
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
// FPR0 is deliberately unallocatable: its VMReg is Bad() so the allocator
// never hands it out; it appears only in instruction encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_with_ebp and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_with_ebp and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_with_ebp and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI)
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_with_ebp and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_with_ebp and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
// Relocation formats used when emitting 32-bit immediates/displacements.
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

// Conventional shorthand so MacroAssembler calls read as "__ op(...)".
#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Writes {lo, hi} into the first 16-byte-aligned slot at or below 'adr'
// and returns that aligned address.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool entry is rounded down inside fp_signmask_pool to a 16-byte
// boundary by double_quadword, hence the extra alignment element above.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Number of bytes emitted immediately before a call: an optional 6-byte
// fldcw (24-bit FP mode) plus an optional 3-byte vzeroupper (wide vectors).
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Byte size of the FFree_Float_Stack_All call sequence; -1 until it has
// been emitted (filled in elsewhere — the assert below relies on that).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModRM-format byte: f1 in bits 7-6, f2 in bits 5-3, f3 in bits 2-0.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte formed by OR-ing a base opcode with a condition code.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Embedded oops must not be scavengable unless ScavengeRootsInCode allows it.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits opcode + ModRM + SIB addressing [ESP+disp], choosing the 8-bit
// displacement form when disp fits in a signed byte.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}

// rRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModRM/SIB/displacement bytes for a register + memory operand.
// index == 0x4 means "no index" (that SIB index encoding is unusable);
// base == -1 requests 32-bit absolute addressing (mode 0, R/M 0x5).
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32     (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32     (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32     (cbuf, displace);
        }
      }
    }
  }
}


// Register-to-register move (MOV r32, r/m32, opcode 0x8B); a self-move
// emits nothing.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in 'dst':
// -1 for less-than or unordered (NaN), 0 for equal, 1 for greater.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    // EBP is pushed explicitly in this path, so drop another word.
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  // verified_entry emits the whole prolog (bang, save EBP, frame setup).
  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  // Byte counts here must stay in sync with MachEpilogNode::size() below.
  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // popl rbp

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85); // test EAX against the polling page
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  // Conservative over-estimate for the optional reserved_stack_check code.
  size += 64; // added to support ReservedStackAccess
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Classify an allocator register for spill-copy selection.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (or format, or size) a load/store between a register and [ESP+offset].
// Returns the accumulated instruction byte size.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  // XMM <-> stack move (float if reg_lo alone, double if reg_lo/reg_hi are
  // an adjacent pair).  Same tri-mode convention as impl_helper: emit into
  // cbuf, or print to st, and always return the accumulated byte size.
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    // Adjacent pair => 64-bit (double) move; affects EVEX disp8 compression.
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size accounting: EVEX can compress the displacement to one byte even
  // when it exceeds 127, so query the assembler in AVX-512 mode.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM -> XMM register move (float or adjacent-pair double).
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// GPR -> XMM move (MOVD); 32-bit payload only.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  // MOVD is 4 bytes with SSE/VEX prefixing, 6 with the EVEX prefix.
  return (UseAVX> 2) ? 6 : 4;
}


// XMM -> GPR move (MOVD); 32-bit payload only.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Integer reg -> reg move: MOV r32, r/m32 (opcode 0x8B), always 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 register to [ESP+offset].  If the value is not already in
// ST(0) it is first FLD'ed to the top of the stack and stored with a
// popping FSTP; otherwise a non-popping FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
      // -1 compensates for the 0/1 indexing of the FPR reg_def encodings.
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // st_op selects the /digit field of the store opcode via impl_helper's
  // reg argument: EBX_num encodes "store & pop", EDX_num "store, no pop".
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else { // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);

// Vector mem -> mem copy.  VecS/VecD go via push/pop through the stack;
// VecX/VecY/VecZ borrow xmm0 as scratch, saving/restoring it in the unused
// area just below ESP.  calc_size is computed up front and cross-checked
// against the actually emitted bytes, so both computations must agree.
static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
                                     int dst_offset, uint ireg, outputStream* st) {
  int calc_size = 0;
  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
  switch (ireg) {
  case Op_VecS:
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    break;
  case Op_VecD: {
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    // Second push/pop pair works on offset+4, which may need a different
    // displacement size than the first pair.
    int tmp_src_offset = src_offset + 4;
    int tmp_dst_offset = dst_offset + 4;
    src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
    dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
    calc_size += 3+src_offset_size + 3+dst_offset_size;
    break;
  }
  case Op_VecX:
  case Op_VecY:
  case Op_VecZ:
    // 6+6 bytes for the xmm0 save/restore (fixed disp8), 5+disp for each of
    // the load and store of the copied value.
    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
    break;
  default:
    ShouldNotReachHere();
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX:
      __ movdqu(Address(rsp, -16), xmm0); // save scratch below ESP
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16)); // restore scratch
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      __ evmovdqul(Address(rsp, -64), xmm0, 2);
      __ evmovdqul(xmm0, Address(rsp, src_offset), 2);
      __ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdqul(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
    assert(size == calc_size, "incorrect size calculation");
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      // NOTE(review): emit path uses popl; "popq" in this 32-bit debug
      // printout looks like a typo — display only, no codegen impact.
      st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq [rsp + #%d]\n\t"
                "pushl [rsp + #%d]\n\t"
                "popq [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
    case Op_VecX:
      st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu xmm0, [rsp + #%d]\n\t"
                "movdqu [rsp + #%d], xmm0\n\t"
                "movdqu xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      // NOTE(review): emit path uses evmovdqul here; printout says vmovdqu.
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  return calc_size;
}

// Workhorse for spill copies.  Shared by format() (cbuf==NULL, do_size
// false: print assembly), emit() (cbuf!=NULL: generate code) and size()
// (cbuf==NULL, do_size true: just return the byte count).  Dispatches on
// the (src, dst) register-class pair of the low halves, then handles the
// optional second (high) halves of long/double values.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // Vector spill copies never touch integer or x87 registers.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // Move the high half first so the low-half push does not clobber it.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
      src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8     (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8     (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      } else {
        emit_opcode (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8     (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                         st->print( "FST %s", Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP is two 2-byte instructions; FST alone is one.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else { // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );          // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Bounce through the 8-byte scratch area just carved below ESP.
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

// Materialize the address of the on-stack lock box: LEA reg,[ESP+offset].
// Uses a disp8 form when the offset fits, disp32 otherwise; size() below
// must match this choice.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  // 7 bytes with a 32-bit displacement, 4 with an 8-bit one (see emit()).
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  if( offset >= 128 ) {
    return 7;
  }
  else {
    return 4;
  }
}

//=============================================================================
#ifndef PRODUCT
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif

// Unverified entry point: inline-cache check.  Compares the cached klass
// (EAX) against the receiver's klass (loaded from ECX) and jumps to the
// IC-miss stub on mismatch.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
    nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}

uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  // One fewer NOP when OptoBreakpoint reserves room for an int3.
  return OptoBreakpoint ? 11 : 12;
}


//=============================================================================

int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}

// True just means we have a fast l2f conversion.
// (Historical note: this hook was introduced for UltraSparc.)
const bool Matcher::convL2FSupported(void) {
  return true;
}

// Is this branch offset short enough that a short branch can be used?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to address
  // of a next instruction.
  offset -= br_size;

  // the short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less
  if (rule == jmpConUCF2_rule)
    return (-126 <= offset && offset <= 125);
  return (-128 <= offset && offset <= 127);
}

const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Needs 2 CMOV's for longs.
const int Matcher::long_cmove_cost() { return 1; }

// No CMOVF/CMOVD with SSE/SSE2
const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Compressed oops do not exist on 32-bit x86; these hooks must never be
// called here.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
1466 const bool Matcher::misaligned_doubles_ok = true; 1467 1468 1469 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { 1470 // Get the memory operand from the node 1471 uint numopnds = node->num_opnds(); // Virtual call for number of operands 1472 uint skipped = node->oper_input_base(); // Sum of leaves skipped so far 1473 assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" ); 1474 uint opcnt = 1; // First operand 1475 uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand 1476 while( idx >= skipped+num_edges ) { 1477 skipped += num_edges; 1478 opcnt++; // Bump operand count 1479 assert( opcnt < numopnds, "Accessing non-existent operand" ); 1480 num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand 1481 } 1482 1483 MachOper *memory = node->_opnds[opcnt]; 1484 MachOper *new_memory = NULL; 1485 switch (memory->opcode()) { 1486 case DIRECT: 1487 case INDOFFSET32X: 1488 // No transformation necessary. 1489 return; 1490 case INDIRECT: 1491 new_memory = new indirect_win95_safeOper( ); 1492 break; 1493 case INDOFFSET8: 1494 new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0)); 1495 break; 1496 case INDOFFSET32: 1497 new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0)); 1498 break; 1499 case INDINDEXOFFSET: 1500 new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0)); 1501 break; 1502 case INDINDEXSCALE: 1503 new_memory = new indIndexScale_win95_safeOper(memory->scale()); 1504 break; 1505 case INDINDEXSCALEOFFSET: 1506 new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0)); 1507 break; 1508 case LOAD_LONG_INDIRECT: 1509 case LOAD_LONG_INDOFFSET32: 1510 // Does not use EBP as address register, use { EDX, EBX, EDI, ESI} 1511 return; 1512 default: 1513 assert(false, "unexpected memory operand in pd_implicit_null_fixup()"); 1514 return; 1515 } 1516 node->_opnds[opcnt] = new_memory; 1517 } 1518 1519 // Advertise 
here if the CPU requires explicit rounding operations 1520 // to implement the UseStrictFP mode. 1521 const bool Matcher::strict_fp_requires_explicit_rounding = true; 1522 1523 // Are floats conerted to double when stored to stack during deoptimization? 1524 // On x32 it is stored with convertion only when FPU is used for floats. 1525 bool Matcher::float_in_double() { return (UseSSE == 0); } 1526 1527 // Do ints take an entire long register or just half? 1528 const bool Matcher::int_in_long = false; 1529 1530 // Return whether or not this register is ever used as an argument. This 1531 // function is used on startup to build the trampoline stubs in generateOptoStub. 1532 // Registers not mentioned will be killed by the VM call in the trampoline, and 1533 // arguments in those registers not be available to the callee. 1534 bool Matcher::can_be_java_arg( int reg ) { 1535 if( reg == ECX_num || reg == EDX_num ) return true; 1536 if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; 1537 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; 1538 return false; 1539 } 1540 1541 bool Matcher::is_spillable_arg( int reg ) { 1542 return can_be_java_arg(reg); 1543 } 1544 1545 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { 1546 // Use hardware integer DIV instruction when 1547 // it is faster than a code which use multiply. 1548 // Only when constant divisor fits into 32 bit 1549 // (min_jint is excluded to get only correct 1550 // positive 32 bit values from negative). 
1551 return VM_Version::has_fast_idiv() && 1552 (divisor == (int)divisor && divisor != min_jint); 1553 } 1554 1555 // Register for DIVI projection of divmodI 1556 RegMask Matcher::divI_proj_mask() { 1557 return EAX_REG_mask(); 1558 } 1559 1560 // Register for MODI projection of divmodI 1561 RegMask Matcher::modI_proj_mask() { 1562 return EDX_REG_mask(); 1563 } 1564 1565 // Register for DIVL projection of divmodL 1566 RegMask Matcher::divL_proj_mask() { 1567 ShouldNotReachHere(); 1568 return RegMask(); 1569 } 1570 1571 // Register for MODL projection of divmodL 1572 RegMask Matcher::modL_proj_mask() { 1573 ShouldNotReachHere(); 1574 return RegMask(); 1575 } 1576 1577 const RegMask Matcher::method_handle_invoke_SP_save_mask() { 1578 return NO_REG_mask(); 1579 } 1580 1581 // Returns true if the high 32 bits of the value is known to be zero. 1582 bool is_operand_hi32_zero(Node* n) { 1583 int opc = n->Opcode(); 1584 if (opc == Op_AndL) { 1585 Node* o2 = n->in(2); 1586 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1587 return true; 1588 } 1589 } 1590 if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { 1591 return true; 1592 } 1593 return false; 1594 } 1595 1596 %} 1597 1598 //----------ENCODING BLOCK----------------------------------------------------- 1599 // This block specifies the encoding classes used by the compiler to output 1600 // byte streams. Encoding classes generate functions which are called by 1601 // Machine Instruction Nodes in order to generate the bit encoding of the 1602 // instruction. Operands specify their base encoding interface with the 1603 // interface keyword. There are currently supported four interfaces, 1604 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an 1605 // operand to generate a function which returns its register number when 1606 // queried. CONST_INTER causes an operand to generate a function which 1607 // returns the value of the constant when queried. 
MEMORY_INTER causes an 1608 // operand to generate four functions which return the Base Register, the 1609 // Index Register, the Scale Value, and the Offset Value of the operand when 1610 // queried. COND_INTER causes an operand to generate six functions which 1611 // return the encoding code (ie - encoding bits for the instruction) 1612 // associated with each basic boolean condition for a conditional instruction. 1613 // Instructions specify two basic values for encoding. They use the 1614 // ins_encode keyword to specify their encoding class (which must be one of 1615 // the class names specified in the encoding block), and they use the 1616 // opcode keyword to specify, in order, their primary, secondary, and 1617 // tertiary opcode. Only the opcode sections which a particular instruction 1618 // needs for encoding need to be specified. 1619 encode %{ 1620 // Build emit functions for each basic byte or larger field in the intel 1621 // encoding scheme (opcode, rm, sib, immediate), and call them from C++ 1622 // code in the enc_class source block. Emit functions will live in the 1623 // main source block for now. 
  // In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit the instruction's primary opcode byte.
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit the instruction's secondary opcode byte.
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit an opcode byte supplied directly as an immediate.
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix (selects 16-bit operand size).
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // Emit a register-to-register mod/rm byte (mod == 0x3).
  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Emit an explicit opcode byte followed by a reg-reg mod/rm byte.
  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Load immediate zero into a register: MOV r32, 0.
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+rd -- MOV r32,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32 == 0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                          special case
    //
    // input : eax: dividend                        min_int
    //         reg: divisor                         -1
    //
    // output: eax: quotient  (= eax idiv reg)      min_int
    //         edx: remainder (= eax irem reg)      0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,-1
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        eax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp eax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,0xFF (8-bit form, sign-extends to -1)
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops: opcode byte with register folded in.
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{  // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low 32 bits of a long immediate, 8/32-bit form with sign-extend bit.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // High 32 bits of a long immediate, 8/32-bit form with sign-extend bit.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // Emit secondary opcode with the register number folded in (e.g. BSWAP r32).
  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit value: BSWAP each half, then exchange the halves.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  // Reg-only mod/rm byte with secondary opcode in the reg/opcode field.
  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  // Slow-path portion of the subtype check.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    // Remember the code size so that every expansion of this enc_class
    // is verified to be the same length (asserted below).
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // Move the x87 float result into xmm0 via a stack temp.
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        // Move the x87 double result into xmm0 via a stack temp.
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}

  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if (ra_->C->in_24_bit_fp_mode()) {
      MacroAssembler _masm(&cbuf);
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    if (ra_->C->max_vector_size() > 16) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      MacroAssembler _masm(&cbuf);
      __ vzeroupper();
    }
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;

    if (!_method) {
      // Not a Java method: CALL straight to the runtime.
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_IMM32);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     rspec, RELOC_DISP32);
      // Emit stubs for static call.
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);                        // Displacement

  %}

  // Following encoding is no longer used, but may be restored if calling
  // convention changes significantly.
  // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  // enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //   // int ic_reg     = Matcher::inline_cache_reg();
  //   // int ic_encode  = Matcher::_regEncode[ic_reg];
  //   // int imo_reg    = Matcher::interpreter_method_oop_reg();
  //   // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //   // // Interpreter expects method_oop in EBX, currently a callee-saved register,
  //   // // so we load it immediately before the call
  //   // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
  //   // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
  //
  //   // xor rbp,ebp
  //   emit_opcode(cbuf, 0x33);
  //   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //   // CALL to interpreter.
  //   cbuf.set_insts_mark();
  //   $$$emit8$primary;
  //   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //               runtime_call_Relocation::spec(), RELOC_IMM32 );
  // %}

  // Shift by an 8-bit immediate count (SHL, SAR, SHR).
  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load low 32 bits of a long immediate; a zero value is loaded with XOR.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load high 32 bits of a long immediate; a zero value is loaded with XOR.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Copy the low word of a long into an integer register.
  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // mod/rm byte pairing an integer register with the high half of a long.
  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{   // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{       // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // Emit the LOCK prefix only on multiprocessor systems.
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  ebx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  ebx,ecx  (swap back)
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize "ZF set" as a 0/1 boolean in a register.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();     // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Long shift by a small (1..31) constant: SHLD/SHRD the pair, then
  // shift the remaining half.  $tertiary selects SHLD (0xA4) vs SHRD.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic long shift right by 32..63: move hi into lo, shift, then
  // fill hi with the sign via SAR hi,31.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical long shift by 32..63: move one half into the other, shift,
  // then clear the vacated half with XOR.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  // Same as RMopc_Mem_no_oop but the displacement may carry reloc info.
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free p = (p < q) ? p+y : p, via SUB/SBB/AND/ADD.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Variable long shift left; the count lives in ECX.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Variable long logical shift right; the count lives in ECX.
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Variable long arithmetic shift right; the count lives in ECX.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Multiply TOS by the first strictfp de-bias constant (80-bit load + FMULP).
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Multiply TOS by the second strictfp de-bias constant (80-bit load + FMULP).
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;                      // FST (no pop) when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: pop the copy we just pushed
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;                      // FST (no pop) when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: pop the copy we just pushed
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
  %}

  // Push FPU's double to
  // a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Push two XMM doubles onto the x87 stack (src1 below src0) via a stack temp.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Push two XMM floats onto the x87 stack (src1 below src0) via a stack temp.
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Move the x87 TOS double into an XMM register and release the stack temp.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Move the x87 TOS float into an XMM register and release d8 temp bytes.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Push one XMM double onto the x87 stack via a freshly allocated stack temp.
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Allocate an 8-byte scratch slot on the stack.
  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  // Release the 8-byte scratch slot.
  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Push an XMM double onto the x87 stack, reusing an existing stack temp.
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop  (C2 set means fprem needs another iteration)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  // fnstsw_ax();
  // sahf();
  // movl(dst, nan_result);
  // jcc(Assembler::parity, exit);
  // movl(dst, less_result);
  // jcc(Assembler::below, exit);
  // movl(dst, equal_result);
  // jcc(Assembler::equal, exit);
  // movl(dst, greater_result);

  // less_result     =  1;
  // greater_result  = -1;
  // equal_result    =  0;
  // nan_result      = -1;

  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
2714 emit_d8 ( cbuf, 0x0C ); 2715 // movl(dst, equal_result); 2716 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2717 emit_d32( cbuf, 0 ); 2718 // jcc(Assembler::equal, exit); 2719 emit_opcode( cbuf, 0x74 ); 2720 emit_d8 ( cbuf, 0x05 ); 2721 // movl(dst, greater_result); 2722 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2723 emit_d32( cbuf, 1 ); 2724 %} 2725 2726 2727 // Compare the longs and set flags 2728 // BROKEN! Do Not use as-is 2729 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2730 // CMP $src1.hi,$src2.hi 2731 emit_opcode( cbuf, 0x3B ); 2732 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2733 // JNE,s done 2734 emit_opcode(cbuf,0x75); 2735 emit_d8(cbuf, 2 ); 2736 // CMP $src1.lo,$src2.lo 2737 emit_opcode( cbuf, 0x3B ); 2738 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2739 // done: 2740 %} 2741 2742 enc_class convert_int_long( regL dst, rRegI src ) %{ 2743 // mov $dst.lo,$src 2744 int dst_encoding = $dst$$reg; 2745 int src_encoding = $src$$reg; 2746 encode_Copy( cbuf, dst_encoding , src_encoding ); 2747 // mov $dst.hi,$src 2748 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2749 // sar $dst.hi,31 2750 emit_opcode( cbuf, 0xC1 ); 2751 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2752 emit_d8(cbuf, 0x1F ); 2753 %} 2754 2755 enc_class convert_long_double( eRegL src ) %{ 2756 // push $src.hi 2757 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2758 // push $src.lo 2759 emit_opcode(cbuf, 0x50+$src$$reg ); 2760 // fild 64-bits at [SP] 2761 emit_opcode(cbuf,0xdf); 2762 emit_d8(cbuf, 0x6C); 2763 emit_d8(cbuf, 0x24); 2764 emit_d8(cbuf, 0x00); 2765 // pop stack 2766 emit_opcode(cbuf, 0x83); // add SP, #8 2767 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2768 emit_d8(cbuf, 0x8); 2769 %} 2770 2771 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2772 // IMUL EDX:EAX,$src1 2773 emit_opcode( cbuf, 0xF7 ); 2774 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2775 // SAR 
EDX,$cnt-32 2776 int shift_count = ((int)$cnt$$constant) - 32; 2777 if (shift_count > 0) { 2778 emit_opcode(cbuf, 0xC1); 2779 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2780 emit_d8(cbuf, shift_count); 2781 } 2782 %} 2783 2784 // this version doesn't have add sp, 8 2785 enc_class convert_long_double2( eRegL src ) %{ 2786 // push $src.hi 2787 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2788 // push $src.lo 2789 emit_opcode(cbuf, 0x50+$src$$reg ); 2790 // fild 64-bits at [SP] 2791 emit_opcode(cbuf,0xdf); 2792 emit_d8(cbuf, 0x6C); 2793 emit_d8(cbuf, 0x24); 2794 emit_d8(cbuf, 0x00); 2795 %} 2796 2797 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2798 // Basic idea: long = (long)int * (long)int 2799 // IMUL EDX:EAX, src 2800 emit_opcode( cbuf, 0xF7 ); 2801 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2802 %} 2803 2804 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2805 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2806 // MUL EDX:EAX, src 2807 emit_opcode( cbuf, 0xF7 ); 2808 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2809 %} 2810 2811 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2812 // Basic idea: lo(result) = lo(x_lo * y_lo) 2813 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2814 // MOV $tmp,$src.lo 2815 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2816 // IMUL $tmp,EDX 2817 emit_opcode( cbuf, 0x0F ); 2818 emit_opcode( cbuf, 0xAF ); 2819 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2820 // MOV EDX,$src.hi 2821 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2822 // IMUL EDX,EAX 2823 emit_opcode( cbuf, 0x0F ); 2824 emit_opcode( cbuf, 0xAF ); 2825 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2826 // ADD $tmp,EDX 2827 emit_opcode( cbuf, 0x03 ); 2828 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2829 // MUL EDX:EAX,$src.lo 2830 emit_opcode( cbuf, 0xF7 ); 2831 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2832 // ADD EDX,ESI 2833 emit_opcode( 
cbuf, 0x03 ); 2834 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2835 %} 2836 2837 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2838 // Basic idea: lo(result) = lo(src * y_lo) 2839 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2840 // IMUL $tmp,EDX,$src 2841 emit_opcode( cbuf, 0x6B ); 2842 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2843 emit_d8( cbuf, (int)$src$$constant ); 2844 // MOV EDX,$src 2845 emit_opcode(cbuf, 0xB8 + EDX_enc); 2846 emit_d32( cbuf, (int)$src$$constant ); 2847 // MUL EDX:EAX,EDX 2848 emit_opcode( cbuf, 0xF7 ); 2849 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2850 // ADD EDX,ESI 2851 emit_opcode( cbuf, 0x03 ); 2852 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2853 %} 2854 2855 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2856 // PUSH src1.hi 2857 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2858 // PUSH src1.lo 2859 emit_opcode(cbuf, 0x50+$src1$$reg ); 2860 // PUSH src2.hi 2861 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2862 // PUSH src2.lo 2863 emit_opcode(cbuf, 0x50+$src2$$reg ); 2864 // CALL directly to the runtime 2865 cbuf.set_insts_mark(); 2866 emit_opcode(cbuf,0xE8); // Call into runtime 2867 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2868 // Restore stack 2869 emit_opcode(cbuf, 0x83); // add SP, #framesize 2870 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2871 emit_d8(cbuf, 4*4); 2872 %} 2873 2874 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2875 // PUSH src1.hi 2876 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2877 // PUSH src1.lo 2878 emit_opcode(cbuf, 0x50+$src1$$reg ); 2879 // PUSH src2.hi 2880 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2881 // PUSH src2.lo 2882 emit_opcode(cbuf, 0x50+$src2$$reg ); 2883 // CALL directly to the runtime 2884 cbuf.set_insts_mark(); 2885 emit_opcode(cbuf,0xE8); // Call into runtime 2886 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2887 // Restore stack 2888 emit_opcode(cbuf, 0x83); // add SP, #framesize 2889 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2890 emit_d8(cbuf, 4*4); 2891 %} 2892 2893 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2894 // MOV $tmp,$src.lo 2895 emit_opcode(cbuf, 0x8B); 2896 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2897 // OR $tmp,$src.hi 2898 emit_opcode(cbuf, 0x0B); 2899 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2900 %} 2901 2902 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2903 // CMP $src1.lo,$src2.lo 2904 emit_opcode( cbuf, 0x3B ); 2905 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2906 // JNE,s skip 2907 emit_cc(cbuf, 0x70, 0x5); 2908 emit_d8(cbuf,2); 2909 // CMP $src1.hi,$src2.hi 2910 emit_opcode( cbuf, 0x3B ); 2911 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2912 %} 2913 2914 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2915 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2916 emit_opcode( cbuf, 0x3B ); 2917 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2918 // MOV $tmp,$src1.hi 2919 emit_opcode( cbuf, 0x8B ); 2920 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2921 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2922 emit_opcode( cbuf, 0x1B ); 2923 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2924 %} 2925 2926 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2927 // XOR $tmp,$tmp 2928 emit_opcode(cbuf,0x33); // XOR 2929 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2930 // CMP $tmp,$src.lo 2931 emit_opcode( cbuf, 0x3B ); 2932 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2933 // SBB $tmp,$src.hi 2934 emit_opcode( cbuf, 0x1B ); 2935 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2936 %} 2937 2938 // Sniff, sniff... 
smells like Gnu Superoptimizer 2939 enc_class neg_long( eRegL dst ) %{ 2940 emit_opcode(cbuf,0xF7); // NEG hi 2941 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2942 emit_opcode(cbuf,0xF7); // NEG lo 2943 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2944 emit_opcode(cbuf,0x83); // SBB hi,0 2945 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2946 emit_d8 (cbuf,0 ); 2947 %} 2948 2949 enc_class enc_pop_rdx() %{ 2950 emit_opcode(cbuf,0x5A); 2951 %} 2952 2953 enc_class enc_rethrow() %{ 2954 cbuf.set_insts_mark(); 2955 emit_opcode(cbuf, 0xE9); // jmp entry 2956 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2957 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2958 %} 2959 2960 2961 // Convert a double to an int. Java semantics require we do complex 2962 // manglelations in the corner cases. So we set the rounding mode to 2963 // 'zero', store the darned double down as an int, and reset the 2964 // rounding mode to 'nearest'. The hardware throws an exception which 2965 // patches up the correct value directly to the stack. 2966 enc_class DPR2I_encoding( regDPR src ) %{ 2967 // Flip to round-to-zero mode. We attempted to allow invalid-op 2968 // exceptions here, so that a NAN or other corner-case value will 2969 // thrown an exception (but normal values get converted at full speed). 2970 // However, I2C adapters and other float-stack manglers leave pending 2971 // invalid-op exceptions hanging. We would have to clear them before 2972 // enabling them and that is more expensive than just testing for the 2973 // invalid value Intel stores down in the corner cases. 2974 emit_opcode(cbuf,0xD9); // FLDCW trunc 2975 emit_opcode(cbuf,0x2D); 2976 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 2977 // Allocate a word 2978 emit_opcode(cbuf,0x83); // SUB ESP,4 2979 emit_opcode(cbuf,0xEC); 2980 emit_d8(cbuf,0x04); 2981 // Encoding assumes a double has been pushed into FPR0. 
2982 // Store down the double as an int, popping the FPU stack 2983 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2984 emit_opcode(cbuf,0x1C); 2985 emit_d8(cbuf,0x24); 2986 // Restore the rounding mode; mask the exception 2987 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2988 emit_opcode(cbuf,0x2D); 2989 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2990 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 2991 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 2992 2993 // Load the converted int; adjust CPU stack 2994 emit_opcode(cbuf,0x58); // POP EAX 2995 emit_opcode(cbuf,0x3D); // CMP EAX,imm 2996 emit_d32 (cbuf,0x80000000); // 0x80000000 2997 emit_opcode(cbuf,0x75); // JNE around_slow_call 2998 emit_d8 (cbuf,0x07); // Size of slow_call 2999 // Push src onto stack slow-path 3000 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3001 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3002 // CALL directly to the runtime 3003 cbuf.set_insts_mark(); 3004 emit_opcode(cbuf,0xE8); // Call into runtime 3005 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3006 // Carry on here... 3007 %} 3008 3009 enc_class DPR2L_encoding( regDPR src ) %{ 3010 emit_opcode(cbuf,0xD9); // FLDCW trunc 3011 emit_opcode(cbuf,0x2D); 3012 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3013 // Allocate a word 3014 emit_opcode(cbuf,0x83); // SUB ESP,8 3015 emit_opcode(cbuf,0xEC); 3016 emit_d8(cbuf,0x08); 3017 // Encoding assumes a double has been pushed into FPR0. 3018 // Store down the double as a long, popping the FPU stack 3019 emit_opcode(cbuf,0xDF); // FISTP [ESP] 3020 emit_opcode(cbuf,0x3C); 3021 emit_d8(cbuf,0x24); 3022 // Restore the rounding mode; mask the exception 3023 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3024 emit_opcode(cbuf,0x2D); 3025 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3026 ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 3027 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3028 3029 // Load the converted int; adjust CPU stack 3030 emit_opcode(cbuf,0x58); // POP EAX 3031 emit_opcode(cbuf,0x5A); // POP EDX 3032 emit_opcode(cbuf,0x81); // CMP EDX,imm 3033 emit_d8 (cbuf,0xFA); // rdx 3034 emit_d32 (cbuf,0x80000000); // 0x80000000 3035 emit_opcode(cbuf,0x75); // JNE around_slow_call 3036 emit_d8 (cbuf,0x07+4); // Size of slow_call 3037 emit_opcode(cbuf,0x85); // TEST EAX,EAX 3038 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 3039 emit_opcode(cbuf,0x75); // JNE around_slow_call 3040 emit_d8 (cbuf,0x07); // Size of slow_call 3041 // Push src onto stack slow-path 3042 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3043 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3044 // CALL directly to the runtime 3045 cbuf.set_insts_mark(); 3046 emit_opcode(cbuf,0xE8); // Call into runtime 3047 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3048 // Carry on here... 
3049 %} 3050 3051 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 3052 // Operand was loaded from memory into fp ST (stack top) 3053 // FMUL ST,$src /* D8 C8+i */ 3054 emit_opcode(cbuf, 0xD8); 3055 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3056 %} 3057 3058 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3059 // FADDP ST,src2 /* D8 C0+i */ 3060 emit_opcode(cbuf, 0xD8); 3061 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3062 //could use FADDP src2,fpST /* DE C0+i */ 3063 %} 3064 3065 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3066 // FADDP src2,ST /* DE C0+i */ 3067 emit_opcode(cbuf, 0xDE); 3068 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3069 %} 3070 3071 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3072 // Operand has been loaded into fp ST (stack top) 3073 // FSUB ST,$src1 3074 emit_opcode(cbuf, 0xD8); 3075 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3076 3077 // FDIV 3078 emit_opcode(cbuf, 0xD8); 3079 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3080 %} 3081 3082 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3083 // Operand was loaded from memory into fp ST (stack top) 3084 // FADD ST,$src /* D8 C0+i */ 3085 emit_opcode(cbuf, 0xD8); 3086 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3087 3088 // FMUL ST,src2 /* D8 C*+i */ 3089 emit_opcode(cbuf, 0xD8); 3090 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3091 %} 3092 3093 3094 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3095 // Operand was loaded from memory into fp ST (stack top) 3096 // FADD ST,$src /* D8 C0+i */ 3097 emit_opcode(cbuf, 0xD8); 3098 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3099 3100 // FMULP src2,ST /* DE C8+i */ 3101 emit_opcode(cbuf, 0xDE); 3102 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3103 %} 3104 3105 // Atomically load the volatile long 3106 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3107 emit_opcode(cbuf,0xDF); 3108 int rm_byte_opcode = 0x05; 3109 int base = $mem$$base; 3110 int index = $mem$$index; 3111 int scale = $mem$$scale; 3112 int displace = $mem$$disp; 3113 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3114 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3115 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3116 %} 3117 3118 // Volatile Store Long. Must be atomic, so move it into 3119 // the FP TOS and then do a 64-bit FIST. Has to probe the 3120 // target address before the store (for null-ptr checks) 3121 // so the memory operand is used twice in the encoding. 3122 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3123 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3124 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3125 emit_opcode(cbuf,0xDF); 3126 int rm_byte_opcode = 0x07; 3127 int base = $mem$$base; 3128 int index = $mem$$index; 3129 int scale = $mem$$scale; 3130 int displace = $mem$$disp; 3131 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3132 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3133 %} 3134 3135 // Safepoint Poll. This polls the safepoint page, and causes an 3136 // exception if it is not readable. Unfortunately, it kills the condition code 3137 // in the process 3138 // We current use TESTL [spp],EDI 3139 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 3140 3141 enc_class Safepoint_Poll() %{ 3142 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); 3143 emit_opcode(cbuf,0x85); 3144 emit_rm (cbuf, 0x0, 0x7, 0x5); 3145 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 3146 %} 3147 %} 3148 3149 3150 //----------FRAME-------------------------------------------------------------- 3151 // Definition of frame structure and management information. 
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   | (to get allocators register number
//  G  Owned by    |        |        v    add OptoReg::stack0())
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//        |        |        |  3
//        |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> OptoReg::stack0(), even aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by    +--------+
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // NOTE(review): Java floats switch to XMM0 already at UseSSE>=1,
  // whereas the C convention above requires UseSSE>=2 — presumably
  // because C code returns floats in ST0 regardless; confirm.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Immediate that fits in a signed byte (imm8 encodings)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediate that fits in a signed 16-bit word
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate that fits in a signed 32-bit value
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

// NOTE(review): the keyword below opens an operand whose definition
// continues past this chunk.
operand
eCXRegI(xRegI reg) %{ 3728 constraint(ALLOC_IN_RC(ecx_reg)); 3729 match(reg); 3730 match(rRegI); 3731 3732 format %{ "ECX" %} 3733 interface(REG_INTER); 3734 %} 3735 3736 operand eDXRegI(xRegI reg) %{ 3737 constraint(ALLOC_IN_RC(edx_reg)); 3738 match(reg); 3739 match(rRegI); 3740 3741 format %{ "EDX" %} 3742 interface(REG_INTER); 3743 %} 3744 3745 operand eDIRegI(xRegI reg) %{ 3746 constraint(ALLOC_IN_RC(edi_reg)); 3747 match(reg); 3748 match(rRegI); 3749 3750 format %{ "EDI" %} 3751 interface(REG_INTER); 3752 %} 3753 3754 operand naxRegI() %{ 3755 constraint(ALLOC_IN_RC(nax_reg)); 3756 match(RegI); 3757 match(eCXRegI); 3758 match(eDXRegI); 3759 match(eSIRegI); 3760 match(eDIRegI); 3761 3762 format %{ %} 3763 interface(REG_INTER); 3764 %} 3765 3766 operand nadxRegI() %{ 3767 constraint(ALLOC_IN_RC(nadx_reg)); 3768 match(RegI); 3769 match(eBXRegI); 3770 match(eCXRegI); 3771 match(eSIRegI); 3772 match(eDIRegI); 3773 3774 format %{ %} 3775 interface(REG_INTER); 3776 %} 3777 3778 operand ncxRegI() %{ 3779 constraint(ALLOC_IN_RC(ncx_reg)); 3780 match(RegI); 3781 match(eAXRegI); 3782 match(eDXRegI); 3783 match(eSIRegI); 3784 match(eDIRegI); 3785 3786 format %{ %} 3787 interface(REG_INTER); 3788 %} 3789 3790 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg 3791 // // 3792 operand eSIRegI(xRegI reg) %{ 3793 constraint(ALLOC_IN_RC(esi_reg)); 3794 match(reg); 3795 match(rRegI); 3796 3797 format %{ "ESI" %} 3798 interface(REG_INTER); 3799 %} 3800 3801 // Pointer Register 3802 operand anyRegP() %{ 3803 constraint(ALLOC_IN_RC(any_reg)); 3804 match(RegP); 3805 match(eAXRegP); 3806 match(eBXRegP); 3807 match(eCXRegP); 3808 match(eDIRegP); 3809 match(eRegP); 3810 3811 format %{ %} 3812 interface(REG_INTER); 3813 %} 3814 3815 operand eRegP() %{ 3816 constraint(ALLOC_IN_RC(int_reg)); 3817 match(RegP); 3818 match(eAXRegP); 3819 match(eBXRegP); 3820 match(eCXRegP); 3821 match(eDIRegP); 3822 3823 format %{ %} 3824 interface(REG_INTER); 3825 %} 3826 3827 // 
On windows95, EBP is not safe to use for implicit null tests. 3828 operand eRegP_no_EBP() %{ 3829 constraint(ALLOC_IN_RC(int_reg_no_ebp)); 3830 match(RegP); 3831 match(eAXRegP); 3832 match(eBXRegP); 3833 match(eCXRegP); 3834 match(eDIRegP); 3835 3836 op_cost(100); 3837 format %{ %} 3838 interface(REG_INTER); 3839 %} 3840 3841 operand naxRegP() %{ 3842 constraint(ALLOC_IN_RC(nax_reg)); 3843 match(RegP); 3844 match(eBXRegP); 3845 match(eDXRegP); 3846 match(eCXRegP); 3847 match(eSIRegP); 3848 match(eDIRegP); 3849 3850 format %{ %} 3851 interface(REG_INTER); 3852 %} 3853 3854 operand nabxRegP() %{ 3855 constraint(ALLOC_IN_RC(nabx_reg)); 3856 match(RegP); 3857 match(eCXRegP); 3858 match(eDXRegP); 3859 match(eSIRegP); 3860 match(eDIRegP); 3861 3862 format %{ %} 3863 interface(REG_INTER); 3864 %} 3865 3866 operand pRegP() %{ 3867 constraint(ALLOC_IN_RC(p_reg)); 3868 match(RegP); 3869 match(eBXRegP); 3870 match(eDXRegP); 3871 match(eSIRegP); 3872 match(eDIRegP); 3873 3874 format %{ %} 3875 interface(REG_INTER); 3876 %} 3877 3878 // Special Registers 3879 // Return a pointer value 3880 operand eAXRegP(eRegP reg) %{ 3881 constraint(ALLOC_IN_RC(eax_reg)); 3882 match(reg); 3883 format %{ "EAX" %} 3884 interface(REG_INTER); 3885 %} 3886 3887 // Used in AtomicAdd 3888 operand eBXRegP(eRegP reg) %{ 3889 constraint(ALLOC_IN_RC(ebx_reg)); 3890 match(reg); 3891 format %{ "EBX" %} 3892 interface(REG_INTER); 3893 %} 3894 3895 // Tail-call (interprocedural jump) to interpreter 3896 operand eCXRegP(eRegP reg) %{ 3897 constraint(ALLOC_IN_RC(ecx_reg)); 3898 match(reg); 3899 format %{ "ECX" %} 3900 interface(REG_INTER); 3901 %} 3902 3903 operand eSIRegP(eRegP reg) %{ 3904 constraint(ALLOC_IN_RC(esi_reg)); 3905 match(reg); 3906 format %{ "ESI" %} 3907 interface(REG_INTER); 3908 %} 3909 3910 // Used in rep stosw 3911 operand eDIRegP(eRegP reg) %{ 3912 constraint(ALLOC_IN_RC(edi_reg)); 3913 match(reg); 3914 format %{ "EDI" %} 3915 interface(REG_INTER); 3916 %} 3917 3918 operand eRegL() %{ 
3919 constraint(ALLOC_IN_RC(long_reg)); 3920 match(RegL); 3921 match(eADXRegL); 3922 3923 format %{ %} 3924 interface(REG_INTER); 3925 %} 3926 3927 operand eADXRegL( eRegL reg ) %{ 3928 constraint(ALLOC_IN_RC(eadx_reg)); 3929 match(reg); 3930 3931 format %{ "EDX:EAX" %} 3932 interface(REG_INTER); 3933 %} 3934 3935 operand eBCXRegL( eRegL reg ) %{ 3936 constraint(ALLOC_IN_RC(ebcx_reg)); 3937 match(reg); 3938 3939 format %{ "EBX:ECX" %} 3940 interface(REG_INTER); 3941 %} 3942 3943 // Special case for integer high multiply 3944 operand eADXRegL_low_only() %{ 3945 constraint(ALLOC_IN_RC(eadx_reg)); 3946 match(RegL); 3947 3948 format %{ "EAX" %} 3949 interface(REG_INTER); 3950 %} 3951 3952 // Flags register, used as output of compare instructions 3953 operand eFlagsReg() %{ 3954 constraint(ALLOC_IN_RC(int_flags)); 3955 match(RegFlags); 3956 3957 format %{ "EFLAGS" %} 3958 interface(REG_INTER); 3959 %} 3960 3961 // Flags register, used as output of FLOATING POINT compare instructions 3962 operand eFlagsRegU() %{ 3963 constraint(ALLOC_IN_RC(int_flags)); 3964 match(RegFlags); 3965 3966 format %{ "EFLAGS_U" %} 3967 interface(REG_INTER); 3968 %} 3969 3970 operand eFlagsRegUCF() %{ 3971 constraint(ALLOC_IN_RC(int_flags)); 3972 match(RegFlags); 3973 predicate(false); 3974 3975 format %{ "EFLAGS_U_CF" %} 3976 interface(REG_INTER); 3977 %} 3978 3979 // Condition Code Register used by long compare 3980 operand flagsReg_long_LTGE() %{ 3981 constraint(ALLOC_IN_RC(int_flags)); 3982 match(RegFlags); 3983 format %{ "FLAGS_LTGE" %} 3984 interface(REG_INTER); 3985 %} 3986 operand flagsReg_long_EQNE() %{ 3987 constraint(ALLOC_IN_RC(int_flags)); 3988 match(RegFlags); 3989 format %{ "FLAGS_EQNE" %} 3990 interface(REG_INTER); 3991 %} 3992 operand flagsReg_long_LEGT() %{ 3993 constraint(ALLOC_IN_RC(int_flags)); 3994 match(RegFlags); 3995 format %{ "FLAGS_LEGT" %} 3996 interface(REG_INTER); 3997 %} 3998 3999 // Float register operands 4000 operand regDPR() %{ 4001 predicate( UseSSE < 2 ); 
4002 constraint(ALLOC_IN_RC(fp_dbl_reg)); 4003 match(RegD); 4004 match(regDPR1); 4005 match(regDPR2); 4006 format %{ %} 4007 interface(REG_INTER); 4008 %} 4009 4010 operand regDPR1(regDPR reg) %{ 4011 predicate( UseSSE < 2 ); 4012 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 4013 match(reg); 4014 format %{ "FPR1" %} 4015 interface(REG_INTER); 4016 %} 4017 4018 operand regDPR2(regDPR reg) %{ 4019 predicate( UseSSE < 2 ); 4020 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 4021 match(reg); 4022 format %{ "FPR2" %} 4023 interface(REG_INTER); 4024 %} 4025 4026 operand regnotDPR1(regDPR reg) %{ 4027 predicate( UseSSE < 2 ); 4028 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 4029 match(reg); 4030 format %{ %} 4031 interface(REG_INTER); 4032 %} 4033 4034 // Float register operands 4035 operand regFPR() %{ 4036 predicate( UseSSE < 2 ); 4037 constraint(ALLOC_IN_RC(fp_flt_reg)); 4038 match(RegF); 4039 match(regFPR1); 4040 format %{ %} 4041 interface(REG_INTER); 4042 %} 4043 4044 // Float register operands 4045 operand regFPR1(regFPR reg) %{ 4046 predicate( UseSSE < 2 ); 4047 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4048 match(reg); 4049 format %{ "FPR1" %} 4050 interface(REG_INTER); 4051 %} 4052 4053 // XMM Float register operands 4054 operand regF() %{ 4055 predicate( UseSSE>=1 ); 4056 constraint(ALLOC_IN_RC(float_reg_legacy)); 4057 match(RegF); 4058 format %{ %} 4059 interface(REG_INTER); 4060 %} 4061 4062 // XMM Double register operands 4063 operand regD() %{ 4064 predicate( UseSSE>=2 ); 4065 constraint(ALLOC_IN_RC(double_reg_legacy)); 4066 match(RegD); 4067 format %{ %} 4068 interface(REG_INTER); 4069 %} 4070 4071 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM) 4072 // runtime code generation via reg_class_dynamic. 
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o"); // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
4636 4637 // Integer ALU reg operation 4638 pipe_class ialu_reg(rRegI dst) %{ 4639 single_instruction; 4640 dst : S4(write); 4641 dst : S3(read); 4642 DECODE : S0; // any decoder 4643 ALU : S3; // any alu 4644 %} 4645 4646 // Long ALU reg operation 4647 pipe_class ialu_reg_long(eRegL dst) %{ 4648 instruction_count(2); 4649 dst : S4(write); 4650 dst : S3(read); 4651 DECODE : S0(2); // any 2 decoders 4652 ALU : S3(2); // both alus 4653 %} 4654 4655 // Integer ALU reg operation using big decoder 4656 pipe_class ialu_reg_fat(rRegI dst) %{ 4657 single_instruction; 4658 dst : S4(write); 4659 dst : S3(read); 4660 D0 : S0; // big decoder only 4661 ALU : S3; // any alu 4662 %} 4663 4664 // Long ALU reg operation using big decoder 4665 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4666 instruction_count(2); 4667 dst : S4(write); 4668 dst : S3(read); 4669 D0 : S0(2); // big decoder only; twice 4670 ALU : S3(2); // any 2 alus 4671 %} 4672 4673 // Integer ALU reg-reg operation 4674 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4675 single_instruction; 4676 dst : S4(write); 4677 src : S3(read); 4678 DECODE : S0; // any decoder 4679 ALU : S3; // any alu 4680 %} 4681 4682 // Long ALU reg-reg operation 4683 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4684 instruction_count(2); 4685 dst : S4(write); 4686 src : S3(read); 4687 DECODE : S0(2); // any 2 decoders 4688 ALU : S3(2); // both alus 4689 %} 4690 4691 // Integer ALU reg-reg operation 4692 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4693 single_instruction; 4694 dst : S4(write); 4695 src : S3(read); 4696 D0 : S0; // big decoder only 4697 ALU : S3; // any alu 4698 %} 4699 4700 // Long ALU reg-reg operation 4701 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4702 instruction_count(2); 4703 dst : S4(write); 4704 src : S3(read); 4705 D0 : S0(2); // big decoder only; twice 4706 ALU : S3(2); // both alus 4707 %} 4708 4709 // Integer ALU reg-mem operation 4710 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4711 single_instruction; 4712 dst : S5(write); 4713 mem : S3(read); 4714 D0 : S0; // big decoder only 4715 ALU : S4; // any alu 4716 MEM : S3; // any mem 4717 %} 4718 4719 // Long ALU reg-mem operation 4720 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4721 instruction_count(2); 4722 dst : S5(write); 4723 mem : S3(read); 4724 D0 : S0(2); // big decoder only; twice 4725 ALU : S4(2); // any 2 alus 4726 MEM : S3(2); // both mems 4727 %} 4728 4729 // Integer mem operation (prefetch) 4730 pipe_class ialu_mem(memory mem) 4731 %{ 4732 single_instruction; 4733 mem : S3(read); 4734 D0 : S0; // big decoder only 4735 MEM : S3; // any mem 4736 %} 4737 4738 // Integer Store to Memory 4739 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4740 single_instruction; 4741 mem : S3(read); 4742 src : S5(read); 4743 D0 : S0; // big decoder only 4744 ALU : S4; // any alu 4745 MEM : S3; 4746 %} 4747 4748 // Long Store to Memory 4749 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4750 instruction_count(2); 4751 mem : S3(read); 4752 src : S5(read); 4753 D0 : S0(2); // big decoder only; twice 4754 ALU : S4(2); // any 2 alus 4755 MEM : S3(2); // Both mems 4756 %} 4757 4758 // Integer Store to Memory 4759 pipe_class ialu_mem_imm(memory mem) %{ 4760 single_instruction; 4761 mem : S3(read); 4762 D0 : S0; // big decoder only 4763 ALU : S4; // any alu 4764 MEM : S3; 4765 %} 4766 4767 // Integer ALU0 reg-reg operation 4768 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4769 single_instruction; 4770 dst : S4(write); 4771 src : S3(read); 4772 D0 : S0; // Big decoder only 4773 ALU0 : S3; // only alu0 4774 %} 4775 4776 // Integer ALU0 reg-mem operation 4777 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4778 single_instruction; 4779 dst : S5(write); 4780 mem : S3(read); 4781 D0 : S0; // big decoder only 4782 ALU0 : S4; // ALU0 only 4783 MEM : S3; // any mem 4784 %} 4785 4786 // Integer ALU reg-reg operation 4787 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4788 single_instruction; 4789 cr : S4(write); 4790 src1 : S3(read); 4791 src2 : S3(read); 4792 DECODE : S0; // any decoder 4793 ALU : S3; // any alu 4794 %} 4795 4796 // Integer ALU reg-imm operation 4797 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4798 single_instruction; 4799 cr : S4(write); 4800 src1 : S3(read); 4801 DECODE : S0; // any decoder 4802 ALU : S3; // any alu 4803 %} 4804 4805 // Integer ALU reg-mem operation 4806 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4807 single_instruction; 4808 cr : S4(write); 4809 src1 : S3(read); 4810 src2 : S3(read); 4811 D0 : S0; // big decoder only 4812 ALU : S4; // any alu 4813 MEM : S3; 4814 %} 4815 4816 // Conditional move reg-reg 4817 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4818 instruction_count(4); 4819 y : S4(read); 4820 q : S3(read); 4821 p : S3(read); 4822 DECODE : S0(4); // any decoder 4823 %} 4824 4825 // Conditional move reg-reg 4826 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4827 single_instruction; 4828 dst : S4(write); 4829 src : S3(read); 4830 cr : S3(read); 4831 DECODE : S0; // any decoder 4832 %} 4833 4834 // Conditional move reg-mem 4835 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4836 single_instruction; 4837 dst : S4(write); 4838 src : S3(read); 4839 cr : S3(read); 4840 DECODE : S0; // any decoder 4841 MEM : S3; 4842 %} 4843 4844 // Conditional move reg-reg long 4845 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4846 single_instruction; 4847 dst : S4(write); 4848 src : S3(read); 4849 cr : S3(read); 4850 DECODE : S0(2); // any 2 decoders 4851 %} 4852 4853 // Conditional move double reg-reg 4854 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4855 single_instruction; 4856 dst : S4(write); 4857 src : S3(read); 4858 cr : S3(read); 4859 DECODE : S0; // any decoder 4860 %} 4861 4862 // Float reg-reg operation 4863 pipe_class fpu_reg(regDPR 
dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-mem operation with two register sources
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2); 4939 src : S5(read); 4940 mem : S3(read); 4941 DECODE : S0; // any decoder for FPU PUSH 4942 D0 : S1; // big decoder only 4943 FPU : S4; 4944 MEM : S3; // any mem 4945 %} 4946 4947 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4948 instruction_count(3); 4949 src1 : S3(read); 4950 src2 : S3(read); 4951 mem : S3(read); 4952 DECODE : S0(2); // any decoder for FPU PUSH 4953 D0 : S1; // big decoder only 4954 FPU : S4; 4955 MEM : S3; // any mem 4956 %} 4957 4958 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4959 instruction_count(3); 4960 src1 : S3(read); 4961 src2 : S3(read); 4962 mem : S4(read); 4963 DECODE : S0; // any decoder for FPU PUSH 4964 D0 : S0(2); // big decoder only 4965 FPU : S4; 4966 MEM : S3(2); // any mem 4967 %} 4968 4969 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4970 instruction_count(2); 4971 src1 : S3(read); 4972 dst : S4(read); 4973 D0 : S0(2); // big decoder only 4974 MEM : S3(2); // any mem 4975 %} 4976 4977 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 4978 instruction_count(3); 4979 src1 : S3(read); 4980 src2 : S3(read); 4981 dst : S4(read); 4982 D0 : S0(3); // big decoder only 4983 FPU : S4; 4984 MEM : S3(3); // any mem 4985 %} 4986 4987 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 4988 instruction_count(3); 4989 src1 : S4(read); 4990 mem : S4(read); 4991 DECODE : S0; // any decoder for FPU PUSH 4992 D0 : S0(2); // big decoder only 4993 FPU : S4; 4994 MEM : S3(2); // any mem 4995 %} 4996 4997 // Float load constant 4998 pipe_class fpu_reg_con(regDPR dst) %{ 4999 instruction_count(2); 5000 dst : S5(write); 5001 D0 : S0; // big decoder only for the load 5002 DECODE : S1; // any decoder for FPU POP 5003 FPU : S4; 5004 MEM : S3; // any mem 5005 %} 5006 5007 // Float load constant 5008 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5009 instruction_count(3); 5010 dst : S5(write); 5011 src : S3(read); 5012 D0 : S0; // big decoder only for 
the load 5013 DECODE : S1(2); // any decoder for FPU POP 5014 FPU : S4; 5015 MEM : S3; // any mem 5016 %} 5017 5018 // UnConditional branch 5019 pipe_class pipe_jmp( label labl ) %{ 5020 single_instruction; 5021 BR : S3; 5022 %} 5023 5024 // Conditional branch 5025 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5026 single_instruction; 5027 cr : S1(read); 5028 BR : S3; 5029 %} 5030 5031 // Allocation idiom 5032 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5033 instruction_count(1); force_serialization; 5034 fixed_latency(6); 5035 heap_ptr : S3(read); 5036 DECODE : S0(3); 5037 D0 : S2; 5038 MEM : S3; 5039 ALU : S3(2); 5040 dst : S5(write); 5041 BR : S5; 5042 %} 5043 5044 // Generic big/slow expanded idiom 5045 pipe_class pipe_slow( ) %{ 5046 instruction_count(10); multiple_bundles; force_serialization; 5047 fixed_latency(100); 5048 D0 : S0(2); 5049 MEM : S3(2); 5050 %} 5051 5052 // The real do-nothing guy 5053 pipe_class empty( ) %{ 5054 instruction_count(0); 5055 %} 5056 5057 // Define the class for the Nop node 5058 define %{ 5059 MachNop = empty; 5060 %} 5061 5062 %} 5063 5064 //----------INSTRUCTIONS------------------------------------------------------- 5065 // 5066 // match -- States which machine-independent subtree may be replaced 5067 // by this instruction. 5068 // ins_cost -- The estimated cost of this instruction is used by instruction 5069 // selection to identify a minimum cost tree of machine 5070 // instructions that matches a tree of machine-independent 5071 // instructions. 5072 // format -- A string providing the disassembly for this instruction. 5073 // The value of an instruction's operand may be inserted 5074 // by referring to it with a '$' prefix. 5075 // opcode -- Three instruction opcodes may be provided. These are referred 5076 // to within an encode class as $primary, $secondary, and $tertiary 5077 // respectively. 
// The primary opcode is commonly used to
//              indicate the type of machine instruction, while secondary
//              and tertiary are often used for prefix options or addressing
//              modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//              name must have been defined in an 'enc_class' specification
//              in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  // No trailing "\n\t" on the last format line: it emitted a dangling
  // blank continuation line in the opto-assembly printout.
  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  // No trailing "\n\t" on the last format line (see note above in this
  // instruct's unsigned sibling is not assumed -- same fix applied here).
  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count
leading zeros (int)" %} 5142 ins_encode %{ 5143 __ lzcntl($dst$$Register, $src$$Register); 5144 %} 5145 ins_pipe(ialu_reg); 5146 %} 5147 5148 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5149 predicate(!UseCountLeadingZerosInstruction); 5150 match(Set dst (CountLeadingZerosI src)); 5151 effect(KILL cr); 5152 5153 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5154 "JNZ skip\n\t" 5155 "MOV $dst, -1\n" 5156 "skip:\n\t" 5157 "NEG $dst\n\t" 5158 "ADD $dst, 31" %} 5159 ins_encode %{ 5160 Register Rdst = $dst$$Register; 5161 Register Rsrc = $src$$Register; 5162 Label skip; 5163 __ bsrl(Rdst, Rsrc); 5164 __ jccb(Assembler::notZero, skip); 5165 __ movl(Rdst, -1); 5166 __ bind(skip); 5167 __ negl(Rdst); 5168 __ addl(Rdst, BitsPerInt - 1); 5169 %} 5170 ins_pipe(ialu_reg); 5171 %} 5172 5173 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5174 predicate(UseCountLeadingZerosInstruction); 5175 match(Set dst (CountLeadingZerosL src)); 5176 effect(TEMP dst, KILL cr); 5177 5178 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5179 "JNC done\n\t" 5180 "LZCNT $dst, $src.lo\n\t" 5181 "ADD $dst, 32\n" 5182 "done:" %} 5183 ins_encode %{ 5184 Register Rdst = $dst$$Register; 5185 Register Rsrc = $src$$Register; 5186 Label done; 5187 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5188 __ jccb(Assembler::carryClear, done); 5189 __ lzcntl(Rdst, Rsrc); 5190 __ addl(Rdst, BitsPerInt); 5191 __ bind(done); 5192 %} 5193 ins_pipe(ialu_reg); 5194 %} 5195 5196 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5197 predicate(!UseCountLeadingZerosInstruction); 5198 match(Set dst (CountLeadingZerosL src)); 5199 effect(TEMP dst, KILL cr); 5200 5201 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5202 "JZ msw_is_zero\n\t" 5203 "ADD $dst, 32\n\t" 5204 "JMP not_zero\n" 5205 "msw_is_zero:\n\t" 5206 "BSR $dst, $src.lo\n\t" 5207 "JNZ not_zero\n\t" 5208 "MOV $dst, -1\n" 5209 "not_zero:\n\t" 5210 "NEG 
$dst\n\t" 5211 "ADD $dst, 63\n" %} 5212 ins_encode %{ 5213 Register Rdst = $dst$$Register; 5214 Register Rsrc = $src$$Register; 5215 Label msw_is_zero; 5216 Label not_zero; 5217 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5218 __ jccb(Assembler::zero, msw_is_zero); 5219 __ addl(Rdst, BitsPerInt); 5220 __ jmpb(not_zero); 5221 __ bind(msw_is_zero); 5222 __ bsrl(Rdst, Rsrc); 5223 __ jccb(Assembler::notZero, not_zero); 5224 __ movl(Rdst, -1); 5225 __ bind(not_zero); 5226 __ negl(Rdst); 5227 __ addl(Rdst, BitsPerLong - 1); 5228 %} 5229 ins_pipe(ialu_reg); 5230 %} 5231 5232 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5233 predicate(UseCountTrailingZerosInstruction); 5234 match(Set dst (CountTrailingZerosI src)); 5235 effect(KILL cr); 5236 5237 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5238 ins_encode %{ 5239 __ tzcntl($dst$$Register, $src$$Register); 5240 %} 5241 ins_pipe(ialu_reg); 5242 %} 5243 5244 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5245 predicate(!UseCountTrailingZerosInstruction); 5246 match(Set dst (CountTrailingZerosI src)); 5247 effect(KILL cr); 5248 5249 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5250 "JNZ done\n\t" 5251 "MOV $dst, 32\n" 5252 "done:" %} 5253 ins_encode %{ 5254 Register Rdst = $dst$$Register; 5255 Label done; 5256 __ bsfl(Rdst, $src$$Register); 5257 __ jccb(Assembler::notZero, done); 5258 __ movl(Rdst, BitsPerInt); 5259 __ bind(done); 5260 %} 5261 ins_pipe(ialu_reg); 5262 %} 5263 5264 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5265 predicate(UseCountTrailingZerosInstruction); 5266 match(Set dst (CountTrailingZerosL src)); 5267 effect(TEMP dst, KILL cr); 5268 5269 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5270 "JNC done\n\t" 5271 "TZCNT $dst, $src.hi\n\t" 5272 "ADD $dst, 32\n" 5273 "done:" %} 5274 ins_encode %{ 5275 Register Rdst = $dst$$Register; 5276 Register Rsrc = $src$$Register; 5277 Label done; 5278 __ 
tzcntl(Rdst, Rsrc); 5279 __ jccb(Assembler::carryClear, done); 5280 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5281 __ addl(Rdst, BitsPerInt); 5282 __ bind(done); 5283 %} 5284 ins_pipe(ialu_reg); 5285 %} 5286 5287 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5288 predicate(!UseCountTrailingZerosInstruction); 5289 match(Set dst (CountTrailingZerosL src)); 5290 effect(TEMP dst, KILL cr); 5291 5292 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5293 "JNZ done\n\t" 5294 "BSF $dst, $src.hi\n\t" 5295 "JNZ msw_not_zero\n\t" 5296 "MOV $dst, 32\n" 5297 "msw_not_zero:\n\t" 5298 "ADD $dst, 32\n" 5299 "done:" %} 5300 ins_encode %{ 5301 Register Rdst = $dst$$Register; 5302 Register Rsrc = $src$$Register; 5303 Label msw_not_zero; 5304 Label done; 5305 __ bsfl(Rdst, Rsrc); 5306 __ jccb(Assembler::notZero, done); 5307 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5308 __ jccb(Assembler::notZero, msw_not_zero); 5309 __ movl(Rdst, BitsPerInt); 5310 __ bind(msw_not_zero); 5311 __ addl(Rdst, BitsPerInt); 5312 __ bind(done); 5313 %} 5314 ins_pipe(ialu_reg); 5315 %} 5316 5317 5318 //---------- Population Count Instructions ------------------------------------- 5319 5320 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5321 predicate(UsePopCountInstruction); 5322 match(Set dst (PopCountI src)); 5323 effect(KILL cr); 5324 5325 format %{ "POPCNT $dst, $src" %} 5326 ins_encode %{ 5327 __ popcntl($dst$$Register, $src$$Register); 5328 %} 5329 ins_pipe(ialu_reg); 5330 %} 5331 5332 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5333 predicate(UsePopCountInstruction); 5334 match(Set dst (PopCountI (LoadI mem))); 5335 effect(KILL cr); 5336 5337 format %{ "POPCNT $dst, $mem" %} 5338 ins_encode %{ 5339 __ popcntl($dst$$Register, $mem$$Address); 5340 %} 5341 ins_pipe(ialu_reg); 5342 %} 5343 5344 // Note: Long.bitCount(long) returns an int. 
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    // popcount(64-bit) = popcount(low word) + popcount(high word).
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    // Build the low- and high-word addresses by hand: the memory operand
    // describes only the base address of the 64-bit value, so the high
    // word lives at displacement + 4.
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL
cr); 5401 5402 ins_cost(375); 5403 format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" 5404 "MOV $dst.hi,$dst.lo\n\t" 5405 "SAR $dst.hi,7" %} 5406 5407 ins_encode %{ 5408 __ movsbl($dst$$Register, $mem$$Address); 5409 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5410 __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. 5411 %} 5412 5413 ins_pipe(ialu_reg_mem); 5414 %} 5415 5416 // Load Unsigned Byte (8bit UNsigned) 5417 instruct loadUB(xRegI dst, memory mem) %{ 5418 match(Set dst (LoadUB mem)); 5419 5420 ins_cost(125); 5421 format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} 5422 5423 ins_encode %{ 5424 __ movzbl($dst$$Register, $mem$$Address); 5425 %} 5426 5427 ins_pipe(ialu_reg_mem); 5428 %} 5429 5430 // Load Unsigned Byte (8 bit UNsigned) into Long Register 5431 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5432 match(Set dst (ConvI2L (LoadUB mem))); 5433 effect(KILL cr); 5434 5435 ins_cost(250); 5436 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" 5437 "XOR $dst.hi,$dst.hi" %} 5438 5439 ins_encode %{ 5440 Register Rdst = $dst$$Register; 5441 __ movzbl(Rdst, $mem$$Address); 5442 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5443 %} 5444 5445 ins_pipe(ialu_reg_mem); 5446 %} 5447 5448 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register 5449 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5450 match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); 5451 effect(KILL cr); 5452 5453 format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" 5454 "XOR $dst.hi,$dst.hi\n\t" 5455 "AND $dst.lo,right_n_bits($mask, 8)" %} 5456 ins_encode %{ 5457 Register Rdst = $dst$$Register; 5458 __ movzbl(Rdst, $mem$$Address); 5459 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5460 __ andl(Rdst, $mask$$constant & right_n_bits(8)); 5461 %} 5462 ins_pipe(ialu_reg_mem); 5463 %} 5464 5465 // Load Short (16bit signed) 5466 instruct loadS(rRegI 
dst, memory mem) %{ 5467 match(Set dst (LoadS mem)); 5468 5469 ins_cost(125); 5470 format %{ "MOVSX $dst,$mem\t# short" %} 5471 5472 ins_encode %{ 5473 __ movswl($dst$$Register, $mem$$Address); 5474 %} 5475 5476 ins_pipe(ialu_reg_mem); 5477 %} 5478 5479 // Load Short (16 bit signed) to Byte (8 bit signed) 5480 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5481 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5482 5483 ins_cost(125); 5484 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5485 ins_encode %{ 5486 __ movsbl($dst$$Register, $mem$$Address); 5487 %} 5488 ins_pipe(ialu_reg_mem); 5489 %} 5490 5491 // Load Short (16bit signed) into Long Register 5492 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5493 match(Set dst (ConvI2L (LoadS mem))); 5494 effect(KILL cr); 5495 5496 ins_cost(375); 5497 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5498 "MOV $dst.hi,$dst.lo\n\t" 5499 "SAR $dst.hi,15" %} 5500 5501 ins_encode %{ 5502 __ movswl($dst$$Register, $mem$$Address); 5503 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5504 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5505 %} 5506 5507 ins_pipe(ialu_reg_mem); 5508 %} 5509 5510 // Load Unsigned Short/Char (16bit unsigned) 5511 instruct loadUS(rRegI dst, memory mem) %{ 5512 match(Set dst (LoadUS mem)); 5513 5514 ins_cost(125); 5515 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5516 5517 ins_encode %{ 5518 __ movzwl($dst$$Register, $mem$$Address); 5519 %} 5520 5521 ins_pipe(ialu_reg_mem); 5522 %} 5523 5524 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5525 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5526 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5527 5528 ins_cost(125); 5529 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5530 ins_encode %{ 5531 __ movsbl($dst$$Register, $mem$$Address); 5532 %} 5533 ins_pipe(ialu_reg_mem); 5534 %} 5535 5536 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5537 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5538 match(Set dst (ConvI2L (LoadUS mem))); 5539 effect(KILL cr); 5540 5541 ins_cost(250); 5542 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5543 "XOR $dst.hi,$dst.hi" %} 5544 5545 ins_encode %{ 5546 __ movzwl($dst$$Register, $mem$$Address); 5547 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5548 %} 5549 5550 ins_pipe(ialu_reg_mem); 5551 %} 5552 5553 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5554 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5555 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5556 effect(KILL cr); 5557 5558 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5559 "XOR $dst.hi,$dst.hi" %} 5560 ins_encode %{ 5561 Register Rdst = $dst$$Register; 5562 __ movzbl(Rdst, $mem$$Address); 5563 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5564 %} 5565 ins_pipe(ialu_reg_mem); 5566 %} 5567 5568 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5569 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5570 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5571 effect(KILL cr); 5572 5573 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5574 "XOR $dst.hi,$dst.hi\n\t" 5575 "AND $dst.lo,right_n_bits($mask, 16)" %} 5576 ins_encode %{ 5577 Register Rdst = $dst$$Register; 5578 __ movzwl(Rdst, $mem$$Address); 5579 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5580 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5581 %} 5582 ins_pipe(ialu_reg_mem); 5583 %} 5584 5585 // Load Integer 5586 instruct loadI(rRegI dst, memory mem) %{ 5587 match(Set dst (LoadI mem)); 5588 5589 ins_cost(125); 5590 format %{ "MOV $dst,$mem\t# int" %} 5591 5592 ins_encode %{ 5593 __ movl($dst$$Register, $mem$$Address); 5594 %} 5595 5596 ins_pipe(ialu_reg_mem); 5597 %} 5598 5599 // Load Integer (32 bit signed) to Byte (8 bit signed) 5600 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5601 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5602 5603 ins_cost(125); 5604 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5605 ins_encode %{ 5606 __ movsbl($dst$$Register, $mem$$Address); 5607 %} 5608 ins_pipe(ialu_reg_mem); 5609 %} 5610 5611 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5612 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5613 match(Set dst (AndI (LoadI mem) mask)); 5614 5615 ins_cost(125); 5616 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5617 ins_encode %{ 5618 __ movzbl($dst$$Register, $mem$$Address); 5619 %} 5620 ins_pipe(ialu_reg_mem); 5621 %} 5622 5623 // Load Integer (32 bit signed) to Short (16 bit signed) 5624 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5625 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5626 5627 ins_cost(125); 5628 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5629 ins_encode %{ 5630 __ movswl($dst$$Register, $mem$$Address); 5631 %} 5632 ins_pipe(ialu_reg_mem); 5633 
%} 5634 5635 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) 5636 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ 5637 match(Set dst (AndI (LoadI mem) mask)); 5638 5639 ins_cost(125); 5640 format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} 5641 ins_encode %{ 5642 __ movzwl($dst$$Register, $mem$$Address); 5643 %} 5644 ins_pipe(ialu_reg_mem); 5645 %} 5646 5647 // Load Integer into Long Register 5648 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5649 match(Set dst (ConvI2L (LoadI mem))); 5650 effect(KILL cr); 5651 5652 ins_cost(375); 5653 format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" 5654 "MOV $dst.hi,$dst.lo\n\t" 5655 "SAR $dst.hi,31" %} 5656 5657 ins_encode %{ 5658 __ movl($dst$$Register, $mem$$Address); 5659 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5660 __ sarl(HIGH_FROM_LOW($dst$$Register), 31); 5661 %} 5662 5663 ins_pipe(ialu_reg_mem); 5664 %} 5665 5666 // Load Integer with mask 0xFF into Long Register 5667 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5668 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5669 effect(KILL cr); 5670 5671 format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" 5672 "XOR $dst.hi,$dst.hi" %} 5673 ins_encode %{ 5674 Register Rdst = $dst$$Register; 5675 __ movzbl(Rdst, $mem$$Address); 5676 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5677 %} 5678 ins_pipe(ialu_reg_mem); 5679 %} 5680 5681 // Load Integer with mask 0xFFFF into Long Register 5682 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ 5683 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5684 effect(KILL cr); 5685 5686 format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" 5687 "XOR $dst.hi,$dst.hi" %} 5688 ins_encode %{ 5689 Register Rdst = $dst$$Register; 5690 __ movzwl(Rdst, $mem$$Address); 5691 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5692 %} 5693 ins_pipe(ialu_reg_mem); 
5694 %} 5695 5696 // Load Integer with 31-bit mask into Long Register 5697 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ 5698 match(Set dst (ConvI2L (AndI (LoadI mem) mask))); 5699 effect(KILL cr); 5700 5701 format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" 5702 "XOR $dst.hi,$dst.hi\n\t" 5703 "AND $dst.lo,$mask" %} 5704 ins_encode %{ 5705 Register Rdst = $dst$$Register; 5706 __ movl(Rdst, $mem$$Address); 5707 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5708 __ andl(Rdst, $mask$$constant); 5709 %} 5710 ins_pipe(ialu_reg_mem); 5711 %} 5712 5713 // Load Unsigned Integer into Long Register 5714 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ 5715 match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); 5716 effect(KILL cr); 5717 5718 ins_cost(250); 5719 format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" 5720 "XOR $dst.hi,$dst.hi" %} 5721 5722 ins_encode %{ 5723 __ movl($dst$$Register, $mem$$Address); 5724 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5725 %} 5726 5727 ins_pipe(ialu_reg_mem); 5728 %} 5729 5730 // Load Long. Cannot clobber address while loading, so restrict address 5731 // register to ESI 5732 instruct loadL(eRegL dst, load_long_memory mem) %{ 5733 predicate(!((LoadLNode*)n)->require_atomic_access()); 5734 match(Set dst (LoadL mem)); 5735 5736 ins_cost(250); 5737 format %{ "MOV $dst.lo,$mem\t# long\n\t" 5738 "MOV $dst.hi,$mem+4" %} 5739 5740 ins_encode %{ 5741 Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); 5742 Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); 5743 __ movl($dst$$Register, Amemlo); 5744 __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); 5745 %} 5746 5747 ins_pipe(ialu_reg_long_mem); 5748 %} 5749 5750 // Volatile Load Long. Must be atomic, so do 64-bit FILD 5751 // then store it down to the stack and reload on the int 5752 // side. 
5753 instruct loadL_volatile(stackSlotL dst, memory mem) %{ 5754 predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); 5755 match(Set dst (LoadL mem)); 5756 5757 ins_cost(200); 5758 format %{ "FILD $mem\t# Atomic volatile long load\n\t" 5759 "FISTp $dst" %} 5760 ins_encode(enc_loadL_volatile(mem,dst)); 5761 ins_pipe( fpu_reg_mem ); 5762 %} 5763 5764 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ 5765 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5766 match(Set dst (LoadL mem)); 5767 effect(TEMP tmp); 5768 ins_cost(180); 5769 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5770 "MOVSD $dst,$tmp" %} 5771 ins_encode %{ 5772 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5773 __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); 5774 %} 5775 ins_pipe( pipe_slow ); 5776 %} 5777 5778 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ 5779 predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); 5780 match(Set dst (LoadL mem)); 5781 effect(TEMP tmp); 5782 ins_cost(160); 5783 format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" 5784 "MOVD $dst.lo,$tmp\n\t" 5785 "PSRLQ $tmp,32\n\t" 5786 "MOVD $dst.hi,$tmp" %} 5787 ins_encode %{ 5788 __ movdbl($tmp$$XMMRegister, $mem$$Address); 5789 __ movdl($dst$$Register, $tmp$$XMMRegister); 5790 __ psrlq($tmp$$XMMRegister, 32); 5791 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 5792 %} 5793 ins_pipe( pipe_slow ); 5794 %} 5795 5796 // Load Range 5797 instruct loadRange(rRegI dst, memory mem) %{ 5798 match(Set dst (LoadRange mem)); 5799 5800 ins_cost(125); 5801 format %{ "MOV $dst,$mem" %} 5802 opcode(0x8B); 5803 ins_encode( OpcP, RegMem(dst,mem)); 5804 ins_pipe( ialu_reg_mem ); 5805 %} 5806 5807 5808 // Load Pointer 5809 instruct loadP(eRegP dst, memory mem) %{ 5810 match(Set dst (LoadP mem)); 5811 5812 ins_cost(125); 5813 format %{ "MOV $dst,$mem" %} 5814 opcode(0x8B); 5815 ins_encode( OpcP, RegMem(dst,mem)); 5816 ins_pipe( 
ialu_reg_mem ); 5817 %} 5818 5819 // Load Klass Pointer 5820 instruct loadKlass(eRegP dst, memory mem) %{ 5821 match(Set dst (LoadKlass mem)); 5822 5823 ins_cost(125); 5824 format %{ "MOV $dst,$mem" %} 5825 opcode(0x8B); 5826 ins_encode( OpcP, RegMem(dst,mem)); 5827 ins_pipe( ialu_reg_mem ); 5828 %} 5829 5830 // Load Double 5831 instruct loadDPR(regDPR dst, memory mem) %{ 5832 predicate(UseSSE<=1); 5833 match(Set dst (LoadD mem)); 5834 5835 ins_cost(150); 5836 format %{ "FLD_D ST,$mem\n\t" 5837 "FSTP $dst" %} 5838 opcode(0xDD); /* DD /0 */ 5839 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5840 Pop_Reg_DPR(dst) ); 5841 ins_pipe( fpu_reg_mem ); 5842 %} 5843 5844 // Load Double to XMM 5845 instruct loadD(regD dst, memory mem) %{ 5846 predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); 5847 match(Set dst (LoadD mem)); 5848 ins_cost(145); 5849 format %{ "MOVSD $dst,$mem" %} 5850 ins_encode %{ 5851 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5852 %} 5853 ins_pipe( pipe_slow ); 5854 %} 5855 5856 instruct loadD_partial(regD dst, memory mem) %{ 5857 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 5858 match(Set dst (LoadD mem)); 5859 ins_cost(145); 5860 format %{ "MOVLPD $dst,$mem" %} 5861 ins_encode %{ 5862 __ movdbl ($dst$$XMMRegister, $mem$$Address); 5863 %} 5864 ins_pipe( pipe_slow ); 5865 %} 5866 5867 // Load to XMM register (single-precision floating point) 5868 // MOVSS instruction 5869 instruct loadF(regF dst, memory mem) %{ 5870 predicate(UseSSE>=1); 5871 match(Set dst (LoadF mem)); 5872 ins_cost(145); 5873 format %{ "MOVSS $dst,$mem" %} 5874 ins_encode %{ 5875 __ movflt ($dst$$XMMRegister, $mem$$Address); 5876 %} 5877 ins_pipe( pipe_slow ); 5878 %} 5879 5880 // Load Float 5881 instruct loadFPR(regFPR dst, memory mem) %{ 5882 predicate(UseSSE==0); 5883 match(Set dst (LoadF mem)); 5884 5885 ins_cost(150); 5886 format %{ "FLD_S ST,$mem\n\t" 5887 "FSTP $dst" %} 5888 opcode(0xD9); /* D9 /0 */ 5889 ins_encode( OpcP, RMopc_Mem(0x00,mem), 5890 Pop_Reg_FPR(dst) ); 5891 
ins_pipe( fpu_reg_mem ); 5892 %} 5893 5894 // Load Effective Address 5895 instruct leaP8(eRegP dst, indOffset8 mem) %{ 5896 match(Set dst mem); 5897 5898 ins_cost(110); 5899 format %{ "LEA $dst,$mem" %} 5900 opcode(0x8D); 5901 ins_encode( OpcP, RegMem(dst,mem)); 5902 ins_pipe( ialu_reg_reg_fat ); 5903 %} 5904 5905 instruct leaP32(eRegP dst, indOffset32 mem) %{ 5906 match(Set dst mem); 5907 5908 ins_cost(110); 5909 format %{ "LEA $dst,$mem" %} 5910 opcode(0x8D); 5911 ins_encode( OpcP, RegMem(dst,mem)); 5912 ins_pipe( ialu_reg_reg_fat ); 5913 %} 5914 5915 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ 5916 match(Set dst mem); 5917 5918 ins_cost(110); 5919 format %{ "LEA $dst,$mem" %} 5920 opcode(0x8D); 5921 ins_encode( OpcP, RegMem(dst,mem)); 5922 ins_pipe( ialu_reg_reg_fat ); 5923 %} 5924 5925 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ 5926 match(Set dst mem); 5927 5928 ins_cost(110); 5929 format %{ "LEA $dst,$mem" %} 5930 opcode(0x8D); 5931 ins_encode( OpcP, RegMem(dst,mem)); 5932 ins_pipe( ialu_reg_reg_fat ); 5933 %} 5934 5935 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ 5936 match(Set dst mem); 5937 5938 ins_cost(110); 5939 format %{ "LEA $dst,$mem" %} 5940 opcode(0x8D); 5941 ins_encode( OpcP, RegMem(dst,mem)); 5942 ins_pipe( ialu_reg_reg_fat ); 5943 %} 5944 5945 // Load Constant 5946 instruct loadConI(rRegI dst, immI src) %{ 5947 match(Set dst src); 5948 5949 format %{ "MOV $dst,$src" %} 5950 ins_encode( LdImmI(dst, src) ); 5951 ins_pipe( ialu_reg_fat ); 5952 %} 5953 5954 // Load Constant zero 5955 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{ 5956 match(Set dst src); 5957 effect(KILL cr); 5958 5959 ins_cost(50); 5960 format %{ "XOR $dst,$dst" %} 5961 opcode(0x33); /* + rd */ 5962 ins_encode( OpcP, RegReg( dst, dst ) ); 5963 ins_pipe( ialu_reg ); 5964 %} 5965 5966 instruct loadConP(eRegP dst, immP src) %{ 5967 match(Set dst src); 5968 5969 format %{ "MOV $dst,$src" %} 5970 opcode(0xB8); /* + rd */ 5971 
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant as two 32-bit immediate moves (lo then hi half).
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Long zero: XOR both halves (clobbers EFLAGS, hence KILL cr).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// Float zero: XORPS the register with itself instead of a table load.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
// Double zero: XORPD the register with itself instead of a table load.
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Integer from stack slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Long from stack slot as two 32-bit loads (lo then hi half).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Pointer from stack slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Float from stack slot (via x87 stack)
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double from stack slot (via x87 stack)
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// One variant per AllocatePrefetchInstr setting (0=NTA, 1=T0, 2=T2, 3=PREFETCHW).

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short (0x66 operand-size prefix + 32-bit MOV opcode)
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long as two 32-bit moves; only legal when the store need not
// be atomic (see the volatile variants below).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer: only the low 32-bit half is written.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: bounce the long through an XMM temp for a single
// atomic 64-bit store.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant taking the long in a GPR pair: pack lo/hi into one XMM
// register via PUNPCKLDQ, then store atomically.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister,
                 $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87; src must already be on the FP stack top, regDPR1)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating point)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float (x87; src must already be on the FP stack top, regFPR1)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// Fused double->float conversion: FST_S narrows the double on store.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier; emitted as a locked ADD to the stack top
// (see MacroAssembler::membar), which clobbers EFLAGS — hence KILL cr.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// CastX2P pins src and dst to EAX so no code is needed (empty encoding).
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move: branch-based emulation for CPUs without CMOV.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move (x87 FCMOV; only valid with unsigned-flavor flags)
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Signed flavor is emulated with a branch around an FP register move.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: two CMOVs, one per 32-bit half.
instruct cmovL_reg(cmpOp cop,
                   eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add of constant 1 as a one-byte INC (guarded by UseIncDec flag).
instruct incI_eReg(rRegI dst, immI1 src,
eFlagsReg cr) %{ 7032 predicate(UseIncDec); 7033 match(Set dst (AddI dst src)); 7034 effect(KILL cr); 7035 7036 size(1); 7037 format %{ "INC $dst" %} 7038 opcode(0x40); /* */ 7039 ins_encode( Opc_plus( primary, dst ) ); 7040 ins_pipe( ialu_reg ); 7041 %} 7042 7043 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 7044 match(Set dst (AddI src0 src1)); 7045 ins_cost(110); 7046 7047 format %{ "LEA $dst,[$src0 + $src1]" %} 7048 opcode(0x8D); /* 0x8D /r */ 7049 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7050 ins_pipe( ialu_reg_reg ); 7051 %} 7052 7053 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 7054 match(Set dst (AddP src0 src1)); 7055 ins_cost(110); 7056 7057 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 7058 opcode(0x8D); /* 0x8D /r */ 7059 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7060 ins_pipe( ialu_reg_reg ); 7061 %} 7062 7063 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 7064 predicate(UseIncDec); 7065 match(Set dst (AddI dst src)); 7066 effect(KILL cr); 7067 7068 size(1); 7069 format %{ "DEC $dst" %} 7070 opcode(0x48); /* */ 7071 ins_encode( Opc_plus( primary, dst ) ); 7072 ins_pipe( ialu_reg ); 7073 %} 7074 7075 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 7076 match(Set dst (AddP dst src)); 7077 effect(KILL cr); 7078 7079 size(2); 7080 format %{ "ADD $dst,$src" %} 7081 opcode(0x03); 7082 ins_encode( OpcP, RegReg( dst, src) ); 7083 ins_pipe( ialu_reg_reg ); 7084 %} 7085 7086 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 7087 match(Set dst (AddP dst src)); 7088 effect(KILL cr); 7089 7090 format %{ "ADD $dst,$src" %} 7091 opcode(0x81,0x00); /* Opcode 81 /0 id */ 7092 // ins_encode( RegImm( dst, src) ); 7093 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7094 ins_pipe( ialu_reg ); 7095 %} 7096 7097 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7098 match(Set dst (AddI dst (LoadI src))); 7099 effect(KILL cr); 7100 7101 ins_cost(125); 7102 format %{ "ADD $dst,$src" 
%} 7103 opcode(0x03); 7104 ins_encode( OpcP, RegMem( dst, src) ); 7105 ins_pipe( ialu_reg_mem ); 7106 %} 7107 7108 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7109 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7110 effect(KILL cr); 7111 7112 ins_cost(150); 7113 format %{ "ADD $dst,$src" %} 7114 opcode(0x01); /* Opcode 01 /r */ 7115 ins_encode( OpcP, RegMem( src, dst ) ); 7116 ins_pipe( ialu_mem_reg ); 7117 %} 7118 7119 // Add Memory with Immediate 7120 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7121 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7122 effect(KILL cr); 7123 7124 ins_cost(125); 7125 format %{ "ADD $dst,$src" %} 7126 opcode(0x81); /* Opcode 81 /0 id */ 7127 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7128 ins_pipe( ialu_mem_imm ); 7129 %} 7130 7131 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7132 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7133 effect(KILL cr); 7134 7135 ins_cost(125); 7136 format %{ "INC $dst" %} 7137 opcode(0xFF); /* Opcode FF /0 */ 7138 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7139 ins_pipe( ialu_mem_imm ); 7140 %} 7141 7142 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7143 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7144 effect(KILL cr); 7145 7146 ins_cost(125); 7147 format %{ "DEC $dst" %} 7148 opcode(0xFF); /* Opcode FF /1 */ 7149 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7150 ins_pipe( ialu_mem_imm ); 7151 %} 7152 7153 7154 instruct checkCastPP( eRegP dst ) %{ 7155 match(Set dst (CheckCastPP dst)); 7156 7157 size(0); 7158 format %{ "#checkcastPP of $dst" %} 7159 ins_encode( /*empty encoding*/ ); 7160 ins_pipe( empty ); 7161 %} 7162 7163 instruct castPP( eRegP dst ) %{ 7164 match(Set dst (CastPP dst)); 7165 format %{ "#castPP of $dst" %} 7166 ins_encode( /*empty encoding*/ ); 7167 ins_pipe( empty ); 7168 %} 7169 7170 instruct castII( rRegI dst ) %{ 7171 match(Set dst (CastII dst)); 7172 format %{ "#castII of $dst" %} 
7173 ins_encode( /*empty encoding*/ ); 7174 ins_cost(0); 7175 ins_pipe( empty ); 7176 %} 7177 7178 7179 // Load-locked - same as a regular pointer load when used with compare-swap 7180 instruct loadPLocked(eRegP dst, memory mem) %{ 7181 match(Set dst (LoadPLocked mem)); 7182 7183 ins_cost(125); 7184 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7185 opcode(0x8B); 7186 ins_encode( OpcP, RegMem(dst,mem)); 7187 ins_pipe( ialu_reg_mem ); 7188 %} 7189 7190 // Conditional-store of the updated heap-top. 7191 // Used during allocation of the shared heap. 7192 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7193 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7194 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7195 // EAX is killed if there is contention, but then it's also unused. 7196 // In the common case of no contention, EAX holds the new oop address. 7197 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7198 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7199 ins_pipe( pipe_cmpxchg ); 7200 %} 7201 7202 // Conditional-store of an int value. 7203 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 7204 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ 7205 match(Set cr (StoreIConditional mem (Binary oldval newval))); 7206 effect(KILL oldval); 7207 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} 7208 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); 7209 ins_pipe( pipe_cmpxchg ); 7210 %} 7211 7212 // Conditional-store of a long value. 7213 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high order word of the new value to store but
    //       our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    // LOCK prefix only needed on multiprocessor systems.
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS: CMPXCHG8B compares EDX:EAX, stores ECX:EBX; result is
// converted from ZF into a 0/1 boolean in res.
instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS; oldval is pinned in EAX (CMPXCHG overwrites it on failure).
instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV $res,0\n\t"
            "JNE,s fail\n\t"
            "MOV $res,1\n"
            "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// CompareAndExchange returns the witnessed value rather than a boolean:
// CMPXCHG leaves it in oldval's register(s), so oldval is the def, not killed.
instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  predicate(VM_Version::supports_cx8());
  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg8(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
  effect(KILL cr);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
  ins_encode( enc_cmpxchg(mem_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// GetAndAddI whose result is unused: a plain locked ADD suffices (no XADD).
instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
  predicate(n->as_LoadStore()->result_not_used());
  match(Set dummy (GetAndAddI mem add));
  effect(KILL cr);
  format %{ "ADDL [$mem],$add" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ addl($mem$$Address, $add$$constant);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic fetch-and-add: XADD leaves the previous memory value in newval.
instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
  match(Set newval (GetAndAddI mem newval));
  effect(KILL cr);
  format %{ "XADDL [$mem],$newval" %}
  ins_encode %{
    if (os::is_MP()) { __ lock(); }
    __ xaddl($mem$$Address, $newval$$Register);
  %}
  ins_pipe( pipe_cmpxchg );
%}

// Atomic swap.  XCHG with a memory operand asserts the bus lock implicitly,
// so no explicit LOCK prefix is emitted.
instruct xchgI( memory mem, rRegI newval) %{
  match(Set newval (GetAndSetI mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

instruct xchgP( memory mem, pRegP newval) %{
  match(Set newval (GetAndSetP mem newval));
  format %{ "XCHGL $newval,[$mem]" %}
  ins_encode %{
    __ xchgl($newval$$Register, $mem$$Address);
  %}
  ins_pipe( pipe_cmpxchg );
%}

//----------Subtraction Instructions-------------------------------------------

// Integer Subtraction Instructions
instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract immediate; OpcSErm selects the short sign-extended-imm8 form
// when the constant fits in a byte.
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract a loaded value: SUB reg, [mem].
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write: SUB [mem], reg.
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// 0 - dst is a single NEG instruction.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
// Three-operand form: IMUL dst, src, imm (dst need not equal src).
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into EAX only, as the low half of an EDX:EAX pair.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (special case for shift by 32)
// The predicate walks the matched subtree to verify the long constant
// actually fits in 32 bits (so a 32x32->64 multiply is sufficient).
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned 32x32->64 multiply (one-operand MUL into EDX:EAX).
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst,
eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// The MIN_INT/-1 case would trap (IDIV overflow), so it is special-cased
// up front to produce MIN_INT quotient with no exception.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// Goes out to the C runtime; arguments are pushed on the stack.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr,
                    eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2,
                          eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    // Divide EDX:EAX (in dst) by a positive 32-bit magnitude using two
    // unsigned 32-bit DIVs (long division digit-by-digit), then fix up
    // the sign afterwards since the divisor's sign is a compile-time fact.
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    // Same two-step unsigned long division as divL_eReg_imm32, but the
    // remainder (left in EDX by the second DIV) is kept instead of the
    // quotient; remainder sign follows the dividend's sign.
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7872 match(Set dst (LShiftI dst shift)); 7873 effect(KILL cr); 7874 7875 size(2); 7876 format %{ "SHL $dst,$shift" %} 7877 opcode(0xD3, 0x4); /* D3 /4 */ 7878 ins_encode( OpcP, RegOpc( dst ) ); 7879 ins_pipe( ialu_reg_reg ); 7880 %} 7881 7882 // Arithmetic shift right by one 7883 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7884 match(Set dst (RShiftI dst shift)); 7885 effect(KILL cr); 7886 7887 size(2); 7888 format %{ "SAR $dst,$shift" %} 7889 opcode(0xD1, 0x7); /* D1 /7 */ 7890 ins_encode( OpcP, RegOpc( dst ) ); 7891 ins_pipe( ialu_reg ); 7892 %} 7893 7894 // Arithmetic shift right by one 7895 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{ 7896 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7897 effect(KILL cr); 7898 format %{ "SAR $dst,$shift" %} 7899 opcode(0xD1, 0x7); /* D1 /7 */ 7900 ins_encode( OpcP, RMopc_Mem(secondary,dst) ); 7901 ins_pipe( ialu_mem_imm ); 7902 %} 7903 7904 // Arithmetic Shift Right by 8-bit immediate 7905 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7906 match(Set dst (RShiftI dst shift)); 7907 effect(KILL cr); 7908 7909 size(3); 7910 format %{ "SAR $dst,$shift" %} 7911 opcode(0xC1, 0x7); /* C1 /7 ib */ 7912 ins_encode( RegOpcImm( dst, shift ) ); 7913 ins_pipe( ialu_mem_imm ); 7914 %} 7915 7916 // Arithmetic Shift Right by 8-bit immediate 7917 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ 7918 match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); 7919 effect(KILL cr); 7920 7921 format %{ "SAR $dst,$shift" %} 7922 opcode(0xC1, 0x7); /* C1 /7 ib */ 7923 ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) ); 7924 ins_pipe( ialu_mem_imm ); 7925 %} 7926 7927 // Arithmetic Shift Right by variable 7928 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ 7929 match(Set dst (RShiftI dst shift)); 7930 effect(KILL cr); 7931 7932 size(2); 7933 format %{ "SAR $dst,$shift" %} 7934 
opcode(0xD3, 0x7); /* D3 /7 */ 7935 ins_encode( OpcP, RegOpc( dst ) ); 7936 ins_pipe( ialu_reg_reg ); 7937 %} 7938 7939 // Logical shift right by one 7940 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 7941 match(Set dst (URShiftI dst shift)); 7942 effect(KILL cr); 7943 7944 size(2); 7945 format %{ "SHR $dst,$shift" %} 7946 opcode(0xD1, 0x5); /* D1 /5 */ 7947 ins_encode( OpcP, RegOpc( dst ) ); 7948 ins_pipe( ialu_reg ); 7949 %} 7950 7951 // Logical Shift Right by 8-bit immediate 7952 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ 7953 match(Set dst (URShiftI dst shift)); 7954 effect(KILL cr); 7955 7956 size(3); 7957 format %{ "SHR $dst,$shift" %} 7958 opcode(0xC1, 0x5); /* C1 /5 ib */ 7959 ins_encode( RegOpcImm( dst, shift) ); 7960 ins_pipe( ialu_reg ); 7961 %} 7962 7963 7964 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. 7965 // This idiom is used by the compiler for the i2b bytecode. 7966 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ 7967 match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); 7968 7969 size(3); 7970 format %{ "MOVSX $dst,$src :8" %} 7971 ins_encode %{ 7972 __ movsbl($dst$$Register, $src$$Register); 7973 %} 7974 ins_pipe(ialu_reg_reg); 7975 %} 7976 7977 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. 7978 // This idiom is used by the compiler the i2s bytecode. 
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable count in ECX (x86 shift-by-CL form)
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register (read-modify-write on the memory operand)
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2, matched from the (src1 ^ -1) & src2 ideal shape.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched from (0 - src) & src.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to and including lowest set bit, from (src + -1) ^ src.
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit, from (src + -1) & src.
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand: helper instructs used only via expand %{ %} below (no match rule).
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0);  /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0);  /* Opcode C1 /0 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0);  /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate: the two shift counts must sum to 0 mod 32 for the OR of the
// two shifts to be a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1);  /* Opcode C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1);  /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1 (emitted as NOT; NOT does not write EFLAGS,
// hence no KILL cr here)
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG/ADC idiom: produces 1 if src != 0, else 0 (dst must hold a copy of src).
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

// dst = (p < q) ? -1 : 0
instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP $p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // Note: a stray unused 'Label done;' was removed here -- this encoding
    // is branch-free.
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd);
  %}

  ins_pipe(pipe_slow);
%}

// dst = (dst < 0) ? -1 : 0, via arithmetic shift of the sign bit
instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------

// These produce only the flags result; the arithmetic result is discarded,
// so op1 is USE_KILLed where the instruction destroys it.
instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// 64-bit longs live in 32-bit register pairs; low-word op then
// carry-propagating high-word op.
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
// Long subtract: SUB on the low word, then SBB so the borrow propagates
// into the high word.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Long negate (0 - dst), see format for the three-instruction sequence.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long forms apply the 32-bit BMI1 instruction to each half of the pair;
// HIGH_FROM_LOW yields the register holding the high 32 bits ($dst.hi).
// TEMP dst keeps dst from being assigned the same pair as a src.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
         %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Address of the high word: same base/index/scale, displacement + 4.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSI: the high half is only computed when the low-word BLSI
// result is zero (the jccb skips it when the result is non-zero).
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSMSK: high half only needed when the low-word op carried
// (jccb carryClear skips it otherwise).
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSR: high half starts as a copy of src.hi and is only BLSR'd
// when the low-word op carried.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
         %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// Emitted as NOT on each half; NOT does not write EFLAGS, hence no KILL cr.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1
// Small constant long left-shifts use ADD/ADC doubling (shift by 1 == add
// to itself, with the carry feeding the high word).
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31: SHLD shifts bits from the low word into the
// high word, then SHL finishes the low word.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63: move low into high, shift by cnt-32, clear low.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable count; tests bit 5 of the count at runtime
// to pick the small (<32) or big (>=32) sequence.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode(
shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
// Arithmetic variant: the high word is refilled with sign bits (SAR by 31).
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}


//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax); // AH is used to force CF via SAHF on the NaN path
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// As above but for users that only read CF (eFlagsRegUCF): no NaN fixup needed.
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
// Pre-P6 path: compare, move FPU status into AX, then SAHF into EFLAGS.
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Carry-flag-only consumer: UCOMISD alone suffices, no NaN fixup.
instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 src2));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(145);
  format %{ "UCOMISD $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD src1 (LoadD src2)));
  ins_cost(100);
  format %{ "UCOMISD $src1,$src2" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISD $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomisd($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// x87 double subtract, register-register.
instruct subDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Subtract then round through a stack slot (RoundDouble).
instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DSUB ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}

// Subtract with a memory operand loaded via FLD.
instruct subDPR_reg_mem(regDPR dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DSUBp $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD
DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Absolute value of a double; operates on the FPU top-of-stack (regDPR1).
instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Negate a double on the FPU top-of-stack.
instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// x87 double add, register-register.
instruct addDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD $src\n\t"
            "DADD $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Add then round through a stack slot (RoundDouble).
instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD $src2\n\t"
            "DADD ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_DPR(src2),
              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}

// Add with a memory operand loaded via FLD.
instruct addDPR_reg_mem(regDPR dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD $src\n\t"
            "DADDp $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory
// Load-add-store pattern: the rounded sum is written straight back to
// the memory destination.
instruct addDPR_mem_reg(memory dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D $dst\n\t"
            "DADD ST,$src\n\t"
            "FST_D $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}

// Add the double constant 1.0: FLD1 avoids a constant-table load.
instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// Add a general double constant from the constant table.
// The predicate excludes 0.0 and 1.0, which have cheaper dedicated rules.
instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Add a constant then round through a stack slot.
instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}

// x87 double multiply, register-register.
instruct mulDPR_reg(regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (MulD dst src));
  ins_cost(1); // Select this instruction for all strict FP double multiplies

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "DMULp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Multiply by a constant from the constant table (0.0 and 1.0 excluded).
instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Multiply with a memory operand loaded via FLD.
instruct mulDPR_reg_mem(regDPR dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D $src\n\t"
            "DMULp $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D $mem\n\t"
            "DMUL ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_FPR(src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}


// MACRO3 -- addDPR a mulDPR
// This instruction is a '2-address' instruction in that the result goes
// back to src2. This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DADDp $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subDPR a mulDPR
instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD $src0\t# ===MACRO3d===\n\t"
            "DMUL ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}

// x87 double divide, register-register.
instruct divDPR_reg(regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD $src\n\t"
            "FDIVp $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_DPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before division then
// biases
// result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Fix: this rule previously carried two predicate() declarations -- a
  // redundant plain "UseSSE<=1" in addition to the strict-mode test below.
  // A rule takes a single predicate, and the strict-mode expression already
  // includes the UseSSE<=1 condition, so the duplicate is dropped (this now
  // matches the sibling strictfp_mulDPR_reg rule).
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(01); // Select this instruction for all strict FP double divides

  format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp $dst,ST\n\t"
            "FLD $src\n\t"
            "FDIVp $dst,ST\n\t"
            "FLD StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),   // pre-scale dividend by 2^(-15360)
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) ); // rescale quotient by 2^(15360)
  ins_pipe( fpu_reg_reg );
%}

// Divide then round through a stack slot; only selected outside strict-FP
// methods (the strict rule above handles those at ins_cost 01).
instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD $src1\n\t"
            "FDIV ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}

// x87 double remainder.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 double remainder: operands are bounced through the x87 FPU.
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
effect(KILL rax, KILL cr);

  format %{ "SUB ESP,8\t # DMOD\n"
          "\tMOVSD [ESP+0],$src1\n"
          "\tFLD_D [ESP+0]\n"
          "\tMOVSD [ESP+0],$src0\n"
          "\tFLD_D [ESP+0]\n"
          "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD $dst,[ESP+0]\n"
          "\tADD ESP,8\n"
          "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}

// Tangent, x87 path: fptan pushes an extra 1.0 which is popped with fstp st.
instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst(TanD src));
  format %{ "DTAN $dst" %}
  ins_encode( Opcode(0xD9), Opcode(0xF2), // fptan
              Opcode(0xDD), Opcode(0xD8)); // fstp st
  ins_pipe( pipe_slow );
%}

// Tangent with SSE2 operands: value moved to the FPU stack and back.
instruct tanD_reg(regD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(TanD dst));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DTAN $dst" %}
  ins_encode( Push_SrcD(dst),
              Opcode(0xD9), Opcode(0xF2), // fptan
              Opcode(0xDD), Opcode(0xD8), // fstp st
              Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// Two-argument arctangent, x87 path (D9 F3 = fpatan).
instruct atanDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_DPR(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// Two-argument arctangent with SSE2 operands.
instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATA $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcD(src),
              OpcP, OpcS, Push_ResultD(dst) );
  ins_pipe( pipe_slow );
%}

// Square root, x87 path: emits D9 FA (OpcS then OpcP) = fsqrt.
instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_DPR(src),
              OpcS, OpcP, Pop_Reg_DPR(dst) );
  ins_pipe( pipe_slow );
%}

instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (Log10D src));
  // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
  // fxch ; swap ST(0) with ST(1)
  // fyl2x ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FXCH \n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)"
  %}
  ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
              Opcode(0xD9), Opcode(0xC9), // fxch
              Opcode(0xD9), Opcode(0xF1)); // fyl2x

  ins_pipe( pipe_slow );
%}

instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  match(Set dst (Log10D src));
  // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number
  // fyl2x ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FYL2X \t\t\t# Q=Log10*Log_2(x)"
  %}
  ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2
              Push_SrcD(src),
              Opcode(0xD9), Opcode(0xF1), // fyl2x
              Push_ResultD(dst));

  ins_pipe( pipe_slow );
%}

//-------------Float Instructions-------------------------------
// Float Math

// Code for float compare:
//     fcompp();
//     fwait(); fnstsw_ax();
//     sahf();
//     movl(dst, unordered_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);
//   exit:

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax); // AH is used to force CF via SAHF on the NaN path
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// As above but for users that only read CF (eFlagsRegUCF): no NaN fixup.
instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax,
eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(145);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

// Carry-flag-only consumer: UCOMISS alone suffices, no NaN fixup.
instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(165);
  format %{ "UCOMISS $src1,$src2\n\t"
            "JNP,s exit\n\t"
            "PUSHF\t# saw NaN, set CF\n\t"
            "AND [rsp], #0xffffff2b\n\t"
            "POPF\n"
            "exit:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp_fixup(_masm);
  %}
  ins_pipe( pipe_slow );
%}

instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF src1 (LoadF src2)));
  ins_cost(100);
  format %{ "UCOMISS $src1,$src2" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF src2)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "UCOMISS $src1, $src2\n\t"
            "MOV $dst, #-1\n\t"
            "JP,s done\n\t"
            "JB,s done\n\t"
            "SETNE $dst\n\t"
            "MOVZB $dst, $dst\n"
            "done:" %}
  ins_encode %{
    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    emit_cmpfp3(_masm, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Absolute value of a float on the FPU top-of-stack.
instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Negate a float on the FPU top-of-stack.
instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory
src1, memory src2) %{ 10238 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10239 match(Set dst (AddF src1 src2)); 10240 10241 format %{ "FADD $dst,$src1,$src2" %} 10242 opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ 10243 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10244 set_instruction_start, 10245 OpcP, RMopc_Mem(secondary,src1), 10246 Pop_Mem_FPR(dst) ); 10247 ins_pipe( fpu_mem_mem_mem ); 10248 %} 10249 10250 10251 // Spill to obtain 24-bit precision 10252 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10253 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10254 match(Set dst (AddF src con)); 10255 format %{ "FLD $src\n\t" 10256 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10257 "FSTP_S $dst" %} 10258 ins_encode %{ 10259 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10260 __ fadd_s($constantaddress($con)); 10261 __ fstp_s(Address(rsp, $dst$$disp)); 10262 %} 10263 ins_pipe(fpu_mem_reg_con); 10264 %} 10265 // 10266 // This instruction does not round to 24-bits 10267 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10268 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10269 match(Set dst (AddF src con)); 10270 format %{ "FLD $src\n\t" 10271 "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10272 "FSTP $dst" %} 10273 ins_encode %{ 10274 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10275 __ fadd_s($constantaddress($con)); 10276 __ fstp_d($dst$$reg); 10277 %} 10278 ins_pipe(fpu_reg_reg_con); 10279 %} 10280 10281 // Spill to obtain 24-bit precision 10282 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10283 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10284 match(Set dst (MulF src1 src2)); 10285 10286 format %{ "FLD $src1\n\t" 10287 "FMUL $src2\n\t" 10288 "FSTP_S $dst" %} 10289 opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ 10290 ins_encode( Push_Reg_FPR(src1), 10291 
OpcReg_FPR(src2), 10292 Pop_Mem_FPR(dst) ); 10293 ins_pipe( fpu_mem_reg_reg ); 10294 %} 10295 // 10296 // This instruction does not round to 24-bits 10297 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ 10298 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10299 match(Set dst (MulF src1 src2)); 10300 10301 format %{ "FLD $src1\n\t" 10302 "FMUL $src2\n\t" 10303 "FSTP_S $dst" %} 10304 opcode(0xD8, 0x1); /* D8 C8+i */ 10305 ins_encode( Push_Reg_FPR(src2), 10306 OpcReg_FPR(src1), 10307 Pop_Reg_FPR(dst) ); 10308 ins_pipe( fpu_reg_reg_reg ); 10309 %} 10310 10311 10312 // Spill to obtain 24-bit precision 10313 // Cisc-alternate to reg-reg multiply 10314 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ 10315 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10316 match(Set dst (MulF src1 (LoadF src2))); 10317 10318 format %{ "FLD_S $src2\n\t" 10319 "FMUL $src1\n\t" 10320 "FSTP_S $dst" %} 10321 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ 10322 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10323 OpcReg_FPR(src1), 10324 Pop_Mem_FPR(dst) ); 10325 ins_pipe( fpu_mem_reg_mem ); 10326 %} 10327 // 10328 // This instruction does not round to 24-bits 10329 // Cisc-alternate to reg-reg multiply 10330 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ 10331 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10332 match(Set dst (MulF src1 (LoadF src2))); 10333 10334 format %{ "FMUL $dst,$src1,$src2" %} 10335 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ 10336 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10337 OpcReg_FPR(src1), 10338 Pop_Reg_FPR(dst) ); 10339 ins_pipe( fpu_reg_reg_mem ); 10340 %} 10341 10342 // Spill to obtain 24-bit precision 10343 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ 10344 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10345 match(Set dst (MulF src1 src2)); 10346 10347 
format %{ "FMUL $dst,$src1,$src2" %} 10348 opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ 10349 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2), 10350 set_instruction_start, 10351 OpcP, RMopc_Mem(secondary,src1), 10352 Pop_Mem_FPR(dst) ); 10353 ins_pipe( fpu_mem_mem_mem ); 10354 %} 10355 10356 // Spill to obtain 24-bit precision 10357 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ 10358 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10359 match(Set dst (MulF src con)); 10360 10361 format %{ "FLD $src\n\t" 10362 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10363 "FSTP_S $dst" %} 10364 ins_encode %{ 10365 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10366 __ fmul_s($constantaddress($con)); 10367 __ fstp_s(Address(rsp, $dst$$disp)); 10368 %} 10369 ins_pipe(fpu_mem_reg_con); 10370 %} 10371 // 10372 // This instruction does not round to 24-bits 10373 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ 10374 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10375 match(Set dst (MulF src con)); 10376 10377 format %{ "FLD $src\n\t" 10378 "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" 10379 "FSTP $dst" %} 10380 ins_encode %{ 10381 __ fld_s($src$$reg - 1); // FLD ST(i-1) 10382 __ fmul_s($constantaddress($con)); 10383 __ fstp_d($dst$$reg); 10384 %} 10385 ins_pipe(fpu_reg_reg_con); 10386 %} 10387 10388 10389 // 10390 // MACRO1 -- subsume unshared load into mulFPR 10391 // This instruction does not round to 24-bits 10392 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ 10393 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10394 match(Set dst (MulF (LoadF mem1) src)); 10395 10396 format %{ "FLD $mem1 ===MACRO1===\n\t" 10397 "FMUL ST,$src\n\t" 10398 "FSTP $dst" %} 10399 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ 10400 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1), 10401 OpcReg_FPR(src), 
10402 Pop_Reg_FPR(dst) ); 10403 ins_pipe( fpu_reg_reg_mem ); 10404 %} 10405 // 10406 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load 10407 // This instruction does not round to 24-bits 10408 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ 10409 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10410 match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); 10411 ins_cost(95); 10412 10413 format %{ "FLD $mem1 ===MACRO2===\n\t" 10414 "FMUL ST,$src1 subsume mulFPR left load\n\t" 10415 "FADD ST,$src2\n\t" 10416 "FSTP $dst" %} 10417 opcode(0xD9); /* LoadF D9 /0 */ 10418 ins_encode( OpcP, RMopc_Mem(0x00,mem1), 10419 FMul_ST_reg(src1), 10420 FAdd_ST_reg(src2), 10421 Pop_Reg_FPR(dst) ); 10422 ins_pipe( fpu_reg_mem_reg_reg ); 10423 %} 10424 10425 // MACRO3 -- addFPR a mulFPR 10426 // This instruction does not round to 24-bits. It is a '2-address' 10427 // instruction in that the result goes back to src2. This eliminates 10428 // a move from the macro; possibly the register allocator will have 10429 // to add it back (and maybe not). 
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE version: bounce both operands through the stack to the x87 FPREM
// loop, then move the result back into an XMM register.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
          "\tMOVSS [ESP+0],$src1\n"
          "\tFLD_S [ESP+0]\n"
          "\tMOVSS [ESP+0],$src0\n"
          "\tFLD_S [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS $dst,[ESP+0]\n"
          "\tADD ESP,4\n"
          "\tFSTP ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!

instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is already TOS (FPR1) we can store it directly; otherwise
    // load ST(i-1) to TOS first and pop it back out.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// NOTE(review): format says "FST_S ... # D-round" for a float->double
// conversion; the printed mnemonic looks inconsistent — format text is
// disassembly-only; confirm before changing.
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // 0x80000000 is the hardware's "invalid" marker (overflow or NaN):
    // fall into the slow-path stub only in that case.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // EDX:EAX == 0x80000000:00000000 marks overflow/NaN -> slow path stub.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // 0x80000000 is the hardware's "invalid" marker (overflow or NaN):
    // push the float on the x87 stack and let the stub fix up the result.
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    // EDX:EAX == 0x80000000:00000000 marks overflow/NaN -> slow path stub.
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Sign-extend int to long: copy into both halves, arithmetic-shift the hi half.
instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

// Long to int: simply take the low 32-bit half.
instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# 
MoveI2F_reg_stack" %} 11201 ins_encode %{ 11202 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11203 %} 11204 ins_pipe( ialu_mem_reg ); 11205 %} 11206 11207 11208 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11209 predicate(UseSSE==0); 11210 match(Set dst (MoveI2F src)); 11211 effect(DEF dst, USE src); 11212 11213 ins_cost(125); 11214 format %{ "FLD_S $src\n\t" 11215 "FSTP $dst\t# MoveI2F_stack_reg" %} 11216 opcode(0xD9); /* D9 /0, FLD m32real */ 11217 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11218 Pop_Reg_FPR(dst) ); 11219 ins_pipe( fpu_reg_mem ); 11220 %} 11221 11222 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11223 predicate(UseSSE>=1); 11224 match(Set dst (MoveI2F src)); 11225 effect( DEF dst, USE src ); 11226 11227 ins_cost(95); 11228 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11229 ins_encode %{ 11230 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11231 %} 11232 ins_pipe( pipe_slow ); 11233 %} 11234 11235 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11236 predicate(UseSSE>=2); 11237 match(Set dst (MoveI2F src)); 11238 effect( DEF dst, USE src ); 11239 11240 ins_cost(85); 11241 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11242 ins_encode %{ 11243 __ movdl($dst$$XMMRegister, $src$$Register); 11244 %} 11245 ins_pipe( pipe_slow ); 11246 %} 11247 11248 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11249 match(Set dst (MoveD2L src)); 11250 effect(DEF dst, USE src); 11251 11252 ins_cost(250); 11253 format %{ "MOV $dst.lo,$src\n\t" 11254 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11255 opcode(0x8B, 0x8B); 11256 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11257 ins_pipe( ialu_mem_long_reg ); 11258 %} 11259 11260 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11261 predicate(UseSSE<=1); 11262 match(Set dst (MoveD2L src)); 11263 effect(DEF dst, USE src); 11264 11265 ins_cost(125); 11266 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11267 
ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11268 ins_pipe( fpu_mem_reg ); 11269 %} 11270 11271 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11272 predicate(UseSSE>=2); 11273 match(Set dst (MoveD2L src)); 11274 effect(DEF dst, USE src); 11275 ins_cost(95); 11276 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11277 ins_encode %{ 11278 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11279 %} 11280 ins_pipe( pipe_slow ); 11281 %} 11282 11283 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11284 predicate(UseSSE>=2); 11285 match(Set dst (MoveD2L src)); 11286 effect(DEF dst, USE src, TEMP tmp); 11287 ins_cost(85); 11288 format %{ "MOVD $dst.lo,$src\n\t" 11289 "PSHUFLW $tmp,$src,0x4E\n\t" 11290 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11291 ins_encode %{ 11292 __ movdl($dst$$Register, $src$$XMMRegister); 11293 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11294 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11295 %} 11296 ins_pipe( pipe_slow ); 11297 %} 11298 11299 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11300 match(Set dst (MoveL2D src)); 11301 effect(DEF dst, USE src); 11302 11303 ins_cost(200); 11304 format %{ "MOV $dst,$src.lo\n\t" 11305 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11306 opcode(0x89, 0x89); 11307 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11308 ins_pipe( ialu_mem_long_reg ); 11309 %} 11310 11311 11312 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11313 predicate(UseSSE<=1); 11314 match(Set dst (MoveL2D src)); 11315 effect(DEF dst, USE src); 11316 ins_cost(125); 11317 11318 format %{ "FLD_D $src\n\t" 11319 "FSTP $dst\t# MoveL2D_stack_reg" %} 11320 opcode(0xDD); /* DD /0, FLD m64real */ 11321 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11322 Pop_Reg_DPR(dst) ); 11323 ins_pipe( fpu_reg_mem ); 11324 %} 11325 11326 11327 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11328 predicate(UseSSE>=2 && 
UseXmmLoadAndClearUpper); 11329 match(Set dst (MoveL2D src)); 11330 effect(DEF dst, USE src); 11331 11332 ins_cost(95); 11333 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11334 ins_encode %{ 11335 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11336 %} 11337 ins_pipe( pipe_slow ); 11338 %} 11339 11340 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11341 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11342 match(Set dst (MoveL2D src)); 11343 effect(DEF dst, USE src); 11344 11345 ins_cost(95); 11346 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11347 ins_encode %{ 11348 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11349 %} 11350 ins_pipe( pipe_slow ); 11351 %} 11352 11353 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11354 predicate(UseSSE>=2); 11355 match(Set dst (MoveL2D src)); 11356 effect(TEMP dst, USE src, TEMP tmp); 11357 ins_cost(85); 11358 format %{ "MOVD $dst,$src.lo\n\t" 11359 "MOVD $tmp,$src.hi\n\t" 11360 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11361 ins_encode %{ 11362 __ movdl($dst$$XMMRegister, $src$$Register); 11363 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11364 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11365 %} 11366 ins_pipe( pipe_slow ); 11367 %} 11368 11369 11370 // ======================================================================= 11371 // fast clearing of an array 11372 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11373 predicate(!((ClearArrayNode*)n)->is_large()); 11374 match(Set dummy (ClearArray cnt base)); 11375 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11376 11377 format %{ $$template 11378 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11379 $$emit$$"CMP InitArrayShortSize,rcx\n\t" 11380 $$emit$$"JG LARGE\n\t" 11381 $$emit$$"SHL ECX, 1\n\t" 11382 $$emit$$"DEC ECX\n\t" 11383 $$emit$$"JS DONE\t# Zero length\n\t" 11384 $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" 11385 
$$emit$$"DEC ECX\n\t" 11386 $$emit$$"JGE LOOP\n\t" 11387 $$emit$$"JMP DONE\n\t" 11388 $$emit$$"# LARGE:\n\t" 11389 if (UseFastStosb) { 11390 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11391 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11392 } else { 11393 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11394 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11395 } 11396 $$emit$$"# DONE" 11397 %} 11398 ins_encode %{ 11399 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false); 11400 %} 11401 ins_pipe( pipe_slow ); 11402 %} 11403 11404 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11405 predicate(((ClearArrayNode*)n)->is_large()); 11406 match(Set dummy (ClearArray cnt base)); 11407 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11408 format %{ $$template 11409 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11410 if (UseFastStosb) { 11411 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11412 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11413 } else { 11414 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11415 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11416 } 11417 $$emit$$"# DONE" 11418 %} 11419 ins_encode %{ 11420 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true); 11421 %} 11422 ins_pipe( pipe_slow ); 11423 %} 11424 11425 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11426 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11427 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11428 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11429 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11430 11431 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11432 ins_encode %{ 11433 __ string_compare($str1$$Register, $str2$$Register, 11434 
$cnt1$$Register, $cnt2$$Register, $result$$Register, 11435 $tmp1$$XMMRegister, StrIntrinsicNode::LL); 11436 %} 11437 ins_pipe( pipe_slow ); 11438 %} 11439 11440 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11441 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11442 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11443 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11444 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11445 11446 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11447 ins_encode %{ 11448 __ string_compare($str1$$Register, $str2$$Register, 11449 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11450 $tmp1$$XMMRegister, StrIntrinsicNode::UU); 11451 %} 11452 ins_pipe( pipe_slow ); 11453 %} 11454 11455 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11456 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11457 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11458 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11459 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11460 11461 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11462 ins_encode %{ 11463 __ string_compare($str1$$Register, $str2$$Register, 11464 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11465 $tmp1$$XMMRegister, StrIntrinsicNode::LU); 11466 %} 11467 ins_pipe( pipe_slow ); 11468 %} 11469 11470 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11471 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11472 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); 11473 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11474 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11475 11476 format %{ "String Compare 
byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11477 ins_encode %{ 11478 __ string_compare($str2$$Register, $str1$$Register, 11479 $cnt2$$Register, $cnt1$$Register, $result$$Register, 11480 $tmp1$$XMMRegister, StrIntrinsicNode::UL); 11481 %} 11482 ins_pipe( pipe_slow ); 11483 %} 11484 11485 // fast string equals 11486 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11487 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11488 match(Set result (StrEquals (Binary str1 str2) cnt)); 11489 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11490 11491 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11492 ins_encode %{ 11493 __ arrays_equals(false, $str1$$Register, $str2$$Register, 11494 $cnt$$Register, $result$$Register, $tmp3$$Register, 11495 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */); 11496 %} 11497 11498 ins_pipe( pipe_slow ); 11499 %} 11500 11501 // fast search of substring with known size. 11502 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11503 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11504 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11505 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11506 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11507 11508 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11509 ins_encode %{ 11510 int icnt2 = (int)$int_cnt2$$constant; 11511 if (icnt2 >= 16) { 11512 // IndexOf for constant substrings with size >= 16 elements 11513 // which don't need to be loaded through stack. 
11514 __ string_indexofC8($str1$$Register, $str2$$Register, 11515 $cnt1$$Register, $cnt2$$Register, 11516 icnt2, $result$$Register, 11517 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11518 } else { 11519 // Small strings are loaded through stack if they cross page boundary. 11520 __ string_indexof($str1$$Register, $str2$$Register, 11521 $cnt1$$Register, $cnt2$$Register, 11522 icnt2, $result$$Register, 11523 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11524 } 11525 %} 11526 ins_pipe( pipe_slow ); 11527 %} 11528 11529 // fast search of substring with known size. 11530 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11531 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11532 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11533 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11534 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11535 11536 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11537 ins_encode %{ 11538 int icnt2 = (int)$int_cnt2$$constant; 11539 if (icnt2 >= 8) { 11540 // IndexOf for constant substrings with size >= 8 elements 11541 // which don't need to be loaded through stack. 11542 __ string_indexofC8($str1$$Register, $str2$$Register, 11543 $cnt1$$Register, $cnt2$$Register, 11544 icnt2, $result$$Register, 11545 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11546 } else { 11547 // Small strings are loaded through stack if they cross page boundary. 11548 __ string_indexof($str1$$Register, $str2$$Register, 11549 $cnt1$$Register, $cnt2$$Register, 11550 icnt2, $result$$Register, 11551 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11552 } 11553 %} 11554 ins_pipe( pipe_slow ); 11555 %} 11556 11557 // fast search of substring with known size. 
11558 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11559 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11560 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11561 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11562 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11563 11564 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11565 ins_encode %{ 11566 int icnt2 = (int)$int_cnt2$$constant; 11567 if (icnt2 >= 8) { 11568 // IndexOf for constant substrings with size >= 8 elements 11569 // which don't need to be loaded through stack. 11570 __ string_indexofC8($str1$$Register, $str2$$Register, 11571 $cnt1$$Register, $cnt2$$Register, 11572 icnt2, $result$$Register, 11573 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11574 } else { 11575 // Small strings are loaded through stack if they cross page boundary. 
11576 __ string_indexof($str1$$Register, $str2$$Register, 11577 $cnt1$$Register, $cnt2$$Register, 11578 icnt2, $result$$Register, 11579 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11580 } 11581 %} 11582 ins_pipe( pipe_slow ); 11583 %} 11584 11585 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11586 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11587 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11588 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11589 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11590 11591 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11592 ins_encode %{ 11593 __ string_indexof($str1$$Register, $str2$$Register, 11594 $cnt1$$Register, $cnt2$$Register, 11595 (-1), $result$$Register, 11596 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11597 %} 11598 ins_pipe( pipe_slow ); 11599 %} 11600 11601 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11602 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11603 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11604 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11605 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11606 11607 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11608 ins_encode %{ 11609 __ string_indexof($str1$$Register, $str2$$Register, 11610 $cnt1$$Register, $cnt2$$Register, 11611 (-1), $result$$Register, 11612 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11613 %} 11614 ins_pipe( pipe_slow ); 11615 %} 11616 11617 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11618 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11619 
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11620 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11621 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11622 11623 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11624 ins_encode %{ 11625 __ string_indexof($str1$$Register, $str2$$Register, 11626 $cnt1$$Register, $cnt2$$Register, 11627 (-1), $result$$Register, 11628 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11629 %} 11630 ins_pipe( pipe_slow ); 11631 %} 11632 11633 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, 11634 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ 11635 predicate(UseSSE42Intrinsics); 11636 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); 11637 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); 11638 format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} 11639 ins_encode %{ 11640 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 11641 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 11642 %} 11643 ins_pipe( pipe_slow ); 11644 %} 11645 11646 // fast array equals 11647 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11648 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11649 %{ 11650 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 11651 match(Set result (AryEq ary1 ary2)); 11652 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11653 //ins_cost(300); 11654 11655 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11656 ins_encode %{ 11657 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11658 $tmp3$$Register, $result$$Register, $tmp4$$Register, 
11659 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */); 11660 %} 11661 ins_pipe( pipe_slow ); 11662 %} 11663 11664 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11665 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11666 %{ 11667 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 11668 match(Set result (AryEq ary1 ary2)); 11669 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11670 //ins_cost(300); 11671 11672 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11673 ins_encode %{ 11674 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11675 $tmp3$$Register, $result$$Register, $tmp4$$Register, 11676 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */); 11677 %} 11678 ins_pipe( pipe_slow ); 11679 %} 11680 11681 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result, 11682 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) 11683 %{ 11684 match(Set result (HasNegatives ary1 len)); 11685 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 11686 11687 format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11688 ins_encode %{ 11689 __ has_negatives($ary1$$Register, $len$$Register, 11690 $result$$Register, $tmp3$$Register, 11691 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11692 %} 11693 ins_pipe( pipe_slow ); 11694 %} 11695 11696 // fast char[] to byte[] compression 11697 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11698 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11699 match(Set result (StrCompressedCopy src (Binary dst len))); 11700 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11701 11702 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 11703 ins_encode %{ 11704 __ 
char_array_compress($src$$Register, $dst$$Register, $len$$Register, 11705 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11706 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11707 %} 11708 ins_pipe( pipe_slow ); 11709 %} 11710 11711 // fast byte[] to char[] inflation 11712 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 11713 regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ 11714 match(Set dummy (StrInflatedCopy src (Binary dst len))); 11715 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 11716 11717 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 11718 ins_encode %{ 11719 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, 11720 $tmp1$$XMMRegister, $tmp2$$Register); 11721 %} 11722 ins_pipe( pipe_slow ); 11723 %} 11724 11725 // encode char[] to byte[] in ISO_8859_1 11726 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 11727 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11728 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11729 match(Set result (EncodeISOArray src (Binary dst len))); 11730 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11731 11732 format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 11733 ins_encode %{ 11734 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 11735 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11736 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11737 %} 11738 ins_pipe( pipe_slow ); 11739 %} 11740 11741 11742 //----------Control Flow Instructions------------------------------------------ 11743 // Signed compare Instructions 11744 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ 11745 match(Set cr (CmpI op1 op2)); 11746 effect( DEF cr, USE op1, USE op2 ); 11747 format %{ "CMP $op1,$op2" %} 11748 opcode(0x3B); /* Opcode 3B /r */ 11749 
ins_encode( OpcP, RegReg( op1, op2) ); 11750 ins_pipe( ialu_cr_reg_reg ); 11751 %} 11752 11753 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 11754 match(Set cr (CmpI op1 op2)); 11755 effect( DEF cr, USE op1 ); 11756 format %{ "CMP $op1,$op2" %} 11757 opcode(0x81,0x07); /* Opcode 81 /7 */ 11758 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 11759 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11760 ins_pipe( ialu_cr_reg_imm ); 11761 %} 11762 11763 // Cisc-spilled version of cmpI_eReg 11764 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 11765 match(Set cr (CmpI op1 (LoadI op2))); 11766 11767 format %{ "CMP $op1,$op2" %} 11768 ins_cost(500); 11769 opcode(0x3B); /* Opcode 3B /r */ 11770 ins_encode( OpcP, RegMem( op1, op2) ); 11771 ins_pipe( ialu_cr_reg_mem ); 11772 %} 11773 11774 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{ 11775 match(Set cr (CmpI src zero)); 11776 effect( DEF cr, USE src ); 11777 11778 format %{ "TEST $src,$src" %} 11779 opcode(0x85); 11780 ins_encode( OpcP, RegReg( src, src ) ); 11781 ins_pipe( ialu_cr_reg_imm ); 11782 %} 11783 11784 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{ 11785 match(Set cr (CmpI (AndI src con) zero)); 11786 11787 format %{ "TEST $src,$con" %} 11788 opcode(0xF7,0x00); 11789 ins_encode( OpcP, RegOpc(src), Con32(con) ); 11790 ins_pipe( ialu_cr_reg_imm ); 11791 %} 11792 11793 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{ 11794 match(Set cr (CmpI (AndI src mem) zero)); 11795 11796 format %{ "TEST $src,$mem" %} 11797 opcode(0x85); 11798 ins_encode( OpcP, RegMem( src, mem ) ); 11799 ins_pipe( ialu_cr_reg_mem ); 11800 %} 11801 11802 // Unsigned compare Instructions; really, same as signed except they 11803 // produce an eFlagsRegU instead of eFlagsReg. 
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
11920 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{ 11921 match(Set cr (CmpP (LoadP op) zero)); 11922 11923 format %{ "TEST $op,0xFFFFFFFF" %} 11924 ins_cost(500); 11925 opcode(0xF7); /* Opcode F7 /0 */ 11926 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); 11927 ins_pipe( ialu_cr_reg_imm ); 11928 %} 11929 11930 // Yanked all unsigned pointer compare operations. 11931 // Pointer compares are done with CmpP which is already unsigned. 11932 11933 //----------Max and Min-------------------------------------------------------- 11934 // Min Instructions 11935 //// 11936 // *** Min and Max using the conditional move are slower than the 11937 // *** branch version on a Pentium III. 11938 // // Conditional move for min 11939 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11940 // effect( USE_DEF op2, USE op1, USE cr ); 11941 // format %{ "CMOVlt $op2,$op1\t! min" %} 11942 // opcode(0x4C,0x0F); 11943 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11944 // ins_pipe( pipe_cmov_reg ); 11945 //%} 11946 // 11947 //// Min Register with Register (P6 version) 11948 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11949 // predicate(VM_Version::supports_cmov() ); 11950 // match(Set op2 (MinI op1 op2)); 11951 // ins_cost(200); 11952 // expand %{ 11953 // eFlagsReg cr; 11954 // compI_eReg(cr,op1,op2); 11955 // cmovI_reg_lt(op2,op1,cr); 11956 // %} 11957 //%} 11958 11959 // Min Register with Register (generic version) 11960 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11961 match(Set dst (MinI dst src)); 11962 effect(KILL flags); 11963 ins_cost(300); 11964 11965 format %{ "MIN $dst,$src" %} 11966 opcode(0xCC); 11967 ins_encode( min_enc(dst,src) ); 11968 ins_pipe( pipe_slow ); 11969 %} 11970 11971 // Max Register with Register 11972 // *** Min and Max using the conditional move are slower than the 11973 // *** branch version on a Pentium III. 
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
// Kills the flags register; actual code sequence comes from the max_enc
// encoding class. Opcode 0xCC is a placeholder (encoding emits its own bytes).
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
// Computes $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride)
// using 64-bit intermediate arithmetic in the EAX:EDX pair (required by the
// 32-bit IDIV/MUL instructions), hence the fixed eAXRegI/eDXRegI operands.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 does not appear to be used below — confirm it is dead.
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
// Indirect jump through the constant-table base plus the switch value.
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Loop-end branch on carry-flag-only unsigned comparison result.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional branch on carry-flag-only unsigned comparison result.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Conditional branch that must also account for the parity flag
// (unordered FP compare result): emits an extra JP before/around the Jcc.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
// Fixed-register rule: scans with REPNE SCASD, so sub/super/count/result
// are pinned to ESI/EAX/ECX/EDI respectively.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant used when only the flags result of the subtype check is consumed
// (compare against zero); skips materializing the result register.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short loop-end branch on carry-flag-only unsigned comparison result.
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short conditional branch on carry-flag-only unsigned comparison result.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short variant of jmpConUCF2: two short branches (parity + condition),
// hence size(4).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Produces -1/0/+1 in $dst from a 64-bit (two-register) compare, via a
// high-half signed compare, then a low-half unsigned compare, then
// INC/DEC of the zeroed destination.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Only the sign bit matters, so testing the high half suffices.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Low halves compared with CMP, high halves with SBB into a temp: the
// resulting sign/overflow flags are those of the full 64-bit subtraction.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant: both long halves conditionally loaded from memory.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source variant of the int cmove.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): in this and the following FP cmov predicates '&&' binds
// tighter than '||', so the UseSSE guard applies only to the first BoolTest
// arm; presumably intended as UseSSE<=1 && (lt || ge) — confirm.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE a float (x87 form).
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE a float (SSE form).
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// ORing the two halves into a temp sets ZF iff the whole long is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compares low halves, and only compares the high halves if the lows matched.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant: both long halves conditionally loaded from memory.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source variant of the int cmove.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): in this and the following FP cmov predicates '&&' binds
// tighter than '||', so the UseSSE guard applies only to the first BoolTest
// arm; presumably intended as UseSSE<=1 && (eq || ne) — confirm.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE a float (x87 form).
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE a float (SSE form).
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant: both long halves conditionally loaded from memory.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// CMOVcc of an int register under flags from a commuted (LEGT) long compare.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same, with the int source in memory.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// CMOVcc of a pointer register under flags from a commuted (LEGT) long compare.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 register form, UseSSE<=1).
// NOTE: the BoolTest disjunction is parenthesized so the UseSSE guard covers
// BOTH arms.  The predicate previously read "UseSSE<=1 && ...le || ...gt";
// since '&&' binds tighter than '||', the gt arm was not guarded by UseSSE
// at all — inconsistent with the explicitly parenthesized cmovLL/cmovII/
// cmovPP rules of this family above.  Same fix applied to the three rules
// below.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form, UseSSE>=2).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 register form, UseSSE==0).
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM form, UseSSE>=1).
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // NOTE(review): EAX is pre-loaded with a placeholder oop of -1; presumably
  // patched by the call-site machinery — confirm against Java_Dynamic_Call.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that does not touch the FPU: no float-stack bookkeeping emitted.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // The POP discards the return address (into EDX as a dummy) before jumping.
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// FastLock when RTM (Restricted Transactional Memory) is enabled: needs the
// extra scratch registers ($cx1, $cx2) and passes the RTM profiling counters.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// FastLock without RTM: no extra scratch registers, no RTM counters.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or. By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$. Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic. It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser. An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Fold a reload that immediately follows the spill of the same value:
// the load reads the memory the preceding store just wrote (1.mem == 0.mem)
// into the register the stored value came from (1.src == 0.dst), so the
// pair is replaced by just the store.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.