//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes
// (i.e. the hardware register number used in ModR/M encoding).

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers. We treat TOS/FPR0 special. It is invisible to the
// allocator, and only shows up in the encodings.
// FPR0L/FPR0H carry no real VMReg (VMRegImpl::Bad()) because FPR0 is never
// allocated; it only participates in instruction encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation. Highest priority is first. A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX. Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI);
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers. Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intels FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

// Double-width FP registers are L/H pairs of the stack slots above.
reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

// Singleton classes for FP results (FPR1 == st(0) at safepoints).
reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
// All double FP registers except the result register FPR1.
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
// Relocation formats used when emitting 32-bit immediates/displacements.
#define RELOC_IMM32    Assembler::imm_operand
#define RELOC_DISP32   Assembler::disp32_operand

// Shorthand so encodings can write "__ insn(...)" against a local _masm.
#define __ _masm.
// How to find the high register of a Long pair, given the low register
#define HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bits alignment on x86.
// Writes the pair (lo, hi) into the first 16-byte-aligned slot at or below
// 'adr' and returns that aligned address; callers must reserve one extra
// 128-bit slot of slack so the rounded-down address stays inside the buffer.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
  // of 128-bits operands for SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bits operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool entry is one 128-bit constant: sign-clearing masks for AbsF/AbsD
// and sign-flipping masks for NegF/NegD, replicated per 32/64-bit lane.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));

// Offset hacking within calls.
// Bytes of fix-up code (FLDCW and/or VZEROUPPER) emitted before every call
// in this compilation; callers add this when computing return-address offsets.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all type of calls to specify the byte offset
// from the start of the call to the point where the return address
// will point.
// Return-address offset for a static Java call: the CALL rel32 instruction
// itself is 5 bytes, preceded by any pre-call reset code.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points
}

// Dynamic (inline-cache) calls additionally carry a 5-byte MOV of the
// cached oop before the 5-byte CALL, hence 10.
int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points
}

// Size of the FFree_Float_Stack_All sequence emitted before runtime calls;
// set elsewhere when that stub code is first emitted (-1 == not yet known).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 1;                      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size(); // skip fldcw, if any
  current_offset += 5;                      // skip MOV instruction
  current_offset += 1;                      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// EMIT_RM()
// Emit a ModR/M (or SIB) byte built from the 2-bit mode and two 3-bit fields.
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}

// EMIT_CC()
// Emit an opcode byte with a condition code OR'd into its low nibble.
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Embedded oops must not be scavengable unless ScavengeRootsInCode allows it.
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
// Emits the [ESP+disp] addressing form (ModR/M + SIB + disp8/disp32) after
// the given opcode byte; an 8-bit displacement is used when it fits.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);   // SIB byte
    emit_d8 (cbuf, disp);     // Displacement // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);   // SIB byte
    emit_d32(cbuf, disp);     // Displacement // R/M byte
  }
}

// rRegI ereg, memory mem) %{ // emit_reg_mem
// Emit a full ModR/M memory operand (mode + optional SIB + disp8/disp32),
// choosing the shortest legal IA-32 encoding for the given base/index/scale/
// displacement combination. index == 0x4 means "no index register".
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) {     // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32 (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) { // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32 (cbuf, displace);
        }
      }
    }
  }
}


// Emit a register-to-register move (MOV r32, r/m32, opcode 0x8B);
// a move to the same register emits nothing.
void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

// Patch EFLAGS after a comiss/ucomiss so that an unordered (NaN) compare
// reads as 'less than'; skipped entirely when PF is clear (ordered result).
void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //  7 6 5 4 3 2 1 0
  // |S|Z|r|A|r|P|r|C| (r - reserved bit)
  //  0 0 1 0 1 0 1 1 (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

// Materialize a three-way FP compare result in dst from the flags set by a
// preceding comiss/ucomiss: -1 for 'below' or unordered (parity), 0 for
// equal, 1 for greater.
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0; // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
// Pretty-print the method prolog for -XX:+PrintOptoAssembly; mirrors the
// instruction sequence produced by MachPrologNode::emit/verified_entry.
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


// Emit the method prolog; the actual frame-building sequence lives in
// MacroAssembler::verified_entry.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
// Pretty-print the method epilog; mirrors MachEpilogNode::emit below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

// Emit the method epilog: optional vzeroupper/FLDCW resets, frame teardown
// (ADD ESP, POP EBP), reserved-stack check, and the return-poll TEST.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add SP, #framesize (imm32 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add SP, #framesize (imm8 form)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc); // POP EBP

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    // TEST [polling page], EAX -- faults at a safepoint request.
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

// Conservative upper bound on the epilog size in bytes; must cover every
// byte the emit() above can produce.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  size += 64; // added to support ReservedStackAccess
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

// Coarse register classes used by the spill-copy logic below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
// Map an OptoReg to its coarse class (bad / GPR / x87 / XMM / stack slot).
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg) ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

// Emit (cbuf != NULL), format (!do_size), or size a load/store between a
// register and an [ESP + offset] stack slot; returns the accumulated size.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // opcode + ModR/M + SIB, plus disp8 or disp32 when offset is non-zero.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}

// Helper for XMM registers. Extra opcode bits, limited syntax.
// Emit/print/size an XMM spill or fill ([ESP + offset] <-> xmm reg).
// reg_lo+1 == reg_hi signals a 64-bit (double) move, otherwise 32-bit (float).
// Size accounting must match the MacroAssembler encoding exactly, including
// the EVEX compressed-displacement rules when UseAVX > 2.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: Compressed displacement is better than AVX on spill mem operations,
    // it maps more cases to single byte displacement
    _masm.set_managed();
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    // EVEX disp8*N compression: an offset that is a multiple of the element
    // size may still fit in one displacement byte even when > 127.
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// Emit/print/size an xmm-to-xmm register move (float or double).
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Emit/print/size a 32-bit GPR-to-xmm move (MOVD).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


// Emit/print/size a 32-bit xmm-to-GPR move (MOVD).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
    _masm.set_managed();
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

// Emit/print/size an integer register-to-register move (MOV r32,r32).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Emit/print/size a store from an x87 FP register to [ESP + offset].
// If the source is not already on top of the FP stack it is FLD'ed first
// and stored with a popping FSTP; otherwise a non-popping FST is used.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // st_op is fed to impl_helper as the ModRM reg field: encoding 3 (EBX)
  // selects store-and-pop, encoding 2 (EDX) selects store-without-pop.
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op     = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op     = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
965 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 966 int src_hi, int dst_hi, uint ireg, outputStream* st); 967 968 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 969 int stack_offset, int reg, uint ireg, outputStream* st); 970 971 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 972 int dst_offset, uint ireg, outputStream* st) { 973 int calc_size = 0; 974 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 975 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 976 switch (ireg) { 977 case Op_VecS: 978 calc_size = 3+src_offset_size + 3+dst_offset_size; 979 break; 980 case Op_VecD: { 981 calc_size = 3+src_offset_size + 3+dst_offset_size; 982 int tmp_src_offset = src_offset + 4; 983 int tmp_dst_offset = dst_offset + 4; 984 src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4); 985 dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 
1 : 4); 986 calc_size += 3+src_offset_size + 3+dst_offset_size; 987 break; 988 } 989 case Op_VecX: 990 case Op_VecY: 991 case Op_VecZ: 992 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 993 break; 994 default: 995 ShouldNotReachHere(); 996 } 997 if (cbuf) { 998 MacroAssembler _masm(cbuf); 999 int offset = __ offset(); 1000 switch (ireg) { 1001 case Op_VecS: 1002 __ pushl(Address(rsp, src_offset)); 1003 __ popl (Address(rsp, dst_offset)); 1004 break; 1005 case Op_VecD: 1006 __ pushl(Address(rsp, src_offset)); 1007 __ popl (Address(rsp, dst_offset)); 1008 __ pushl(Address(rsp, src_offset+4)); 1009 __ popl (Address(rsp, dst_offset+4)); 1010 break; 1011 case Op_VecX: 1012 __ movdqu(Address(rsp, -16), xmm0); 1013 __ movdqu(xmm0, Address(rsp, src_offset)); 1014 __ movdqu(Address(rsp, dst_offset), xmm0); 1015 __ movdqu(xmm0, Address(rsp, -16)); 1016 break; 1017 case Op_VecY: 1018 __ vmovdqu(Address(rsp, -32), xmm0); 1019 __ vmovdqu(xmm0, Address(rsp, src_offset)); 1020 __ vmovdqu(Address(rsp, dst_offset), xmm0); 1021 __ vmovdqu(xmm0, Address(rsp, -32)); 1022 break; 1023 case Op_VecZ: 1024 __ evmovdquq(Address(rsp, -64), xmm0, 2); 1025 __ evmovdquq(xmm0, Address(rsp, src_offset), 2); 1026 __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); 1027 __ evmovdquq(xmm0, Address(rsp, -64), 2); 1028 break; 1029 default: 1030 ShouldNotReachHere(); 1031 } 1032 int size = __ offset() - offset; 1033 assert(size == calc_size, "incorrect size calculation"); 1034 return size; 1035 #ifndef PRODUCT 1036 } else if (!do_size) { 1037 switch (ireg) { 1038 case Op_VecS: 1039 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 1040 "popl [rsp + #%d]", 1041 src_offset, dst_offset); 1042 break; 1043 case Op_VecD: 1044 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 1045 "popq [rsp + #%d]\n\t" 1046 "pushl [rsp + #%d]\n\t" 1047 "popq [rsp + #%d]", 1048 src_offset, dst_offset, src_offset+4, dst_offset+4); 1049 break; 1050 case Op_VecX: 1051 st->print("movdqu [rsp - #16], 
xmm0\t# 128-bit mem-mem spill\n\t" 1052 "movdqu xmm0, [rsp + #%d]\n\t" 1053 "movdqu [rsp + #%d], xmm0\n\t" 1054 "movdqu xmm0, [rsp - #16]", 1055 src_offset, dst_offset); 1056 break; 1057 case Op_VecY: 1058 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1059 "vmovdqu xmm0, [rsp + #%d]\n\t" 1060 "vmovdqu [rsp + #%d], xmm0\n\t" 1061 "vmovdqu xmm0, [rsp - #32]", 1062 src_offset, dst_offset); 1063 break; 1064 case Op_VecZ: 1065 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1066 "vmovdqu xmm0, [rsp + #%d]\n\t" 1067 "vmovdqu [rsp + #%d], xmm0\n\t" 1068 "vmovdqu xmm0, [rsp - #64]", 1069 src_offset, dst_offset); 1070 break; 1071 default: 1072 ShouldNotReachHere(); 1073 } 1074 #endif 1075 } 1076 return calc_size; 1077 } 1078 1079 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1080 // Get registers to move 1081 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1082 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1083 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1084 OptoReg::Name dst_first = ra_->get_reg_first(this ); 1085 1086 enum RC src_second_rc = rc_class(src_second); 1087 enum RC src_first_rc = rc_class(src_first); 1088 enum RC dst_second_rc = rc_class(dst_second); 1089 enum RC dst_first_rc = rc_class(dst_first); 1090 1091 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1092 1093 // Generate spill code! 
1094 int size = 0; 1095 1096 if( src_first == dst_first && src_second == dst_second ) 1097 return size; // Self copy, no move 1098 1099 if (bottom_type()->isa_vect() != NULL) { 1100 uint ireg = ideal_reg(); 1101 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1102 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1103 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1104 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1105 // mem -> mem 1106 int src_offset = ra_->reg2offset(src_first); 1107 int dst_offset = ra_->reg2offset(dst_first); 1108 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1109 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1110 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1111 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1112 int stack_offset = ra_->reg2offset(dst_first); 1113 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1114 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1115 int stack_offset = ra_->reg2offset(src_first); 1116 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1117 } else { 1118 ShouldNotReachHere(); 1119 } 1120 } 1121 1122 // -------------------------------------- 1123 // Check for mem-mem move. push/pop to move. 
1124 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1125 if( src_second == dst_first ) { // overlapping stack copy ranges 1126 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1127 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1128 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1129 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1130 } 1131 // move low bits 1132 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1133 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1134 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1135 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1136 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1137 } 1138 return size; 1139 } 1140 1141 // -------------------------------------- 1142 // Check for integer reg-reg copy 1143 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1144 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1145 1146 // Check for integer store 1147 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1148 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1149 1150 // Check for integer load 1151 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1152 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1153 1154 // Check for integer reg-xmm reg copy 1155 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1156 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1157 "no 64 bit integer-float reg moves" ); 1158 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1159 } 1160 // -------------------------------------- 1161 // Check for float reg-reg copy 1162 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1163 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1164 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1165 if( cbuf ) { 1166 1167 // Note the mucking with the register encode to compensate for the 0/1 1168 // indexing issue mentioned in a comment in the reg_def sections 1169 // for FPR registers many lines above here. 1170 1171 if( src_first != FPR1L_num ) { 1172 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1173 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1174 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1175 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1176 } else { 1177 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1178 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1179 } 1180 #ifndef PRODUCT 1181 } else if( !do_size ) { 1182 if( size != 0 ) st->print("\n\t"); 1183 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1184 else st->print( "FST %s", Matcher::regName[dst_first]); 1185 #endif 1186 } 1187 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1188 } 1189 1190 // Check for float store 1191 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1192 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1193 } 1194 1195 // Check for float load 1196 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1197 int offset = ra_->reg2offset(src_first); 1198 const char *op_str; 1199 int op; 1200 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1201 op_str = "FLD_D"; 1202 op = 0xDD; 1203 } else { // 32-bit load 1204 op_str = "FLD_S"; 1205 op = 0xD9; 1206 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1207 } 1208 if( cbuf ) { 1209 emit_opcode (*cbuf, op ); 1210 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1211 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1212 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1213 #ifndef PRODUCT 1214 } else if( !do_size ) { 1215 if( size != 0 ) st->print("\n\t"); 1216 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1217 #endif 1218 } 1219 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1220 return size + 3+offset_size+2; 1221 } 1222 1223 // Check for xmm reg-reg copy 1224 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1225 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1226 (src_first+1 == src_second && dst_first+1 == dst_second), 1227 "no non-adjacent float-moves" ); 1228 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1229 } 1230 1231 // Check for xmm reg-integer reg copy 1232 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1233 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1234 "no 64 bit float-integer reg moves" ); 1235 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1236 } 1237 1238 // Check for xmm store 1239 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1240 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1241 } 1242 1243 // Check for float xmm load 1244 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1245 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1246 } 1247 1248 // Copy from float reg to xmm reg 1249 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1250 
// copy to the top of stack from floating point reg 1251 // and use LEA to preserve flags 1252 if( cbuf ) { 1253 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1254 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1255 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1256 emit_d8(*cbuf,0xF8); 1257 #ifndef PRODUCT 1258 } else if( !do_size ) { 1259 if( size != 0 ) st->print("\n\t"); 1260 st->print("LEA ESP,[ESP-8]"); 1261 #endif 1262 } 1263 size += 4; 1264 1265 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1266 1267 // Copy from the temp memory to the xmm reg. 1268 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1269 1270 if( cbuf ) { 1271 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1272 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1273 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1274 emit_d8(*cbuf,0x08); 1275 #ifndef PRODUCT 1276 } else if( !do_size ) { 1277 if( size != 0 ) st->print("\n\t"); 1278 st->print("LEA ESP,[ESP+8]"); 1279 #endif 1280 } 1281 size += 4; 1282 return size; 1283 } 1284 1285 assert( size > 0, "missed a case" ); 1286 1287 // -------------------------------------------------------------------- 1288 // Check for second bits still needing moving. 
1289 if( src_second == dst_second ) 1290 return size; // Self copy; no move 1291 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1292 1293 // Check for second word int-int move 1294 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1295 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1296 1297 // Check for second word integer store 1298 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1299 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1300 1301 // Check for second word integer load 1302 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1303 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1304 1305 1306 Unimplemented(); 1307 return 0; // Mute compiler 1308 } 1309 1310 #ifndef PRODUCT 1311 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1312 implementation( NULL, ra_, false, st ); 1313 } 1314 #endif 1315 1316 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1317 implementation( &cbuf, ra_, false, NULL ); 1318 } 1319 1320 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1321 return implementation( NULL, ra_, true, NULL ); 1322 } 1323 1324 1325 //============================================================================= 1326 #ifndef PRODUCT 1327 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1328 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1329 int reg = ra_->get_reg_first(this); 1330 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1331 } 1332 #endif 1333 1334 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1335 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1336 int reg = ra_->get_encode(this); 1337 if( offset >= 128 ) { 1338 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1339 emit_rm(cbuf, 0x2, reg, 
0x04); 1340 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1341 emit_d32(cbuf, offset); 1342 } 1343 else { 1344 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1345 emit_rm(cbuf, 0x1, reg, 0x04); 1346 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1347 emit_d8(cbuf, offset); 1348 } 1349 } 1350 1351 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1352 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1353 if( offset >= 128 ) { 1354 return 7; 1355 } 1356 else { 1357 return 4; 1358 } 1359 } 1360 1361 //============================================================================= 1362 #ifndef PRODUCT 1363 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1364 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1365 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1366 st->print_cr("\tNOP"); 1367 st->print_cr("\tNOP"); 1368 if( !OptoBreakpoint ) 1369 st->print_cr("\tNOP"); 1370 } 1371 #endif 1372 1373 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1374 MacroAssembler masm(&cbuf); 1375 #ifdef ASSERT 1376 uint insts_size = cbuf.insts_size(); 1377 #endif 1378 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1379 masm.jump_cc(Assembler::notEqual, 1380 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1381 /* WARNING these NOPs are critical so that verified entry point is properly 1382 aligned for patching by NativeJump::patch_verified_entry() */ 1383 int nops_cnt = 2; 1384 if( !OptoBreakpoint ) // Leave space for int3 1385 nops_cnt += 1; 1386 masm.nop(nops_cnt); 1387 1388 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1389 } 1390 1391 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1392 return OptoBreakpoint ? 
11 : 12; 1393 } 1394 1395 1396 //============================================================================= 1397 1398 int Matcher::regnum_to_fpu_offset(int regnum) { 1399 return regnum - 32; // The FP registers are in the second chunk 1400 } 1401 1402 // This is UltraSparc specific, true just means we have fast l2f conversion 1403 const bool Matcher::convL2FSupported(void) { 1404 return true; 1405 } 1406 1407 // Is this branch offset short enough that a short branch can be used? 1408 // 1409 // NOTE: If the platform does not provide any short branch variants, then 1410 // this method should return false for offset 0. 1411 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1412 // The passed offset is relative to address of the branch. 1413 // On 86 a branch displacement is calculated relative to address 1414 // of a next instruction. 1415 offset -= br_size; 1416 1417 // the short version of jmpConUCF2 contains multiple branches, 1418 // making the reach slightly less 1419 if (rule == jmpConUCF2_rule) 1420 return (-126 <= offset && offset <= 125); 1421 return (-128 <= offset && offset <= 127); 1422 } 1423 1424 const bool Matcher::isSimpleConstant64(jlong value) { 1425 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1426 return false; 1427 } 1428 1429 // The ecx parameter to rep stos for the ClearArray node is in dwords. 1430 const bool Matcher::init_array_count_is_in_bytes = false; 1431 1432 // Needs 2 CMOV's for longs. 1433 const int Matcher::long_cmove_cost() { return 1; } 1434 1435 // No CMOVF/CMOVD with SSE/SSE2 1436 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1437 1438 // Does the CPU require late expand (see block.cpp for description of late expand)? 1439 const bool Matcher::require_postalloc_expand = false; 1440 1441 // Do we need to mask the count passed to shift instructions or does 1442 // the cpu only look at the lower 5/6 bits anyway? 
const bool Matcher::need_masked_shift_count = false;

bool Matcher::narrow_oop_use_complex_address() {
  // Not used on 32-bit: there are no narrow oops here.
  ShouldNotCallThis();
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  // Not used on 32-bit: there are no narrow klass pointers here.
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Rewrite the memory operand of a node whose implicit-null-check address
// might touch the last OS page (a Win95-era workaround): swap the operand
// for the corresponding *_win95_safe variant, preserving displacement/scale.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt     = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Walk operands until the one owning input edge 'idx' is found.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                          // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  // ECX/EDX carry the first two int args; XMM0/XMM1 carry float/double
  // args when SSE is available (the *b halves cover the high double word).
  if(  reg == ECX_num   || reg == EDX_num   ) return true;
  if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
         (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
RegMask Matcher::divL_proj_mask() {
  // No hardware 64-bit divide on 32-bit x86.
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  // No hardware 64-bit remainder on 32-bit x86.
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return NO_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    // (AndL x con) with a constant whose high word is zero.
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.   CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.  MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding.  They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block.  Emit functions will live in the
  // main source block for now.  In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically
  //
  // NOTE(review): $primary/$secondary/$tertiary below are the opcode bytes
  // declared by the instruct rule that uses each enc_class; $xxx$$reg and
  // $xxx$$constant are the ADLC-substituted register encoding / constant
  // value of the named operand.

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix (16-bit operands)
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,-1        (imm8 sign-extended)
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,-1
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops: register number is folded
  // into the low bits of the opcode byte itself.
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02); }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low half of a long immediate ALU op: opcode + r/m + 8- or 32-bit imm.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // High half of a long immediate ALU op: uses the tertiary opcode and the
  // high register of the pair.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  enc_class OpcSReg (rRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit value held in a register pair: bswap each half,
  // then exchange the halves.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // FCMOV: the two-byte opcode 0xDA00 + condition + FPU stack index.
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    // $primary selects the variant that materializes a 0 result in EDI
    // on the miss path.
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    // First use records the emitted size; later uses must match it exactly
    // (size is assumed constant elsewhere).
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}

  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                   runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // Move the x87 float result to xmm0 via a 4-byte stack bounce.
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 4));
      } else if (rt == T_DOUBLE) {
        // Move the x87 double result to xmm0 via an 8-byte stack bounce.
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp, 8));
      }
    }
  %}

  enc_class pre_call_resets %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if (ra_->C->in_24_bit_fp_mode()) {
      MacroAssembler _masm(&cbuf);
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    if (ra_->C->max_vector_size() > 16) {
      // Clear upper bits of YMM registers when current compiled code uses
      // wide vectors to avoid AVX <-> SSE transition penalty during call.
      MacroAssembler _masm(&cbuf);
      __ vzeroupper();
    }
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if (Compile::current()->in_24_bit_fp_mode()) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;

    if (!_method) {
      // Runtime stub call: plain runtime relocation.
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(),
                     RELOC_IMM32);
    } else {
      int method_index = resolved_method_index(cbuf);
      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
                                                  : static_call_Relocation::spec(method_index);
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     rspec, RELOC_DISP32);
      // Emit stubs for static call.
      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
      if (stub == NULL) {
        // Stub space exhausted: bail out of this compile.
        ciEnv::current()->record_failure("CodeCache is full");
        return;
      }
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    MacroAssembler _masm(&cbuf);
    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(Method::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);                        // Displacement

  %}

  //   Following encoding is no longer used, but may be restored if calling
  //   convention changes significantly.
  //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //     // int ic_reg     = Matcher::inline_cache_reg();
  //     // int ic_encode  = Matcher::_regEncode[ic_reg];
  //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
  //     // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
  //     // // so we load it immediately before the call
  //     // emit_opcode(cbuf, 0x8B);  // MOV    imo_reg,ic_reg  # method_oop
  //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode );  // R/M byte
  //
  //     // xor rbp,ebp
  //     emit_opcode(cbuf, 0x33);
  //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //     // CALL to interpreter.
  //     cbuf.set_insts_mark();
  //     $$$emit8$primary;
  //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //                    runtime_call_Relocation::spec(), RELOC_IMM32 );
  //   %}

  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the low 32 bits of a long immediate; uses XOR when the half is zero.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the high 32 bits of a long immediate into the pair's high register.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference (mod=00, r/m=101: disp32 only)
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx -- see the note above: CMPXCHG8B wants the new
    // high word in ECX, but our encoding has it in EBX, so swap
    // around the instruction and swap back afterwards.
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHGB [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB0);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // 16-bit mode (operand-size override prefix)
    emit_opcode(cbuf, 0x66);

    // CMPXCHGW [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize ZF==0 (not-equal) as a 0/1 boolean in 'res'.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail   -- skips the 5-byte MOV res,1 below
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Shift of a long by 1..31 bits: SHLD/SHRD ($tertiary) moves bits
  // across the half boundary, then the plain shift ($primary/$secondary)
  // shifts the other half.  $tertiary == 0xA4 (SHLD) picks the operand
  // order for a left shift; otherwise right.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by 32..63: move hi into lo, shift
  // lo by (cnt-32) if needed, then fill hi with the sign (SAR hi,31).
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical shift of a long by 32..63: move one half into the other,
  // shift by (cnt-32) if needed, then zero the vacated half.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // MIN: compare and conditionally move src into dst (branch over a 2-byte MOV)
  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MAX: compare and conditionally move src into dst (branch over a 2-byte MOV)
  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;           // Store & pop
      emit_opcode( cbuf, 0xD9 );    // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();          // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free p = (p < q) ? p + y : p using the borrow from SUB:
  // SBB produces an all-ones or all-zeros mask which gates the ADD.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Variable left shift of a long: if shift >= 32, move lo into hi and
  // clear lo first; SHLD/SHL use only the low 5 bits of CL.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Variable logical right shift of a long (see shift_left_long, mirrored).
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Variable arithmetic right shift of a long: the >=32 path sign-fills
  // the high half with SAR hi,31 instead of clearing it.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!!  equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;                      // FST (no pop) when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: pop the copy we just pushed
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;                      // FST (no pop) when src is already TOS
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                        // FSTP: pop the copy we just pushed
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  // Bring 'dst' to TOS and rotate 'src' into the FPR1 position without
  // disturbing the rest of the x87 stack (fincstp / fxch / fdecstp).
  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Bounce two XMM doubles through the stack onto the x87 stack
  // (src1 first, then src0 on top).  Assumes an 8-byte scratch area;
  // pairs with Push_ResultD / pop_stack_temp_qword.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Float variant of Push_ModD_encoding (4-byte scratch area).
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 TOS double into an XMM register and release the scratch area.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Pop the x87 TOS float into an XMM register; 'd8' is the number of
  // scratch bytes to release.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  // Push a single XMM double onto the x87 stack via an 8-byte stack bounce.
  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Copy an XMM double onto the x87 stack using an existing scratch area
  // (caller must have reserved it, e.g. via push_stack_temp_qword).
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Undo the stack rotation done by Push_Reg_Mod_DPR, leaving the result at TOS.
  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy FPU status to EFLAGS and skip the next 5 bytes unless the
  // parity flag (unordered compare) is set.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  // FPREM loop: fprem only reduces partially, so repeat until the FPU
  // status C2 bit (reflected in PF after sahf) clears.
  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp ::loop   (rel32 = -12, back to the fprem)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf,
0x66 ); // operand-size prefix for 16-bit immediate 2675 emit_opcode( cbuf, 0xA9 ); 2676 emit_d16 ( cbuf, 0x0400 ); 2677 // // // This sequence works, but stalls for 12-16 cycles on PPro 2678 // // test rax,0x0400 2679 // emit_opcode( cbuf, 0xA9 ); 2680 // emit_d32 ( cbuf, 0x00000400 ); 2681 // 2682 // jz exit (no unordered comparison) 2683 emit_opcode( cbuf, 0x74 ); 2684 emit_d8 ( cbuf, 0x02 ); 2685 // mov ah,1 - treat as LT case (set carry flag) 2686 emit_opcode( cbuf, 0xB4 ); 2687 emit_d8 ( cbuf, 0x01 ); 2688 // sahf 2689 emit_opcode( cbuf, 0x9E); 2690 %} 2691 2692 enc_class cmpF_P6_fixup() %{ 2693 // Fixup the integer flags in case comparison involved a NaN 2694 // 2695 // JNP exit (no unordered comparison, P-flag is set by NaN) 2696 emit_opcode( cbuf, 0x7B ); 2697 emit_d8 ( cbuf, 0x03 ); 2698 // MOV AH,1 - treat as LT case (set carry flag) 2699 emit_opcode( cbuf, 0xB4 ); 2700 emit_d8 ( cbuf, 0x01 ); 2701 // SAHF 2702 emit_opcode( cbuf, 0x9E); 2703 // NOP // target for branch to avoid branch to branch 2704 emit_opcode( cbuf, 0x90); 2705 %} 2706 2707 // fnstsw_ax(); 2708 // sahf(); 2709 // movl(dst, nan_result); 2710 // jcc(Assembler::parity, exit); 2711 // movl(dst, less_result); 2712 // jcc(Assembler::below, exit); 2713 // movl(dst, equal_result); 2714 // jcc(Assembler::equal, exit); 2715 // movl(dst, greater_result); 2716 2717 // less_result = 1; 2718 // greater_result = -1; 2719 // equal_result = 0; 2720 // nan_result = -1; 2721 2722 enc_class CmpF_Result(rRegI dst) %{ 2723 // fnstsw_ax(); 2724 emit_opcode( cbuf, 0xDF); 2725 emit_opcode( cbuf, 0xE0); 2726 // sahf 2727 emit_opcode( cbuf, 0x9E); 2728 // movl(dst, nan_result); 2729 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2730 emit_d32( cbuf, -1 ); 2731 // jcc(Assembler::parity, exit); 2732 emit_opcode( cbuf, 0x7A ); 2733 emit_d8 ( cbuf, 0x13 ); 2734 // movl(dst, less_result); 2735 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2736 emit_d32( cbuf, -1 ); 2737 // jcc(Assembler::below, exit); 2738 emit_opcode( cbuf, 0x72 ); 
2739 emit_d8 ( cbuf, 0x0C ); 2740 // movl(dst, equal_result); 2741 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2742 emit_d32( cbuf, 0 ); 2743 // jcc(Assembler::equal, exit); 2744 emit_opcode( cbuf, 0x74 ); 2745 emit_d8 ( cbuf, 0x05 ); 2746 // movl(dst, greater_result); 2747 emit_opcode( cbuf, 0xB8 + $dst$$reg); 2748 emit_d32( cbuf, 1 ); 2749 %} 2750 2751 2752 // Compare the longs and set flags 2753 // BROKEN! Do Not use as-is 2754 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ 2755 // CMP $src1.hi,$src2.hi 2756 emit_opcode( cbuf, 0x3B ); 2757 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2758 // JNE,s done 2759 emit_opcode(cbuf,0x75); 2760 emit_d8(cbuf, 2 ); 2761 // CMP $src1.lo,$src2.lo 2762 emit_opcode( cbuf, 0x3B ); 2763 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2764 // done: 2765 %} 2766 2767 enc_class convert_int_long( regL dst, rRegI src ) %{ 2768 // mov $dst.lo,$src 2769 int dst_encoding = $dst$$reg; 2770 int src_encoding = $src$$reg; 2771 encode_Copy( cbuf, dst_encoding , src_encoding ); 2772 // mov $dst.hi,$src 2773 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding ); 2774 // sar $dst.hi,31 2775 emit_opcode( cbuf, 0xC1 ); 2776 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) ); 2777 emit_d8(cbuf, 0x1F ); 2778 %} 2779 2780 enc_class convert_long_double( eRegL src ) %{ 2781 // push $src.hi 2782 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2783 // push $src.lo 2784 emit_opcode(cbuf, 0x50+$src$$reg ); 2785 // fild 64-bits at [SP] 2786 emit_opcode(cbuf,0xdf); 2787 emit_d8(cbuf, 0x6C); 2788 emit_d8(cbuf, 0x24); 2789 emit_d8(cbuf, 0x00); 2790 // pop stack 2791 emit_opcode(cbuf, 0x83); // add SP, #8 2792 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2793 emit_d8(cbuf, 0x8); 2794 %} 2795 2796 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ 2797 // IMUL EDX:EAX,$src1 2798 emit_opcode( cbuf, 0xF7 ); 2799 emit_rm( cbuf, 0x3, 0x5, $src1$$reg ); 2800 // SAR 
EDX,$cnt-32 2801 int shift_count = ((int)$cnt$$constant) - 32; 2802 if (shift_count > 0) { 2803 emit_opcode(cbuf, 0xC1); 2804 emit_rm(cbuf, 0x3, 7, $dst$$reg ); 2805 emit_d8(cbuf, shift_count); 2806 } 2807 %} 2808 2809 // this version doesn't have add sp, 8 2810 enc_class convert_long_double2( eRegL src ) %{ 2811 // push $src.hi 2812 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); 2813 // push $src.lo 2814 emit_opcode(cbuf, 0x50+$src$$reg ); 2815 // fild 64-bits at [SP] 2816 emit_opcode(cbuf,0xdf); 2817 emit_d8(cbuf, 0x6C); 2818 emit_d8(cbuf, 0x24); 2819 emit_d8(cbuf, 0x00); 2820 %} 2821 2822 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ 2823 // Basic idea: long = (long)int * (long)int 2824 // IMUL EDX:EAX, src 2825 emit_opcode( cbuf, 0xF7 ); 2826 emit_rm( cbuf, 0x3, 0x5, $src$$reg); 2827 %} 2828 2829 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ 2830 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 2831 // MUL EDX:EAX, src 2832 emit_opcode( cbuf, 0xF7 ); 2833 emit_rm( cbuf, 0x3, 0x4, $src$$reg); 2834 %} 2835 2836 enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ 2837 // Basic idea: lo(result) = lo(x_lo * y_lo) 2838 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 2839 // MOV $tmp,$src.lo 2840 encode_Copy( cbuf, $tmp$$reg, $src$$reg ); 2841 // IMUL $tmp,EDX 2842 emit_opcode( cbuf, 0x0F ); 2843 emit_opcode( cbuf, 0xAF ); 2844 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2845 // MOV EDX,$src.hi 2846 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) ); 2847 // IMUL EDX,EAX 2848 emit_opcode( cbuf, 0x0F ); 2849 emit_opcode( cbuf, 0xAF ); 2850 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg ); 2851 // ADD $tmp,EDX 2852 emit_opcode( cbuf, 0x03 ); 2853 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2854 // MUL EDX:EAX,$src.lo 2855 emit_opcode( cbuf, 0xF7 ); 2856 emit_rm( cbuf, 0x3, 0x4, $src$$reg ); 2857 // ADD EDX,ESI 2858 emit_opcode( 
cbuf, 0x03 ); 2859 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2860 %} 2861 2862 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2863 // Basic idea: lo(result) = lo(src * y_lo) 2864 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2865 // IMUL $tmp,EDX,$src 2866 emit_opcode( cbuf, 0x6B ); 2867 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2868 emit_d8( cbuf, (int)$src$$constant ); 2869 // MOV EDX,$src 2870 emit_opcode(cbuf, 0xB8 + EDX_enc); 2871 emit_d32( cbuf, (int)$src$$constant ); 2872 // MUL EDX:EAX,EDX 2873 emit_opcode( cbuf, 0xF7 ); 2874 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2875 // ADD EDX,ESI 2876 emit_opcode( cbuf, 0x03 ); 2877 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2878 %} 2879 2880 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2881 // PUSH src1.hi 2882 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2883 // PUSH src1.lo 2884 emit_opcode(cbuf, 0x50+$src1$$reg ); 2885 // PUSH src2.hi 2886 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2887 // PUSH src2.lo 2888 emit_opcode(cbuf, 0x50+$src2$$reg ); 2889 // CALL directly to the runtime 2890 cbuf.set_insts_mark(); 2891 emit_opcode(cbuf,0xE8); // Call into runtime 2892 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2893 // Restore stack 2894 emit_opcode(cbuf, 0x83); // add SP, #framesize 2895 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2896 emit_d8(cbuf, 4*4); 2897 %} 2898 2899 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2900 // PUSH src1.hi 2901 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2902 // PUSH src1.lo 2903 emit_opcode(cbuf, 0x50+$src1$$reg ); 2904 // PUSH src2.hi 2905 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2906 // PUSH src2.lo 2907 emit_opcode(cbuf, 0x50+$src2$$reg ); 2908 // CALL directly to the runtime 2909 cbuf.set_insts_mark(); 2910 emit_opcode(cbuf,0xE8); // Call into runtime 2911 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2912 // Restore stack 2913 emit_opcode(cbuf, 0x83); // add SP, #framesize 2914 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2915 emit_d8(cbuf, 4*4); 2916 %} 2917 2918 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2919 // MOV $tmp,$src.lo 2920 emit_opcode(cbuf, 0x8B); 2921 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2922 // OR $tmp,$src.hi 2923 emit_opcode(cbuf, 0x0B); 2924 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2925 %} 2926 2927 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2928 // CMP $src1.lo,$src2.lo 2929 emit_opcode( cbuf, 0x3B ); 2930 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2931 // JNE,s skip 2932 emit_cc(cbuf, 0x70, 0x5); 2933 emit_d8(cbuf,2); 2934 // CMP $src1.hi,$src2.hi 2935 emit_opcode( cbuf, 0x3B ); 2936 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2937 %} 2938 2939 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2940 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2941 emit_opcode( cbuf, 0x3B ); 2942 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2943 // MOV $tmp,$src1.hi 2944 emit_opcode( cbuf, 0x8B ); 2945 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2946 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2947 emit_opcode( cbuf, 0x1B ); 2948 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2949 %} 2950 2951 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2952 // XOR $tmp,$tmp 2953 emit_opcode(cbuf,0x33); // XOR 2954 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2955 // CMP $tmp,$src.lo 2956 emit_opcode( cbuf, 0x3B ); 2957 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2958 // SBB $tmp,$src.hi 2959 emit_opcode( cbuf, 0x1B ); 2960 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2961 %} 2962 2963 // Sniff, sniff... 
smells like Gnu Superoptimizer 2964 enc_class neg_long( eRegL dst ) %{ 2965 emit_opcode(cbuf,0xF7); // NEG hi 2966 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2967 emit_opcode(cbuf,0xF7); // NEG lo 2968 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2969 emit_opcode(cbuf,0x83); // SBB hi,0 2970 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2971 emit_d8 (cbuf,0 ); 2972 %} 2973 2974 enc_class enc_pop_rdx() %{ 2975 emit_opcode(cbuf,0x5A); 2976 %} 2977 2978 enc_class enc_rethrow() %{ 2979 cbuf.set_insts_mark(); 2980 emit_opcode(cbuf, 0xE9); // jmp entry 2981 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2982 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2983 %} 2984 2985 2986 // Convert a double to an int. Java semantics require we do complex 2987 // manglelations in the corner cases. So we set the rounding mode to 2988 // 'zero', store the darned double down as an int, and reset the 2989 // rounding mode to 'nearest'. The hardware throws an exception which 2990 // patches up the correct value directly to the stack. 2991 enc_class DPR2I_encoding( regDPR src ) %{ 2992 // Flip to round-to-zero mode. We attempted to allow invalid-op 2993 // exceptions here, so that a NAN or other corner-case value will 2994 // thrown an exception (but normal values get converted at full speed). 2995 // However, I2C adapters and other float-stack manglers leave pending 2996 // invalid-op exceptions hanging. We would have to clear them before 2997 // enabling them and that is more expensive than just testing for the 2998 // invalid value Intel stores down in the corner cases. 2999 emit_opcode(cbuf,0xD9); // FLDCW trunc 3000 emit_opcode(cbuf,0x2D); 3001 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3002 // Allocate a word 3003 emit_opcode(cbuf,0x83); // SUB ESP,4 3004 emit_opcode(cbuf,0xEC); 3005 emit_d8(cbuf,0x04); 3006 // Encoding assumes a double has been pushed into FPR0. 
3007 // Store down the double as an int, popping the FPU stack 3008 emit_opcode(cbuf,0xDB); // FISTP [ESP] 3009 emit_opcode(cbuf,0x1C); 3010 emit_d8(cbuf,0x24); 3011 // Restore the rounding mode; mask the exception 3012 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3013 emit_opcode(cbuf,0x2D); 3014 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3015 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 3016 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3017 3018 // Load the converted int; adjust CPU stack 3019 emit_opcode(cbuf,0x58); // POP EAX 3020 emit_opcode(cbuf,0x3D); // CMP EAX,imm 3021 emit_d32 (cbuf,0x80000000); // 0x80000000 3022 emit_opcode(cbuf,0x75); // JNE around_slow_call 3023 emit_d8 (cbuf,0x07); // Size of slow_call 3024 // Push src onto stack slow-path 3025 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3026 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3027 // CALL directly to the runtime 3028 cbuf.set_insts_mark(); 3029 emit_opcode(cbuf,0xE8); // Call into runtime 3030 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3031 // Carry on here... 3032 %} 3033 3034 enc_class DPR2L_encoding( regDPR src ) %{ 3035 emit_opcode(cbuf,0xD9); // FLDCW trunc 3036 emit_opcode(cbuf,0x2D); 3037 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 3038 // Allocate a word 3039 emit_opcode(cbuf,0x83); // SUB ESP,8 3040 emit_opcode(cbuf,0xEC); 3041 emit_d8(cbuf,0x08); 3042 // Encoding assumes a double has been pushed into FPR0. 3043 // Store down the double as a long, popping the FPU stack 3044 emit_opcode(cbuf,0xDF); // FISTP [ESP] 3045 emit_opcode(cbuf,0x3C); 3046 emit_d8(cbuf,0x24); 3047 // Restore the rounding mode; mask the exception 3048 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 3049 emit_opcode(cbuf,0x2D); 3050 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 3051 ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 3052 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 3053 3054 // Load the converted int; adjust CPU stack 3055 emit_opcode(cbuf,0x58); // POP EAX 3056 emit_opcode(cbuf,0x5A); // POP EDX 3057 emit_opcode(cbuf,0x81); // CMP EDX,imm 3058 emit_d8 (cbuf,0xFA); // rdx 3059 emit_d32 (cbuf,0x80000000); // 0x80000000 3060 emit_opcode(cbuf,0x75); // JNE around_slow_call 3061 emit_d8 (cbuf,0x07+4); // Size of slow_call 3062 emit_opcode(cbuf,0x85); // TEST EAX,EAX 3063 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 3064 emit_opcode(cbuf,0x75); // JNE around_slow_call 3065 emit_d8 (cbuf,0x07); // Size of slow_call 3066 // Push src onto stack slow-path 3067 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 3068 emit_d8 (cbuf,0xC0-1+$src$$reg ); 3069 // CALL directly to the runtime 3070 cbuf.set_insts_mark(); 3071 emit_opcode(cbuf,0xE8); // Call into runtime 3072 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 3073 // Carry on here... 
3074 %} 3075 3076 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 3077 // Operand was loaded from memory into fp ST (stack top) 3078 // FMUL ST,$src /* D8 C8+i */ 3079 emit_opcode(cbuf, 0xD8); 3080 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3081 %} 3082 3083 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3084 // FADDP ST,src2 /* D8 C0+i */ 3085 emit_opcode(cbuf, 0xD8); 3086 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3087 //could use FADDP src2,fpST /* DE C0+i */ 3088 %} 3089 3090 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3091 // FADDP src2,ST /* DE C0+i */ 3092 emit_opcode(cbuf, 0xDE); 3093 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3094 %} 3095 3096 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3097 // Operand has been loaded into fp ST (stack top) 3098 // FSUB ST,$src1 3099 emit_opcode(cbuf, 0xD8); 3100 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3101 3102 // FDIV 3103 emit_opcode(cbuf, 0xD8); 3104 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3105 %} 3106 3107 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3108 // Operand was loaded from memory into fp ST (stack top) 3109 // FADD ST,$src /* D8 C0+i */ 3110 emit_opcode(cbuf, 0xD8); 3111 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3112 3113 // FMUL ST,src2 /* D8 C*+i */ 3114 emit_opcode(cbuf, 0xD8); 3115 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3116 %} 3117 3118 3119 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3120 // Operand was loaded from memory into fp ST (stack top) 3121 // FADD ST,$src /* D8 C0+i */ 3122 emit_opcode(cbuf, 0xD8); 3123 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3124 3125 // FMULP src2,ST /* DE C8+i */ 3126 emit_opcode(cbuf, 0xDE); 3127 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3128 %} 3129 3130 // Atomically load the volatile long 3131 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3132 emit_opcode(cbuf,0xDF); 3133 int rm_byte_opcode = 0x05; 3134 int base = $mem$$base; 3135 int index = $mem$$index; 3136 int scale = $mem$$scale; 3137 int displace = $mem$$disp; 3138 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3139 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3140 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3141 %} 3142 3143 // Volatile Store Long. Must be atomic, so move it into 3144 // the FP TOS and then do a 64-bit FIST. Has to probe the 3145 // target address before the store (for null-ptr checks) 3146 // so the memory operand is used twice in the encoding. 3147 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3148 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3149 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3150 emit_opcode(cbuf,0xDF); 3151 int rm_byte_opcode = 0x07; 3152 int base = $mem$$base; 3153 int index = $mem$$index; 3154 int scale = $mem$$scale; 3155 int displace = $mem$$disp; 3156 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3157 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3158 %} 3159 3160 // Safepoint Poll. This polls the safepoint page, and causes an 3161 // exception if it is not readable. Unfortunately, it kills the condition code 3162 // in the process 3163 // We current use TESTL [spp],EDI 3164 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 3165 3166 enc_class Safepoint_Poll() %{ 3167 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); 3168 emit_opcode(cbuf,0x85); 3169 emit_rm (cbuf, 0x0, 0x7, 0x5); 3170 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 3171 %} 3172 %} 3173 3174 3175 //----------FRAME-------------------------------------------------------------- 3176 // Definition of frame structure and management information. 
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   | (to get allocators register number
//  G  Owned by |        |           v    add OptoReg::stack0())
//  r   CALLER  |        |
//  o     |     +--------+      pad to even-align allocators stack-slot
//  w     V     |  pad0  |        numbers; owned by CALLER
//  t -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^     |   in   |  5
//        |     |  args  |  4   Holes in incoming args owned by SELF
//        |     |        |  3
//        |     +--------+
//  V     |     | old out|      Empty on Intel, window on Sparc
//        |  old|preserve|      Must be even aligned.
//        |  SP-+--------+----> Matcher::_old_SP, even aligned
//        |     |   in   |  3   area for Intel ret address
//     Owned by |preserve|      Empty on Sparc.
//       SELF   +--------+
//        |     |  pad2  |  2   pad to align old SP
//        |     +--------+  1
//        |     |  locks |  0
//        |     +--------+----> OptoReg::stack0(), even aligned
//        |     |  pad1  | 11   pad to align new SP
//        |     +--------+
//        |     |        | 10
//        |     | spills |  9   spills
//        V     |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^     |  out   |  7
//        |     |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by   +--------+
//    CALLEE    | new out|  6   Empty on Intel, window on Sparc
//              | new    |preserve|      Must be even-aligned.
//           SP-+--------+----> Matcher::_new_SP, even aligned
//              |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required
size attribute (in bits) 3336 ins_attrib ins_short_branch(0); // Required flag: is this instruction a 3337 // non-matching short branch variant of some 3338 // long branch? 3339 ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) 3340 // specifies the alignment that some part of the instruction (not 3341 // necessarily the start) requires. If > 1, a compute_padding() 3342 // function must be provided for the instruction 3343 3344 //----------OPERANDS----------------------------------------------------------- 3345 // Operand definitions must precede instruction definitions for correct parsing 3346 // in the ADLC because operands constitute user defined types which are used in 3347 // instruction definitions. 3348 3349 //----------Simple Operands---------------------------------------------------- 3350 // Immediate Operands 3351 // Integer Immediate 3352 operand immI() %{ 3353 match(ConI); 3354 3355 op_cost(10); 3356 format %{ %} 3357 interface(CONST_INTER); 3358 %} 3359 3360 // Constant for test vs zero 3361 operand immI0() %{ 3362 predicate(n->get_int() == 0); 3363 match(ConI); 3364 3365 op_cost(0); 3366 format %{ %} 3367 interface(CONST_INTER); 3368 %} 3369 3370 // Constant for increment 3371 operand immI1() %{ 3372 predicate(n->get_int() == 1); 3373 match(ConI); 3374 3375 op_cost(0); 3376 format %{ %} 3377 interface(CONST_INTER); 3378 %} 3379 3380 // Constant for decrement 3381 operand immI_M1() %{ 3382 predicate(n->get_int() == -1); 3383 match(ConI); 3384 3385 op_cost(0); 3386 format %{ %} 3387 interface(CONST_INTER); 3388 %} 3389 3390 // Valid scale values for addressing modes 3391 operand immI2() %{ 3392 predicate(0 <= n->get_int() && (n->get_int() <= 3)); 3393 match(ConI); 3394 3395 format %{ %} 3396 interface(CONST_INTER); 3397 %} 3398 3399 operand immI8() %{ 3400 predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); 3401 match(ConI); 3402 3403 op_cost(5); 3404 format %{ %} 3405 interface(CONST_INTER); 3406 %} 3407 3408 
// Int Immediate, 16-bit signed
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value fits in 32 bits (sign-extends to the same long)
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero (x87 form, UseSSE<=1)
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 form, UseSSE>=2)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// "no EAX" integer register subset
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// "no EAX/EDX" integer register subset
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// "no ECX" integer register subset
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(int_reg_no_ebp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// "no EAX" pointer register subset
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// "no EAX/EBX" pointer register subset
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Long register (register pair)
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// Never matched directly (predicate(false)); selected only via
// explicit operand substitution in instruction rules.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}

// Float register operands (x87 stack, UseSSE < 2)
operand regDPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

operand regDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

operand regnotDPR1(regDPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float register operands
operand regFPR1(regFPR reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(fp_flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM Float register operands
operand regF() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(float_reg_legacy));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(double_reg_legacy));
  match(RegD);
  format %{ %}
  interface(REG_INTER);
%}

// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
// Vector register operands (legacy XMM register classes; see note above)
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

//----------Memory Operands----------------------------------------------------
// Direct Memory Operand
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// (pointer constant base plus integer register; note the commuted match rule)
operand indOffset32X(rRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(eRegP reg, rRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}

//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);


//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

//----------Memory Operands - Win95 Implicit Null Variants----------------
// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(int_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}
//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed; encodings are the x86 condition-code nibbles
// used in Jcc/SETcc, with their assembler suffixes)
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
            n->as_Bool()->_test._test != BoolTest::no_overflow);
  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
    overflow(0x0, "o");     // not really supported by the instruction
    no_overflow(0x1, "no"); // not really supported by the instruction
  %}
%}

// Comparison Code used in long compares
// (signed codes with the sense of </> swapped, for commuted compares)
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
    overflow(0x0, "o");
    no_overflow(0x1, "no");
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.

opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);


//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Instructions are of variable size
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.
// Integer ALU reg operation
pipe_class ialu_reg(rRegI dst) %{
  single_instruction;
  dst    : S4(write);
  dst    : S3(read);
  DECODE : S0;        // any decoder
  ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
  instruction_count(2);
  dst    : S4(write);
  dst    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(rRegI dst) %{
  single_instruction;
  dst    : S4(write);
  dst    : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
  instruction_count(2);
  dst    : S4(write);
  dst    : S3(read);
  D0     : S0(2);     // big decoder only; twice
  ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0;        // any decoder
  ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0(2);     // any 2 decoders
  ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation using big decoder
// (NOTE: src is declared as a memory operand despite the _reg_reg name)
pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation using big decoder
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  D0     : S0(2);     // big decoder only; twice
  ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
  single_instruction;
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S4;        // any alu
  MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
  instruction_count(2);
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0(2);     // big decoder only; twice
  ALU    : S4(2);     // any 2 alus
  MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
  single_instruction;
  mem    : S3(read);
  D0     : S0;        // big decoder only
  MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, rRegI src) %{
  single_instruction;
  mem    : S3(read);
  src    : S5(read);
  D0     : S0;        // big decoder only
  ALU    : S4;        // any alu
  MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
  instruction_count(2);
  mem    : S3(read);
  src    : S5(read);
  D0     : S0(2);     // big decoder only; twice
  ALU    : S4(2);     // any 2 alus
  MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory
pipe_class ialu_mem_imm(memory mem) %{
  single_instruction;
  mem    : S3(read);
  D0     : S0;        // big decoder only
  ALU    : S4;        // any alu
  MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  D0     : S0;        // Big decoder only
  ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
  single_instruction;
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;        // big decoder only
  ALU0   : S4;        // ALU0 only
  MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation
pipe_class ialu_cr_reg_reg(eFlagsReg cr,
rRegI src1, rRegI src2) %{
  single_instruction;
  cr     : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  DECODE : S0;       // any decoder
  ALU    : S3;       // any alu
%}

// Integer ALU reg-imm operation, writing the flags register
pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
  single_instruction;
  cr     : S4(write);
  src1   : S3(read);
  DECODE : S0;       // any decoder
  ALU    : S3;       // any alu
%}

// Integer ALU reg-mem operation, writing the flags register
pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
  single_instruction;
  cr     : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  D0     : S0;       // big decoder only
  ALU    : S4;       // any alu
  MEM    : S3;
%}

// Conditional move reg-reg
// NOTE(review): despite the header, the p/q/y operand names and the 4-op
// count suggest this models the cmpLT-mask idiom — confirm at its use sites.
pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
  instruction_count(4);
  y      : S4(read);
  q      : S3(read);
  p      : S3(read);
  DECODE : S0(4);    // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0;       // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0;       // any decoder
  MEM    : S3;
%}

// Conditional move reg-reg long (both 32-bit halves, hence 2 decoders)
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0(2);    // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
  single_instruction;
  dst    : S4(write);
  src    : S3(read);
  cr     : S3(read);
  DECODE : S0;       // any decoder
%}

// Float reg-reg operation
pipe_class fpu_reg(regDPR
dst) %{
  instruction_count(2);
  // NOTE(review): sole operand 'dst' is declared (read) here — looks
  // intentional for an FPU-stack operation, but confirm against users.
  dst    : S3(read);
  DECODE : S0(2);    // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
  instruction_count(2);
  dst    : S4(write);
  src    : S3(read);
  DECODE : S0(2);    // any 2 decoders
  FPU    : S3;
%}

// Float reg-reg operation (two source registers)
pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
  instruction_count(3);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  DECODE : S0(3);    // any 3 decoders
  FPU    : S3(2);
%}

// Float reg-reg operation (three source registers)
pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S0(4);    // any 4 decoders
  FPU    : S3(2);
%}

// Float reg-reg operation with a memory source
pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
  instruction_count(4);
  dst    : S4(write);
  src1   : S3(read);
  src2   : S3(read);
  src3   : S3(read);
  DECODE : S1(3);    // any 3 decoders
  D0     : S0;       // Big decoder only
  FPU    : S3(2);
  MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
  instruction_count(2);
  dst    : S5(write);
  mem    : S3(read);
  D0     : S0;       // big decoder only
  DECODE : S1;       // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;       // any mem
%}

// Float reg-mem operation
pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
  instruction_count(3);
  dst    : S5(write);
  src1   : S3(read);
  mem    : S3(read);
  D0     : S0;       // big decoder only
  DECODE : S1(2);    // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;       // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regDPR src) %{
instruction_count(2); 4964 src : S5(read); 4965 mem : S3(read); 4966 DECODE : S0; // any decoder for FPU PUSH 4967 D0 : S1; // big decoder only 4968 FPU : S4; 4969 MEM : S3; // any mem 4970 %} 4971 4972 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ 4973 instruction_count(3); 4974 src1 : S3(read); 4975 src2 : S3(read); 4976 mem : S3(read); 4977 DECODE : S0(2); // any decoder for FPU PUSH 4978 D0 : S1; // big decoder only 4979 FPU : S4; 4980 MEM : S3; // any mem 4981 %} 4982 4983 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ 4984 instruction_count(3); 4985 src1 : S3(read); 4986 src2 : S3(read); 4987 mem : S4(read); 4988 DECODE : S0; // any decoder for FPU PUSH 4989 D0 : S0(2); // big decoder only 4990 FPU : S4; 4991 MEM : S3(2); // any mem 4992 %} 4993 4994 pipe_class fpu_mem_mem(memory dst, memory src1) %{ 4995 instruction_count(2); 4996 src1 : S3(read); 4997 dst : S4(read); 4998 D0 : S0(2); // big decoder only 4999 MEM : S3(2); // any mem 5000 %} 5001 5002 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ 5003 instruction_count(3); 5004 src1 : S3(read); 5005 src2 : S3(read); 5006 dst : S4(read); 5007 D0 : S0(3); // big decoder only 5008 FPU : S4; 5009 MEM : S3(3); // any mem 5010 %} 5011 5012 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ 5013 instruction_count(3); 5014 src1 : S4(read); 5015 mem : S4(read); 5016 DECODE : S0; // any decoder for FPU PUSH 5017 D0 : S0(2); // big decoder only 5018 FPU : S4; 5019 MEM : S3(2); // any mem 5020 %} 5021 5022 // Float load constant 5023 pipe_class fpu_reg_con(regDPR dst) %{ 5024 instruction_count(2); 5025 dst : S5(write); 5026 D0 : S0; // big decoder only for the load 5027 DECODE : S1; // any decoder for FPU POP 5028 FPU : S4; 5029 MEM : S3; // any mem 5030 %} 5031 5032 // Float load constant 5033 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ 5034 instruction_count(3); 5035 dst : S5(write); 5036 src : S3(read); 5037 D0 : S0; // big decoder only for 
the load 5038 DECODE : S1(2); // any decoder for FPU POP 5039 FPU : S4; 5040 MEM : S3; // any mem 5041 %} 5042 5043 // UnConditional branch 5044 pipe_class pipe_jmp( label labl ) %{ 5045 single_instruction; 5046 BR : S3; 5047 %} 5048 5049 // Conditional branch 5050 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ 5051 single_instruction; 5052 cr : S1(read); 5053 BR : S3; 5054 %} 5055 5056 // Allocation idiom 5057 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ 5058 instruction_count(1); force_serialization; 5059 fixed_latency(6); 5060 heap_ptr : S3(read); 5061 DECODE : S0(3); 5062 D0 : S2; 5063 MEM : S3; 5064 ALU : S3(2); 5065 dst : S5(write); 5066 BR : S5; 5067 %} 5068 5069 // Generic big/slow expanded idiom 5070 pipe_class pipe_slow( ) %{ 5071 instruction_count(10); multiple_bundles; force_serialization; 5072 fixed_latency(100); 5073 D0 : S0(2); 5074 MEM : S3(2); 5075 %} 5076 5077 // The real do-nothing guy 5078 pipe_class empty( ) %{ 5079 instruction_count(0); 5080 %} 5081 5082 // Define the class for the Nop node 5083 define %{ 5084 MachNop = empty; 5085 %} 5086 5087 %} 5088 5089 //----------INSTRUCTIONS------------------------------------------------------- 5090 // 5091 // match -- States which machine-independent subtree may be replaced 5092 // by this instruction. 5093 // ins_cost -- The estimated cost of this instruction is used by instruction 5094 // selection to identify a minimum cost tree of machine 5095 // instructions that matches a tree of machine-independent 5096 // instructions. 5097 // format -- A string providing the disassembly for this instruction. 5098 // The value of an instruction's operand may be inserted 5099 // by referring to it with a '$' prefix. 5100 // opcode -- Three instruction opcodes may be provided. These are referred 5101 // to within an encode class as $primary, $secondary, and $tertiary 5102 // respectively. 
The primary opcode is commonly used to 5103 // indicate the type of machine instruction, while secondary 5104 // and tertiary are often used for prefix options or addressing 5105 // modes. 5106 // ins_encode -- A list of encode classes with parameters. The encode class 5107 // name must have been defined in an 'enc_class' specification 5108 // in the encode section of the architecture description. 5109 5110 //----------BSWAP-Instruction-------------------------------------------------- 5111 instruct bytes_reverse_int(rRegI dst) %{ 5112 match(Set dst (ReverseBytesI dst)); 5113 5114 format %{ "BSWAP $dst" %} 5115 opcode(0x0F, 0xC8); 5116 ins_encode( OpcP, OpcSReg(dst) ); 5117 ins_pipe( ialu_reg ); 5118 %} 5119 5120 instruct bytes_reverse_long(eRegL dst) %{ 5121 match(Set dst (ReverseBytesL dst)); 5122 5123 format %{ "BSWAP $dst.lo\n\t" 5124 "BSWAP $dst.hi\n\t" 5125 "XCHG $dst.lo $dst.hi" %} 5126 5127 ins_cost(125); 5128 ins_encode( bswap_long_bytes(dst) ); 5129 ins_pipe( ialu_reg_reg); 5130 %} 5131 5132 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ 5133 match(Set dst (ReverseBytesUS dst)); 5134 effect(KILL cr); 5135 5136 format %{ "BSWAP $dst\n\t" 5137 "SHR $dst,16\n\t" %} 5138 ins_encode %{ 5139 __ bswapl($dst$$Register); 5140 __ shrl($dst$$Register, 16); 5141 %} 5142 ins_pipe( ialu_reg ); 5143 %} 5144 5145 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ 5146 match(Set dst (ReverseBytesS dst)); 5147 effect(KILL cr); 5148 5149 format %{ "BSWAP $dst\n\t" 5150 "SAR $dst,16\n\t" %} 5151 ins_encode %{ 5152 __ bswapl($dst$$Register); 5153 __ sarl($dst$$Register, 16); 5154 %} 5155 ins_pipe( ialu_reg ); 5156 %} 5157 5158 5159 //---------- Zeros Count Instructions ------------------------------------------ 5160 5161 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5162 predicate(UseCountLeadingZerosInstruction); 5163 match(Set dst (CountLeadingZerosI src)); 5164 effect(KILL cr); 5165 5166 format %{ "LZCNT $dst, $src\t# count 
leading zeros (int)" %} 5167 ins_encode %{ 5168 __ lzcntl($dst$$Register, $src$$Register); 5169 %} 5170 ins_pipe(ialu_reg); 5171 %} 5172 5173 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ 5174 predicate(!UseCountLeadingZerosInstruction); 5175 match(Set dst (CountLeadingZerosI src)); 5176 effect(KILL cr); 5177 5178 format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" 5179 "JNZ skip\n\t" 5180 "MOV $dst, -1\n" 5181 "skip:\n\t" 5182 "NEG $dst\n\t" 5183 "ADD $dst, 31" %} 5184 ins_encode %{ 5185 Register Rdst = $dst$$Register; 5186 Register Rsrc = $src$$Register; 5187 Label skip; 5188 __ bsrl(Rdst, Rsrc); 5189 __ jccb(Assembler::notZero, skip); 5190 __ movl(Rdst, -1); 5191 __ bind(skip); 5192 __ negl(Rdst); 5193 __ addl(Rdst, BitsPerInt - 1); 5194 %} 5195 ins_pipe(ialu_reg); 5196 %} 5197 5198 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5199 predicate(UseCountLeadingZerosInstruction); 5200 match(Set dst (CountLeadingZerosL src)); 5201 effect(TEMP dst, KILL cr); 5202 5203 format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" 5204 "JNC done\n\t" 5205 "LZCNT $dst, $src.lo\n\t" 5206 "ADD $dst, 32\n" 5207 "done:" %} 5208 ins_encode %{ 5209 Register Rdst = $dst$$Register; 5210 Register Rsrc = $src$$Register; 5211 Label done; 5212 __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5213 __ jccb(Assembler::carryClear, done); 5214 __ lzcntl(Rdst, Rsrc); 5215 __ addl(Rdst, BitsPerInt); 5216 __ bind(done); 5217 %} 5218 ins_pipe(ialu_reg); 5219 %} 5220 5221 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ 5222 predicate(!UseCountLeadingZerosInstruction); 5223 match(Set dst (CountLeadingZerosL src)); 5224 effect(TEMP dst, KILL cr); 5225 5226 format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t" 5227 "JZ msw_is_zero\n\t" 5228 "ADD $dst, 32\n\t" 5229 "JMP not_zero\n" 5230 "msw_is_zero:\n\t" 5231 "BSR $dst, $src.lo\n\t" 5232 "JNZ not_zero\n\t" 5233 "MOV $dst, -1\n" 5234 "not_zero:\n\t" 5235 "NEG 
$dst\n\t" 5236 "ADD $dst, 63\n" %} 5237 ins_encode %{ 5238 Register Rdst = $dst$$Register; 5239 Register Rsrc = $src$$Register; 5240 Label msw_is_zero; 5241 Label not_zero; 5242 __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); 5243 __ jccb(Assembler::zero, msw_is_zero); 5244 __ addl(Rdst, BitsPerInt); 5245 __ jmpb(not_zero); 5246 __ bind(msw_is_zero); 5247 __ bsrl(Rdst, Rsrc); 5248 __ jccb(Assembler::notZero, not_zero); 5249 __ movl(Rdst, -1); 5250 __ bind(not_zero); 5251 __ negl(Rdst); 5252 __ addl(Rdst, BitsPerLong - 1); 5253 %} 5254 ins_pipe(ialu_reg); 5255 %} 5256 5257 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5258 predicate(UseCountTrailingZerosInstruction); 5259 match(Set dst (CountTrailingZerosI src)); 5260 effect(KILL cr); 5261 5262 format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} 5263 ins_encode %{ 5264 __ tzcntl($dst$$Register, $src$$Register); 5265 %} 5266 ins_pipe(ialu_reg); 5267 %} 5268 5269 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ 5270 predicate(!UseCountTrailingZerosInstruction); 5271 match(Set dst (CountTrailingZerosI src)); 5272 effect(KILL cr); 5273 5274 format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" 5275 "JNZ done\n\t" 5276 "MOV $dst, 32\n" 5277 "done:" %} 5278 ins_encode %{ 5279 Register Rdst = $dst$$Register; 5280 Label done; 5281 __ bsfl(Rdst, $src$$Register); 5282 __ jccb(Assembler::notZero, done); 5283 __ movl(Rdst, BitsPerInt); 5284 __ bind(done); 5285 %} 5286 ins_pipe(ialu_reg); 5287 %} 5288 5289 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ 5290 predicate(UseCountTrailingZerosInstruction); 5291 match(Set dst (CountTrailingZerosL src)); 5292 effect(TEMP dst, KILL cr); 5293 5294 format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" 5295 "JNC done\n\t" 5296 "TZCNT $dst, $src.hi\n\t" 5297 "ADD $dst, 32\n" 5298 "done:" %} 5299 ins_encode %{ 5300 Register Rdst = $dst$$Register; 5301 Register Rsrc = $src$$Register; 5302 Label done; 5303 __ 
tzcntl(Rdst, Rsrc); 5304 __ jccb(Assembler::carryClear, done); 5305 __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); 5306 __ addl(Rdst, BitsPerInt); 5307 __ bind(done); 5308 %} 5309 ins_pipe(ialu_reg); 5310 %} 5311 5312 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ 5313 predicate(!UseCountTrailingZerosInstruction); 5314 match(Set dst (CountTrailingZerosL src)); 5315 effect(TEMP dst, KILL cr); 5316 5317 format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" 5318 "JNZ done\n\t" 5319 "BSF $dst, $src.hi\n\t" 5320 "JNZ msw_not_zero\n\t" 5321 "MOV $dst, 32\n" 5322 "msw_not_zero:\n\t" 5323 "ADD $dst, 32\n" 5324 "done:" %} 5325 ins_encode %{ 5326 Register Rdst = $dst$$Register; 5327 Register Rsrc = $src$$Register; 5328 Label msw_not_zero; 5329 Label done; 5330 __ bsfl(Rdst, Rsrc); 5331 __ jccb(Assembler::notZero, done); 5332 __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); 5333 __ jccb(Assembler::notZero, msw_not_zero); 5334 __ movl(Rdst, BitsPerInt); 5335 __ bind(msw_not_zero); 5336 __ addl(Rdst, BitsPerInt); 5337 __ bind(done); 5338 %} 5339 ins_pipe(ialu_reg); 5340 %} 5341 5342 5343 //---------- Population Count Instructions ------------------------------------- 5344 5345 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ 5346 predicate(UsePopCountInstruction); 5347 match(Set dst (PopCountI src)); 5348 effect(KILL cr); 5349 5350 format %{ "POPCNT $dst, $src" %} 5351 ins_encode %{ 5352 __ popcntl($dst$$Register, $src$$Register); 5353 %} 5354 ins_pipe(ialu_reg); 5355 %} 5356 5357 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ 5358 predicate(UsePopCountInstruction); 5359 match(Set dst (PopCountI (LoadI mem))); 5360 effect(KILL cr); 5361 5362 format %{ "POPCNT $dst, $mem" %} 5363 ins_encode %{ 5364 __ popcntl($dst$$Register, $mem$$Address); 5365 %} 5366 ins_pipe(ialu_reg); 5367 %} 5368 5369 // Note: Long.bitCount(long) returns an int. 
// Population count of a 64-bit value: POPCNT each 32-bit half, then sum.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  // TEMP dst/TEMP tmp: dst is written before src.hi is consumed, so neither
  // may be allocated on top of src.
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);                // dst = bitcount(src.lo)
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); // tmp = bitcount(src.hi)
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Build the low-word and high-word (+4) addresses by hand from the
    // memory operand's components.
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL
cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // A shift of 7 suffices (rather than 31): after MOVSX8 the 24+1 MSB are
    // already sign extended, so bits 31..7 all equal the sign bit.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); // zero the high word
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  // NOTE(review): the format string shows right_n_bits($mask, 8) literally
  // for disassembly; the encoding below applies the actual constant
  // $mask$$constant & right_n_bits(8).
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI
dst, memory mem) %{ 5492 match(Set dst (LoadS mem)); 5493 5494 ins_cost(125); 5495 format %{ "MOVSX $dst,$mem\t# short" %} 5496 5497 ins_encode %{ 5498 __ movswl($dst$$Register, $mem$$Address); 5499 %} 5500 5501 ins_pipe(ialu_reg_mem); 5502 %} 5503 5504 // Load Short (16 bit signed) to Byte (8 bit signed) 5505 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5506 match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); 5507 5508 ins_cost(125); 5509 format %{ "MOVSX $dst, $mem\t# short -> byte" %} 5510 ins_encode %{ 5511 __ movsbl($dst$$Register, $mem$$Address); 5512 %} 5513 ins_pipe(ialu_reg_mem); 5514 %} 5515 5516 // Load Short (16bit signed) into Long Register 5517 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5518 match(Set dst (ConvI2L (LoadS mem))); 5519 effect(KILL cr); 5520 5521 ins_cost(375); 5522 format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" 5523 "MOV $dst.hi,$dst.lo\n\t" 5524 "SAR $dst.hi,15" %} 5525 5526 ins_encode %{ 5527 __ movswl($dst$$Register, $mem$$Address); 5528 __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 5529 __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. 
5530 %} 5531 5532 ins_pipe(ialu_reg_mem); 5533 %} 5534 5535 // Load Unsigned Short/Char (16bit unsigned) 5536 instruct loadUS(rRegI dst, memory mem) %{ 5537 match(Set dst (LoadUS mem)); 5538 5539 ins_cost(125); 5540 format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} 5541 5542 ins_encode %{ 5543 __ movzwl($dst$$Register, $mem$$Address); 5544 %} 5545 5546 ins_pipe(ialu_reg_mem); 5547 %} 5548 5549 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) 5550 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5551 match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); 5552 5553 ins_cost(125); 5554 format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} 5555 ins_encode %{ 5556 __ movsbl($dst$$Register, $mem$$Address); 5557 %} 5558 ins_pipe(ialu_reg_mem); 5559 %} 5560 5561 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register 5562 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ 5563 match(Set dst (ConvI2L (LoadUS mem))); 5564 effect(KILL cr); 5565 5566 ins_cost(250); 5567 format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" 5568 "XOR $dst.hi,$dst.hi" %} 5569 5570 ins_encode %{ 5571 __ movzwl($dst$$Register, $mem$$Address); 5572 __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); 5573 %} 5574 5575 ins_pipe(ialu_reg_mem); 5576 %} 5577 5578 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register 5579 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ 5580 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5581 effect(KILL cr); 5582 5583 format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" 5584 "XOR $dst.hi,$dst.hi" %} 5585 ins_encode %{ 5586 Register Rdst = $dst$$Register; 5587 __ movzbl(Rdst, $mem$$Address); 5588 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5589 %} 5590 ins_pipe(ialu_reg_mem); 5591 %} 5592 5593 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register 5594 instruct 
loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ 5595 match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); 5596 effect(KILL cr); 5597 5598 format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" 5599 "XOR $dst.hi,$dst.hi\n\t" 5600 "AND $dst.lo,right_n_bits($mask, 16)" %} 5601 ins_encode %{ 5602 Register Rdst = $dst$$Register; 5603 __ movzwl(Rdst, $mem$$Address); 5604 __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); 5605 __ andl(Rdst, $mask$$constant & right_n_bits(16)); 5606 %} 5607 ins_pipe(ialu_reg_mem); 5608 %} 5609 5610 // Load Integer 5611 instruct loadI(rRegI dst, memory mem) %{ 5612 match(Set dst (LoadI mem)); 5613 5614 ins_cost(125); 5615 format %{ "MOV $dst,$mem\t# int" %} 5616 5617 ins_encode %{ 5618 __ movl($dst$$Register, $mem$$Address); 5619 %} 5620 5621 ins_pipe(ialu_reg_mem); 5622 %} 5623 5624 // Load Integer (32 bit signed) to Byte (8 bit signed) 5625 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ 5626 match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); 5627 5628 ins_cost(125); 5629 format %{ "MOVSX $dst, $mem\t# int -> byte" %} 5630 ins_encode %{ 5631 __ movsbl($dst$$Register, $mem$$Address); 5632 %} 5633 ins_pipe(ialu_reg_mem); 5634 %} 5635 5636 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) 5637 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ 5638 match(Set dst (AndI (LoadI mem) mask)); 5639 5640 ins_cost(125); 5641 format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} 5642 ins_encode %{ 5643 __ movzbl($dst$$Register, $mem$$Address); 5644 %} 5645 ins_pipe(ialu_reg_mem); 5646 %} 5647 5648 // Load Integer (32 bit signed) to Short (16 bit signed) 5649 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ 5650 match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); 5651 5652 ins_cost(125); 5653 format %{ "MOVSX $dst, $mem\t# int -> short" %} 5654 ins_encode %{ 5655 __ movswl($dst$$Register, $mem$$Address); 5656 %} 5657 ins_pipe(ialu_reg_mem); 5658 
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    // Zero-extending load subsumes the 0xFFFF mask.
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // replicate the sign bit across dst.hi
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    // Zero-extending byte load subsumes the 0xFF mask.
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    // Zero-extending word load subsumes the 0xFFFF mask.
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    // 31-bit (non-negative) mask => masked result is non-negative, so the
    // high word is simply zero.
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
// NOTE(review): the ESI restriction is presumably enforced by the
// load_long_memory operand definition (outside this chunk) — confirm there.
instruct loadL(eRegL dst, load_long_memory mem) %{
  // Non-atomic case only: the two 32-bit loads below are not atomic; the
  // require_atomic_access() case is handled by the volatile variants.
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
// Volatile (atomic) long load for pre-SSE2 CPUs: FILD/FISTp performs the
// 64-bit memory access in a single FPU operation, giving atomicity that a
// pair of 32-bit integer moves would not.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant: one 64-bit MOVSD through a temporary XMM register is atomic.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 variant producing a long register pair: the 64-bit value is loaded
// atomically into XMM, then split into the lo/hi 32-bit halves with
// MOVD + PSRLQ (HIGH_FROM_LOW names the paired high register).
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 stack): FLD from memory then FSTP into the virtual
// FPU register assigned by the allocator.
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Same as loadD but chosen when UseXmmLoadAndClearUpper is off; the format
// shows MOVLPD (loads only the low 64 bits) though the encoder uses the
// same movdbl macro -- presumably movdbl selects the instruction based on
// the flag; TODO confirm against the assembler.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 stack)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Effective Address: these match an address-expression operand
// directly (Set dst mem) and compute it with LEA, one variant per
// addressing mode.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero: XOR reg,reg is shorter than MOV reg,0 but
// clobbers the flags, hence the KILL cr effect.
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33); /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8); /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load a 64-bit long constant as two 32-bit immediate moves (lo/hi pair).
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
// Float/double constant loads. The SSE variants read from the per-method
// constant table via $constantaddress; the x87 variants use the dedicated
// FLDZ/FLD1 opcodes for 0.0 and 1.0 to avoid a memory access.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immF0().
// XORPS zeroes the register without touching memory or the constant table.
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD $dst,$dst\t# double 0.0" %}
  ins_encode %{
    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Stack Slot
instruct loadSSI(rRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load a long stack slot as two 32-bit moves (lo at $src, hi at $src+4).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
instruct loadSSF(regFPR dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S $src\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
instruct loadSSD(regDPR dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D $src\n\t"
            "FSTP $dst" %}
  opcode(0xDD); /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Prefetch instructions for allocation.
// Must be safe to execute with invalid address (cannot fault).
// Exactly one of these matches per configuration, selected by the
// UseSSE level and the AllocatePrefetchInstr flag value (0..3).

instruct prefetchAlloc0( memory mem ) %{
  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
  match(PrefetchAllocation mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    __ prefetchw($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    __ prefetchnta($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    __ prefetcht0($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    __ prefetcht2($mem$$Address);
  %}
  ins_pipe(ialu_mem);
%}

//----------Store Instructions-------------------------------------------------

// Store Byte
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8 $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short (0x66 operand-size prefix selects the 16-bit form)
instruct storeC(memory mem, rRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16 $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, rRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long (non-atomic path only; atomic stores use the
// storeL_volatile/storeLX_* patterns below)
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV $mem,$src.lo\n\t"
            "MOV $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer: only the low word of the long is stored.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long. Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST. Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD $src\n\t"
            "FISTp $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 variant of the atomic long store: stage through XMM, one 64-bit MOVSD.
instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD $tmp,$src\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// SSE2 atomic long store from a register pair: pack lo/hi halves into one
// XMM register with MOVD + PUNPCKLDQ, then store with a single 64-bit MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD $tmp,$src.lo\n\t"
            "MOVD $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %}
  ins_encode %{
    __ cmpl(rax, $mem$$Address);
    __ movdl($tmp$$XMMRegister, $src$$Register);
    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
    __ movdbl($mem$$Address, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16 $mem,$src" %}
  opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV $mem,$src" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store CMS card-mark Immediate
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6); /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Double (x87; src must already be in ST(0), hence regDPR1)
instruct storeDPR( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D $mem,$src" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86: the RoundDouble node is absorbed
// because the 64-bit memory store itself narrows the 80-bit x87 value.
instruct storeDPR_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D $mem,$src\t# round" %}
  opcode(0xDD); /* DD /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
instruct storeD(memory mem, regD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD $mem,$src" %}
  ins_encode %{
    __ movdbl($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
instruct storeF(memory mem, regF src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS $mem,$src" %}
  ins_encode %{
    __ movflt($mem$$Address, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store Float (x87)
instruct storeFPR( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S $mem,$src" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
instruct storeFPR_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86: the D2F conversion is folded into
// the 32-bit store, which performs the narrowing itself.
instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S $mem,$src\t# D-round" %}
  opcode(0xD9); /* D9 /2 */
  ins_encode( enc_FPR_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immFPR().
instruct storeFPR_imm( memory mem, immFPR src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV $mem,$src\t# store float" %}
  opcode(0xC7); /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, rRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV $dst,$src.lo\n\t"
            "MOV $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors. Most emit no code (size(0)) because x86's
// memory model makes them unnecessary; only membar_volatile emits a
// real StoreLoad barrier.

instruct membar_acquire() %{
  match(MemBarAcquire);
  match(LoadFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  match(StoreFence);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// The only barrier that emits code: a StoreLoad fence (shown as a locked
// add to the stack in the MP format string). KILL cr because the locked
// ADD clobbers the flags.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// Elided when a preceding instruction already provides the required
// StoreLoad ordering (checked by Matcher::post_store_load_barrier).
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Move Instructions--------------------------------------------------
// CastX2P is a no-op here: src and dst are both pinned to EAX, so no code
// is emitted.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castP2X(rRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

//----------Conditional Move---------------------------------------------------
// Conditional move, emulated with a short branch on CPUs without CMOV.
instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# signed cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
  predicate(!VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
            "MOV $dst,$src\n"
            "skip:" %}
  ins_encode %{
    Label Lskip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
    __ movl($dst$$Register, $src$$Register);
    __ bind(Lskip);
  %}
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF (unordered-compare-flags) variant delegates to the unsigned form.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
// regardless of whether we are on a P6, so we
// emulate a cmov here
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# pointer\n"
            "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand. ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}

// Conditional move (x87 FCMOV, unsigned condition codes only)
instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Conditional move
instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_dpr(cop,src) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# double\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOV $dst,$src\t# float\n"
            "skip:" %}
  opcode (0xdd, 0x3); /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovDPR_reg );
%}

// No CMOVE with SSE/SSE2
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSS $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop skip\n\t"
            "MOVSD $dst,$src\t# float\n"
            "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regU(cop, cr, dst, src);
  %}
%}

// Long conditional move: two CMOVs, one per 32-bit half.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}

//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------

// Integer Addition Instructions
instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

instruct incI_eReg(rRegI dst, immI1 src,
eFlagsReg cr) %{ 7057 predicate(UseIncDec); 7058 match(Set dst (AddI dst src)); 7059 effect(KILL cr); 7060 7061 size(1); 7062 format %{ "INC $dst" %} 7063 opcode(0x40); /* */ 7064 ins_encode( Opc_plus( primary, dst ) ); 7065 ins_pipe( ialu_reg ); 7066 %} 7067 7068 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ 7069 match(Set dst (AddI src0 src1)); 7070 ins_cost(110); 7071 7072 format %{ "LEA $dst,[$src0 + $src1]" %} 7073 opcode(0x8D); /* 0x8D /r */ 7074 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7075 ins_pipe( ialu_reg_reg ); 7076 %} 7077 7078 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ 7079 match(Set dst (AddP src0 src1)); 7080 ins_cost(110); 7081 7082 format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} 7083 opcode(0x8D); /* 0x8D /r */ 7084 ins_encode( OpcP, RegLea( dst, src0, src1 ) ); 7085 ins_pipe( ialu_reg_reg ); 7086 %} 7087 7088 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ 7089 predicate(UseIncDec); 7090 match(Set dst (AddI dst src)); 7091 effect(KILL cr); 7092 7093 size(1); 7094 format %{ "DEC $dst" %} 7095 opcode(0x48); /* */ 7096 ins_encode( Opc_plus( primary, dst ) ); 7097 ins_pipe( ialu_reg ); 7098 %} 7099 7100 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ 7101 match(Set dst (AddP dst src)); 7102 effect(KILL cr); 7103 7104 size(2); 7105 format %{ "ADD $dst,$src" %} 7106 opcode(0x03); 7107 ins_encode( OpcP, RegReg( dst, src) ); 7108 ins_pipe( ialu_reg_reg ); 7109 %} 7110 7111 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ 7112 match(Set dst (AddP dst src)); 7113 effect(KILL cr); 7114 7115 format %{ "ADD $dst,$src" %} 7116 opcode(0x81,0x00); /* Opcode 81 /0 id */ 7117 // ins_encode( RegImm( dst, src) ); 7118 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 7119 ins_pipe( ialu_reg ); 7120 %} 7121 7122 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7123 match(Set dst (AddI dst (LoadI src))); 7124 effect(KILL cr); 7125 7126 ins_cost(125); 7127 format %{ "ADD $dst,$src" 
%} 7128 opcode(0x03); 7129 ins_encode( OpcP, RegMem( dst, src) ); 7130 ins_pipe( ialu_reg_mem ); 7131 %} 7132 7133 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7134 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7135 effect(KILL cr); 7136 7137 ins_cost(150); 7138 format %{ "ADD $dst,$src" %} 7139 opcode(0x01); /* Opcode 01 /r */ 7140 ins_encode( OpcP, RegMem( src, dst ) ); 7141 ins_pipe( ialu_mem_reg ); 7142 %} 7143 7144 // Add Memory with Immediate 7145 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 7146 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7147 effect(KILL cr); 7148 7149 ins_cost(125); 7150 format %{ "ADD $dst,$src" %} 7151 opcode(0x81); /* Opcode 81 /0 id */ 7152 ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) ); 7153 ins_pipe( ialu_mem_imm ); 7154 %} 7155 7156 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{ 7157 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7158 effect(KILL cr); 7159 7160 ins_cost(125); 7161 format %{ "INC $dst" %} 7162 opcode(0xFF); /* Opcode FF /0 */ 7163 ins_encode( OpcP, RMopc_Mem(0x00,dst)); 7164 ins_pipe( ialu_mem_imm ); 7165 %} 7166 7167 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ 7168 match(Set dst (StoreI dst (AddI (LoadI dst) src))); 7169 effect(KILL cr); 7170 7171 ins_cost(125); 7172 format %{ "DEC $dst" %} 7173 opcode(0xFF); /* Opcode FF /1 */ 7174 ins_encode( OpcP, RMopc_Mem(0x01,dst)); 7175 ins_pipe( ialu_mem_imm ); 7176 %} 7177 7178 7179 instruct checkCastPP( eRegP dst ) %{ 7180 match(Set dst (CheckCastPP dst)); 7181 7182 size(0); 7183 format %{ "#checkcastPP of $dst" %} 7184 ins_encode( /*empty encoding*/ ); 7185 ins_pipe( empty ); 7186 %} 7187 7188 instruct castPP( eRegP dst ) %{ 7189 match(Set dst (CastPP dst)); 7190 format %{ "#castPP of $dst" %} 7191 ins_encode( /*empty encoding*/ ); 7192 ins_pipe( empty ); 7193 %} 7194 7195 instruct castII( rRegI dst ) %{ 7196 match(Set dst (CastII dst)); 7197 format %{ "#castII of $dst" %} 
7198 ins_encode( /*empty encoding*/ ); 7199 ins_cost(0); 7200 ins_pipe( empty ); 7201 %} 7202 7203 7204 // Load-locked - same as a regular pointer load when used with compare-swap 7205 instruct loadPLocked(eRegP dst, memory mem) %{ 7206 match(Set dst (LoadPLocked mem)); 7207 7208 ins_cost(125); 7209 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7210 opcode(0x8B); 7211 ins_encode( OpcP, RegMem(dst,mem)); 7212 ins_pipe( ialu_reg_mem ); 7213 %} 7214 7215 // Conditional-store of the updated heap-top. 7216 // Used during allocation of the shared heap. 7217 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7218 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7219 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7220 // EAX is killed if there is contention, but then it's also unused. 7221 // In the common case of no contention, EAX holds the new oop address. 7222 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7223 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7224 ins_pipe( pipe_cmpxchg ); 7225 %} 7226 7227 // Conditional-store of an int value. 7228 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 7229 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ 7230 match(Set cr (StoreIConditional mem (Binary oldval newval))); 7231 effect(KILL oldval); 7232 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} 7233 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); 7234 ins_pipe( pipe_cmpxchg ); 7235 %} 7236 7237 // Conditional-store of a long value. 7238 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 
7239 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7240 match(Set cr (StoreLConditional mem (Binary oldval newval))); 7241 effect(KILL oldval); 7242 format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t" 7243 "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t" 7244 "XCHG EBX,ECX" 7245 %} 7246 ins_encode %{ 7247 // Note: we need to swap rbx, and rcx before and after the 7248 // cmpxchg8 instruction because the instruction uses 7249 // rcx as the high order word of the new value to store but 7250 // our register encoding uses rbx. 7251 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7252 if( os::is_MP() ) 7253 __ lock(); 7254 __ cmpxchg8($mem$$Address); 7255 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7256 %} 7257 ins_pipe( pipe_cmpxchg ); 7258 %} 7259 7260 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7261 7262 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7263 predicate(VM_Version::supports_cx8()); 7264 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7265 match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); 7266 effect(KILL cr, KILL oldval); 7267 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7268 "MOV $res,0\n\t" 7269 "JNE,s fail\n\t" 7270 "MOV $res,1\n" 7271 "fail:" %} 7272 ins_encode( enc_cmpxchg8(mem_ptr), 7273 enc_flags_ne_to_boolean(res) ); 7274 ins_pipe( pipe_cmpxchg ); 7275 %} 7276 7277 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7278 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7279 match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); 7280 effect(KILL cr, KILL oldval); 7281 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7282 "MOV 
$res,0\n\t" 7283 "JNE,s fail\n\t" 7284 "MOV $res,1\n" 7285 "fail:" %} 7286 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7287 ins_pipe( pipe_cmpxchg ); 7288 %} 7289 7290 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7291 match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval))); 7292 match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval))); 7293 effect(KILL cr, KILL oldval); 7294 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7295 "MOV $res,0\n\t" 7296 "JNE,s fail\n\t" 7297 "MOV $res,1\n" 7298 "fail:" %} 7299 ins_encode( enc_cmpxchgb(mem_ptr), 7300 enc_flags_ne_to_boolean(res) ); 7301 ins_pipe( pipe_cmpxchg ); 7302 %} 7303 7304 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ 7305 match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval))); 7306 match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval))); 7307 effect(KILL cr, KILL oldval); 7308 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7309 "MOV $res,0\n\t" 7310 "JNE,s fail\n\t" 7311 "MOV $res,1\n" 7312 "fail:" %} 7313 ins_encode( enc_cmpxchgw(mem_ptr), 7314 enc_flags_ne_to_boolean(res) ); 7315 ins_pipe( pipe_cmpxchg ); 7316 %} 7317 7318 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7319 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7320 match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); 7321 effect(KILL cr, KILL oldval); 7322 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7323 "MOV $res,0\n\t" 7324 "JNE,s fail\n\t" 7325 "MOV $res,1\n" 7326 "fail:" %} 7327 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7328 ins_pipe( pipe_cmpxchg ); 7329 %} 7330 7331 instruct compareAndExchangeL( eSIRegP 
mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7332 predicate(VM_Version::supports_cx8()); 7333 match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval))); 7334 effect(KILL cr); 7335 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7336 ins_encode( enc_cmpxchg8(mem_ptr) ); 7337 ins_pipe( pipe_cmpxchg ); 7338 %} 7339 7340 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7341 match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); 7342 effect(KILL cr); 7343 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7344 ins_encode( enc_cmpxchg(mem_ptr) ); 7345 ins_pipe( pipe_cmpxchg ); 7346 %} 7347 7348 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7349 match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval))); 7350 effect(KILL cr); 7351 format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7352 ins_encode( enc_cmpxchgb(mem_ptr) ); 7353 ins_pipe( pipe_cmpxchg ); 7354 %} 7355 7356 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7357 match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval))); 7358 effect(KILL cr); 7359 format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7360 ins_encode( enc_cmpxchgw(mem_ptr) ); 7361 ins_pipe( pipe_cmpxchg ); 7362 %} 7363 7364 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7365 match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval))); 7366 effect(KILL cr); 7367 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} 7368 ins_encode( enc_cmpxchg(mem_ptr) ); 7369 ins_pipe( pipe_cmpxchg ); 7370 %} 7371 7372 instruct xaddB_no_res( 
memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7373 predicate(n->as_LoadStore()->result_not_used()); 7374 match(Set dummy (GetAndAddB mem add)); 7375 effect(KILL cr); 7376 format %{ "ADDB [$mem],$add" %} 7377 ins_encode %{ 7378 if (os::is_MP()) { __ lock(); } 7379 __ addb($mem$$Address, $add$$constant); 7380 %} 7381 ins_pipe( pipe_cmpxchg ); 7382 %} 7383 7384 // Important to match to xRegI: only 8-bit regs. 7385 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{ 7386 match(Set newval (GetAndAddB mem newval)); 7387 effect(KILL cr); 7388 format %{ "XADDB [$mem],$newval" %} 7389 ins_encode %{ 7390 if (os::is_MP()) { __ lock(); } 7391 __ xaddb($mem$$Address, $newval$$Register); 7392 %} 7393 ins_pipe( pipe_cmpxchg ); 7394 %} 7395 7396 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7397 predicate(n->as_LoadStore()->result_not_used()); 7398 match(Set dummy (GetAndAddS mem add)); 7399 effect(KILL cr); 7400 format %{ "ADDS [$mem],$add" %} 7401 ins_encode %{ 7402 if (os::is_MP()) { __ lock(); } 7403 __ addw($mem$$Address, $add$$constant); 7404 %} 7405 ins_pipe( pipe_cmpxchg ); 7406 %} 7407 7408 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{ 7409 match(Set newval (GetAndAddS mem newval)); 7410 effect(KILL cr); 7411 format %{ "XADDS [$mem],$newval" %} 7412 ins_encode %{ 7413 if (os::is_MP()) { __ lock(); } 7414 __ xaddw($mem$$Address, $newval$$Register); 7415 %} 7416 ins_pipe( pipe_cmpxchg ); 7417 %} 7418 7419 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7420 predicate(n->as_LoadStore()->result_not_used()); 7421 match(Set dummy (GetAndAddI mem add)); 7422 effect(KILL cr); 7423 format %{ "ADDL [$mem],$add" %} 7424 ins_encode %{ 7425 if (os::is_MP()) { __ lock(); } 7426 __ addl($mem$$Address, $add$$constant); 7427 %} 7428 ins_pipe( pipe_cmpxchg ); 7429 %} 7430 7431 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7432 match(Set newval (GetAndAddI mem newval)); 7433 effect(KILL 
cr); 7434 format %{ "XADDL [$mem],$newval" %} 7435 ins_encode %{ 7436 if (os::is_MP()) { __ lock(); } 7437 __ xaddl($mem$$Address, $newval$$Register); 7438 %} 7439 ins_pipe( pipe_cmpxchg ); 7440 %} 7441 7442 // Important to match to xRegI: only 8-bit regs. 7443 instruct xchgB( memory mem, xRegI newval) %{ 7444 match(Set newval (GetAndSetB mem newval)); 7445 format %{ "XCHGB $newval,[$mem]" %} 7446 ins_encode %{ 7447 __ xchgb($newval$$Register, $mem$$Address); 7448 %} 7449 ins_pipe( pipe_cmpxchg ); 7450 %} 7451 7452 instruct xchgS( memory mem, rRegI newval) %{ 7453 match(Set newval (GetAndSetS mem newval)); 7454 format %{ "XCHGW $newval,[$mem]" %} 7455 ins_encode %{ 7456 __ xchgw($newval$$Register, $mem$$Address); 7457 %} 7458 ins_pipe( pipe_cmpxchg ); 7459 %} 7460 7461 instruct xchgI( memory mem, rRegI newval) %{ 7462 match(Set newval (GetAndSetI mem newval)); 7463 format %{ "XCHGL $newval,[$mem]" %} 7464 ins_encode %{ 7465 __ xchgl($newval$$Register, $mem$$Address); 7466 %} 7467 ins_pipe( pipe_cmpxchg ); 7468 %} 7469 7470 instruct xchgP( memory mem, pRegP newval) %{ 7471 match(Set newval (GetAndSetP mem newval)); 7472 format %{ "XCHGL $newval,[$mem]" %} 7473 ins_encode %{ 7474 __ xchgl($newval$$Register, $mem$$Address); 7475 %} 7476 ins_pipe( pipe_cmpxchg ); 7477 %} 7478 7479 //----------Subtraction Instructions------------------------------------------- 7480 7481 // Integer Subtraction Instructions 7482 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7483 match(Set dst (SubI dst src)); 7484 effect(KILL cr); 7485 7486 size(2); 7487 format %{ "SUB $dst,$src" %} 7488 opcode(0x2B); 7489 ins_encode( OpcP, RegReg( dst, src) ); 7490 ins_pipe( ialu_reg_reg ); 7491 %} 7492 7493 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 7494 match(Set dst (SubI dst src)); 7495 effect(KILL cr); 7496 7497 format %{ "SUB $dst,$src" %} 7498 opcode(0x81,0x05); /* Opcode 81 /5 */ 7499 // ins_encode( RegImm( dst, src) ); 7500 ins_encode( OpcSErm( dst, src ), Con8or32( 
src ) ); 7501 ins_pipe( ialu_reg ); 7502 %} 7503 7504 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 7505 match(Set dst (SubI dst (LoadI src))); 7506 effect(KILL cr); 7507 7508 ins_cost(125); 7509 format %{ "SUB $dst,$src" %} 7510 opcode(0x2B); 7511 ins_encode( OpcP, RegMem( dst, src) ); 7512 ins_pipe( ialu_reg_mem ); 7513 %} 7514 7515 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 7516 match(Set dst (StoreI dst (SubI (LoadI dst) src))); 7517 effect(KILL cr); 7518 7519 ins_cost(150); 7520 format %{ "SUB $dst,$src" %} 7521 opcode(0x29); /* Opcode 29 /r */ 7522 ins_encode( OpcP, RegMem( src, dst ) ); 7523 ins_pipe( ialu_mem_reg ); 7524 %} 7525 7526 // Subtract from a pointer 7527 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{ 7528 match(Set dst (AddP dst (SubI zero src))); 7529 effect(KILL cr); 7530 7531 size(2); 7532 format %{ "SUB $dst,$src" %} 7533 opcode(0x2B); 7534 ins_encode( OpcP, RegReg( dst, src) ); 7535 ins_pipe( ialu_reg_reg ); 7536 %} 7537 7538 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{ 7539 match(Set dst (SubI zero dst)); 7540 effect(KILL cr); 7541 7542 size(2); 7543 format %{ "NEG $dst" %} 7544 opcode(0xF7,0x03); // Opcode F7 /3 7545 ins_encode( OpcP, RegOpc( dst ) ); 7546 ins_pipe( ialu_reg ); 7547 %} 7548 7549 //----------Multiplication/Division Instructions------------------------------- 7550 // Integer Multiplication Instructions 7551 // Multiply Register 7552 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7553 match(Set dst (MulI dst src)); 7554 effect(KILL cr); 7555 7556 size(3); 7557 ins_cost(300); 7558 format %{ "IMUL $dst,$src" %} 7559 opcode(0xAF, 0x0F); 7560 ins_encode( OpcS, OpcP, RegReg( dst, src) ); 7561 ins_pipe( ialu_reg_reg_alu0 ); 7562 %} 7563 7564 // Multiply 32-bit Immediate 7565 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ 7566 match(Set dst (MulI src imm)); 7567 effect(KILL cr); 7568 7569 ins_cost(300); 7570 format %{ "IMUL 
$dst,$src,$imm" %} 7571 opcode(0x69); /* 69 /r id */ 7572 ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); 7573 ins_pipe( ialu_reg_reg_alu0 ); 7574 %} 7575 7576 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ 7577 match(Set dst src); 7578 effect(KILL cr); 7579 7580 // Note that this is artificially increased to make it more expensive than loadConL 7581 ins_cost(250); 7582 format %{ "MOV EAX,$src\t// low word only" %} 7583 opcode(0xB8); 7584 ins_encode( LdImmL_Lo(dst, src) ); 7585 ins_pipe( ialu_reg_fat ); 7586 %} 7587 7588 // Multiply by 32-bit Immediate, taking the shifted high order results 7589 // (special case for shift by 32) 7590 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ 7591 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7592 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7593 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7594 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 7595 effect(USE src1, KILL cr); 7596 7597 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7598 ins_cost(0*100 + 1*400 - 150); 7599 format %{ "IMUL EDX:EAX,$src1" %} 7600 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7601 ins_pipe( pipe_slow ); 7602 %} 7603 7604 // Multiply by 32-bit Immediate, taking the shifted high order results 7605 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ 7606 match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); 7607 predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && 7608 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && 7609 _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); 
7610 effect(USE src1, KILL cr); 7611 7612 // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only 7613 ins_cost(1*100 + 1*400 - 150); 7614 format %{ "IMUL EDX:EAX,$src1\n\t" 7615 "SAR EDX,$cnt-32" %} 7616 ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); 7617 ins_pipe( pipe_slow ); 7618 %} 7619 7620 // Multiply Memory 32-bit Immediate 7621 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ 7622 match(Set dst (MulI (LoadI src) imm)); 7623 effect(KILL cr); 7624 7625 ins_cost(300); 7626 format %{ "IMUL $dst,$src,$imm" %} 7627 opcode(0x69); /* 69 /r id */ 7628 ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) ); 7629 ins_pipe( ialu_reg_mem_alu0 ); 7630 %} 7631 7632 // Multiply Memory 7633 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ 7634 match(Set dst (MulI dst (LoadI src))); 7635 effect(KILL cr); 7636 7637 ins_cost(350); 7638 format %{ "IMUL $dst,$src" %} 7639 opcode(0xAF, 0x0F); 7640 ins_encode( OpcS, OpcP, RegMem( dst, src) ); 7641 ins_pipe( ialu_reg_mem_alu0 ); 7642 %} 7643 7644 // Multiply Register Int to Long 7645 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ 7646 // Basic Idea: long = (long)int * (long)int 7647 match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); 7648 effect(DEF dst, USE src, USE src1, KILL flags); 7649 7650 ins_cost(300); 7651 format %{ "IMUL $dst,$src1" %} 7652 7653 ins_encode( long_int_multiply( dst, src1 ) ); 7654 ins_pipe( ialu_reg_reg_alu0 ); 7655 %} 7656 7657 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ 7658 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) 7659 match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); 7660 effect(KILL flags); 7661 7662 ins_cost(300); 7663 format %{ "MUL $dst,$src1" %} 7664 7665 ins_encode( long_uint_multiply(dst, src1) ); 7666 ins_pipe( ialu_reg_reg_alu0 ); 7667 %} 7668 7669 // Multiply 
Register Long 7670 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7671 match(Set dst (MulL dst src)); 7672 effect(KILL cr, TEMP tmp); 7673 ins_cost(4*100+3*400); 7674 // Basic idea: lo(result) = lo(x_lo * y_lo) 7675 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) 7676 format %{ "MOV $tmp,$src.lo\n\t" 7677 "IMUL $tmp,EDX\n\t" 7678 "MOV EDX,$src.hi\n\t" 7679 "IMUL EDX,EAX\n\t" 7680 "ADD $tmp,EDX\n\t" 7681 "MUL EDX:EAX,$src.lo\n\t" 7682 "ADD EDX,$tmp" %} 7683 ins_encode( long_multiply( dst, src, tmp ) ); 7684 ins_pipe( pipe_slow ); 7685 %} 7686 7687 // Multiply Register Long where the left operand's high 32 bits are zero 7688 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7689 predicate(is_operand_hi32_zero(n->in(1))); 7690 match(Set dst (MulL dst src)); 7691 effect(KILL cr, TEMP tmp); 7692 ins_cost(2*100+2*400); 7693 // Basic idea: lo(result) = lo(x_lo * y_lo) 7694 // hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 7695 format %{ "MOV $tmp,$src.hi\n\t" 7696 "IMUL $tmp,EAX\n\t" 7697 "MUL EDX:EAX,$src.lo\n\t" 7698 "ADD EDX,$tmp" %} 7699 ins_encode %{ 7700 __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); 7701 __ imull($tmp$$Register, rax); 7702 __ mull($src$$Register); 7703 __ addl(rdx, $tmp$$Register); 7704 %} 7705 ins_pipe( pipe_slow ); 7706 %} 7707 7708 // Multiply Register Long where the right operand's high 32 bits are zero 7709 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ 7710 predicate(is_operand_hi32_zero(n->in(2))); 7711 match(Set dst (MulL dst src)); 7712 effect(KILL cr, TEMP tmp); 7713 ins_cost(2*100+2*400); 7714 // Basic idea: lo(result) = lo(x_lo * y_lo) 7715 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 7716 format %{ "MOV $tmp,$src.lo\n\t" 7717 "IMUL $tmp,EDX\n\t" 7718 "MUL EDX:EAX,$src.lo\n\t" 7719 "ADD EDX,$tmp" %} 7720 ins_encode %{ 7721 __ 
movl($tmp$$Register, $src$$Register); 7722 __ imull($tmp$$Register, rdx); 7723 __ mull($src$$Register); 7724 __ addl(rdx, $tmp$$Register); 7725 %} 7726 ins_pipe( pipe_slow ); 7727 %} 7728 7729 // Multiply Register Long where the left and the right operands' high 32 bits are zero 7730 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ 7731 predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); 7732 match(Set dst (MulL dst src)); 7733 effect(KILL cr); 7734 ins_cost(1*400); 7735 // Basic idea: lo(result) = lo(x_lo * y_lo) 7736 // hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 7737 format %{ "MUL EDX:EAX,$src.lo\n\t" %} 7738 ins_encode %{ 7739 __ mull($src$$Register); 7740 %} 7741 ins_pipe( pipe_slow ); 7742 %} 7743 7744 // Multiply Register Long by small constant 7745 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ 7746 match(Set dst (MulL dst src)); 7747 effect(KILL cr, TEMP tmp); 7748 ins_cost(2*100+2*400); 7749 size(12); 7750 // Basic idea: lo(result) = lo(src * EAX) 7751 // hi(result) = hi(src * EAX) + lo(src * EDX) 7752 format %{ "IMUL $tmp,EDX,$src\n\t" 7753 "MOV EDX,$src\n\t" 7754 "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" 7755 "ADD EDX,$tmp" %} 7756 ins_encode( long_multiply_con( dst, src, tmp ) ); 7757 ins_pipe( pipe_slow ); 7758 %} 7759 7760 // Integer DIV with Register 7761 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7762 match(Set rax (DivI rax div)); 7763 effect(KILL rdx, KILL cr); 7764 size(26); 7765 ins_cost(30*100+10*100); 7766 format %{ "CMP EAX,0x80000000\n\t" 7767 "JNE,s normal\n\t" 7768 "XOR EDX,EDX\n\t" 7769 "CMP ECX,-1\n\t" 7770 "JE,s done\n" 7771 "normal: CDQ\n\t" 7772 "IDIV $div\n\t" 7773 "done:" %} 7774 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7775 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7776 ins_pipe( ialu_reg_reg_alu0 ); 7777 %} 7778 7779 // Divide Register Long 7780 instruct divL_eReg( 
eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 7781 match(Set dst (DivL src1 src2)); 7782 effect( KILL cr, KILL cx, KILL bx ); 7783 ins_cost(10000); 7784 format %{ "PUSH $src1.hi\n\t" 7785 "PUSH $src1.lo\n\t" 7786 "PUSH $src2.hi\n\t" 7787 "PUSH $src2.lo\n\t" 7788 "CALL SharedRuntime::ldiv\n\t" 7789 "ADD ESP,16" %} 7790 ins_encode( long_div(src1,src2) ); 7791 ins_pipe( pipe_slow ); 7792 %} 7793 7794 // Integer DIVMOD with Register, both quotient and mod results 7795 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ 7796 match(DivModI rax div); 7797 effect(KILL cr); 7798 size(26); 7799 ins_cost(30*100+10*100); 7800 format %{ "CMP EAX,0x80000000\n\t" 7801 "JNE,s normal\n\t" 7802 "XOR EDX,EDX\n\t" 7803 "CMP ECX,-1\n\t" 7804 "JE,s done\n" 7805 "normal: CDQ\n\t" 7806 "IDIV $div\n\t" 7807 "done:" %} 7808 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7809 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7810 ins_pipe( pipe_slow ); 7811 %} 7812 7813 // Integer MOD with Register 7814 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ 7815 match(Set rdx (ModI rax div)); 7816 effect(KILL rax, KILL cr); 7817 7818 size(26); 7819 ins_cost(300); 7820 format %{ "CDQ\n\t" 7821 "IDIV $div" %} 7822 opcode(0xF7, 0x7); /* Opcode F7 /7 */ 7823 ins_encode( cdq_enc, OpcP, RegOpc(div) ); 7824 ins_pipe( ialu_reg_reg_alu0 ); 7825 %} 7826 7827 // Remainder Register Long 7828 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{ 7829 match(Set dst (ModL src1 src2)); 7830 effect( KILL cr, KILL cx, KILL bx ); 7831 ins_cost(10000); 7832 format %{ "PUSH $src1.hi\n\t" 7833 "PUSH $src1.lo\n\t" 7834 "PUSH $src2.hi\n\t" 7835 "PUSH $src2.lo\n\t" 7836 "CALL SharedRuntime::lrem\n\t" 7837 "ADD ESP,16" %} 7838 ins_encode( long_mod(src1,src2) ); 7839 ins_pipe( pipe_slow ); 7840 %} 7841 7842 // Divide Register Long (no special case since divisor != -1) 7843 instruct divL_eReg_imm32( 
eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    // Divisors that would break the unsigned-division trick are excluded
    // by the matcher; this instruct only sees |con| that fits 31 bits.
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      // Negative divisor: negate the 64-bit quotient EDX:EAX.
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    // Remainder is in EDX; move it to EAX and sign-extend into EDX.
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
instruct
salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN: dst = ~src1 & src2 (matches (XorI src1 -1) & src2).
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit (matches (0 - src) & src).
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK: mask up to (and including) lowest set bit (matches (src + -1) ^ src).
instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSMSKL $dst, $src" %}

  ins_encode %{
    __ blsmskl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR: reset lowest set bit (matches (src + -1) & src).
instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Register);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "BLSRL $dst, $src" %}

  ins_encode %{
    __ blsrl($dst$$Register, $src$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Instructions
// Or Register with Register
instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR $dst,$src" %}
  opcode(0x81,0x01); /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR $dst,$src" %}
  opcode(0x09); /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR $dst,$src" %}
  opcode(0x81,0x1); /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// ROL/ROR
// ROL expand
instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1 /0 */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL $dst, $shift" %}
  opcode(0xD3, 0x0); /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate: left + right shift counts must sum to 0 mod 32 for a rotate.
instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR expand
instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD1,0x1); /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR $dst, $shift" %}
  opcode(0xD3, 0x1); /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once
instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Predicate: shift counts must sum to 0 mod 32 for a rotate.
instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32,
                           eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// xor with -1 is a bitwise NOT; NOT does not touch flags, so no KILL cr.
instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT $dst" %}
  ins_encode %{
    __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x06); /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR $dst,$src" %}
  opcode(0x31); /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR $dst,$src" %}
  opcode(0x81,0x6); /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

//----------Convert Int to Boolean---------------------------------------------

instruct movI_nocopy(rRegI dst, rRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG sets carry iff dst was non-zero; ADC then yields 0 or 1.
instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

instruct movP_nocopy(rRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG $dst\n\t"
            "ADC $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}

instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
  match(Set dst (CmpLTMask p q));
  effect(KILL cr);
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR $dst,$dst\n\t"
            "CMP
$p,$q\n\t"
            "SETlt $dst\n\t"
            "NEG $dst" %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Rd = $dst$$Register;
    // NOTE(review): 'done' is declared but never bound or jumped to here.
    Label done;
    __ xorl(Rd, Rd);
    __ cmpl(Rp, Rq);
    __ setb(Assembler::less, Rd);
    __ negl(Rd); // 0 -> 0, 1 -> -1 (all-ones mask)
  %}

  ins_pipe(pipe_slow);
%}

instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (CmpLTMask dst zero));
  effect(DEF dst, KILL cr);
  ins_cost(100);

  format %{ "SAR $dst,31\t# cmpLTMask0" %}
  ins_encode %{
    __ sarl($dst$$Register, 31);
  %}
  ins_pipe(ialu_reg);
%}

/* better to save a register than avoid a branch */
instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL cr);
  ins_cost(400);
  format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t"
            "JGE done\n\t"
            "ADD $p,$y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ subl(Rp, Rq);
    __ jccb(Assembler::greaterEqual, done);
    __ addl(Rp, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* better to save a register than avoid a branch */
instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
  match(Set y (AndI (CmpLTMask p q) y));
  effect(KILL cr);

  ins_cost(300);

  format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t"
            "JLT done\n\t"
            "XORL $y, $y\n"
            "done: " %}
  ins_encode %{
    Register Rp = $p$$Register;
    Register Rq = $q$$Register;
    Register Ry = $y$$Register;
    Label done;
    __ cmpl(Rp, Rq);
    __ jccb(Assembler::less, done);
    __ xorl(Ry, Ry);
    __ bind(done);
  %}

  ins_pipe(pipe_cmplt);
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
*/
//----------Overflow Math Instructions-----------------------------------------

instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
%{
  match(Set cr (OverflowAddI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "ADD $op1, $op2\t# overflow check int" %}

  ins_encode %{
    __ addl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

// CMP sets flags like SUB but clobbers no register, so no USE_KILL here.
instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
%{
  match(Set cr (OverflowSubI op1 op2));

  format %{ "CMP $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ cmpl($op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
%{
  match(Set cr (OverflowSubI zero op2));
  effect(DEF cr, USE_KILL op2);

  format %{ "NEG $op2\t# overflow check int" %}
  ins_encode %{
    __ negl($op2$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, USE_KILL op1, USE op2);

  format %{ "IMUL $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($op1$$Register, $op2$$Register);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

// Immediate form can target a temp, leaving op1 live.
instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
%{
  match(Set cr (OverflowMulI op1 op2));
  effect(DEF cr, TEMP tmp, USE op1, USE op2);

  format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %}
  ins_encode %{
    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
  %}
  ins_pipe(ialu_reg_reg_alu0);
%}

//----------Long Instructions------------------------------------------------
// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD $dst.lo,$src.lo\n\t"
            "ADC $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD $dst.lo,$mem\n\t"
            "ADC $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL
 cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// Long forms apply the 32-bit BMI1 op to the low and high halves separately.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Address of the high 32-bit half (low-half address + 4).
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI long: only the high half is processed when the low half is all zero
// (BLSIL on the low half leaves ZF clear iff it found a set bit).
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSMSK long: BLSMSKL sets CF when the source is zero; the high half is
// only processed when the low half carried.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// BLSR long: high half is only processed when the low half carried
// (BLSRL sets CF when its source is zero).
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL
(AddL (LoadL src) minus_1) (LoadL src) )); 9121 predicate(UseBMI1Instructions); 9122 effect(KILL cr, TEMP dst); 9123 9124 ins_cost(125); 9125 format %{ "MOVL $dst.hi, $src+4\n\t" 9126 "BLSRL $dst.lo, $src\n\t" 9127 "JNC done\n\t" 9128 "BLSRL $dst.hi, $src+4\n" 9129 "done:" 9130 %} 9131 9132 ins_encode %{ 9133 Label done; 9134 Register Rdst = $dst$$Register; 9135 Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); 9136 __ movl(HIGH_FROM_LOW(Rdst), src_hi); 9137 __ blsrl(Rdst, $src$$Address); 9138 __ jccb(Assembler::carryClear, done); 9139 __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); 9140 __ bind(done); 9141 %} 9142 9143 ins_pipe(ialu_reg_mem); 9144 %} 9145 9146 // Or Long Register with Register 9147 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9148 match(Set dst (OrL dst src)); 9149 effect(KILL cr); 9150 format %{ "OR $dst.lo,$src.lo\n\t" 9151 "OR $dst.hi,$src.hi" %} 9152 opcode(0x0B,0x0B); 9153 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9154 ins_pipe( ialu_reg_reg_long ); 9155 %} 9156 9157 // Or Long Register with Immediate 9158 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9159 match(Set dst (OrL dst src)); 9160 effect(KILL cr); 9161 format %{ "OR $dst.lo,$src.lo\n\t" 9162 "OR $dst.hi,$src.hi" %} 9163 opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ 9164 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9165 ins_pipe( ialu_reg_long ); 9166 %} 9167 9168 // Or Long Register with Memory 9169 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9170 match(Set dst (OrL dst (LoadL mem))); 9171 effect(KILL cr); 9172 ins_cost(125); 9173 format %{ "OR $dst.lo,$mem\n\t" 9174 "OR $dst.hi,$mem+4" %} 9175 opcode(0x0B,0x0B); 9176 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9177 ins_pipe( ialu_reg_long_mem ); 9178 %} 9179 9180 // Xor Long Register with Register 9181 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 9182 
match(Set dst (XorL dst src)); 9183 effect(KILL cr); 9184 format %{ "XOR $dst.lo,$src.lo\n\t" 9185 "XOR $dst.hi,$src.hi" %} 9186 opcode(0x33,0x33); 9187 ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); 9188 ins_pipe( ialu_reg_reg_long ); 9189 %} 9190 9191 // Xor Long Register with Immediate -1 9192 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ 9193 match(Set dst (XorL dst imm)); 9194 format %{ "NOT $dst.lo\n\t" 9195 "NOT $dst.hi" %} 9196 ins_encode %{ 9197 __ notl($dst$$Register); 9198 __ notl(HIGH_FROM_LOW($dst$$Register)); 9199 %} 9200 ins_pipe( ialu_reg_long ); 9201 %} 9202 9203 // Xor Long Register with Immediate 9204 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 9205 match(Set dst (XorL dst src)); 9206 effect(KILL cr); 9207 format %{ "XOR $dst.lo,$src.lo\n\t" 9208 "XOR $dst.hi,$src.hi" %} 9209 opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ 9210 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 9211 ins_pipe( ialu_reg_long ); 9212 %} 9213 9214 // Xor Long Register with Memory 9215 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 9216 match(Set dst (XorL dst (LoadL mem))); 9217 effect(KILL cr); 9218 ins_cost(125); 9219 format %{ "XOR $dst.lo,$mem\n\t" 9220 "XOR $dst.hi,$mem+4" %} 9221 opcode(0x33,0x33); 9222 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 9223 ins_pipe( ialu_reg_long_mem ); 9224 %} 9225 9226 // Shift Left Long by 1 9227 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ 9228 predicate(UseNewLongLShift); 9229 match(Set dst (LShiftL dst cnt)); 9230 effect(KILL cr); 9231 ins_cost(100); 9232 format %{ "ADD $dst.lo,$dst.lo\n\t" 9233 "ADC $dst.hi,$dst.hi" %} 9234 ins_encode %{ 9235 __ addl($dst$$Register,$dst$$Register); 9236 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9237 %} 9238 ins_pipe( ialu_reg_long ); 9239 %} 9240 9241 // Shift Left Long by 2 9242 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ 9243 
predicate(UseNewLongLShift); 9244 match(Set dst (LShiftL dst cnt)); 9245 effect(KILL cr); 9246 ins_cost(100); 9247 format %{ "ADD $dst.lo,$dst.lo\n\t" 9248 "ADC $dst.hi,$dst.hi\n\t" 9249 "ADD $dst.lo,$dst.lo\n\t" 9250 "ADC $dst.hi,$dst.hi" %} 9251 ins_encode %{ 9252 __ addl($dst$$Register,$dst$$Register); 9253 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9254 __ addl($dst$$Register,$dst$$Register); 9255 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9256 %} 9257 ins_pipe( ialu_reg_long ); 9258 %} 9259 9260 // Shift Left Long by 3 9261 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ 9262 predicate(UseNewLongLShift); 9263 match(Set dst (LShiftL dst cnt)); 9264 effect(KILL cr); 9265 ins_cost(100); 9266 format %{ "ADD $dst.lo,$dst.lo\n\t" 9267 "ADC $dst.hi,$dst.hi\n\t" 9268 "ADD $dst.lo,$dst.lo\n\t" 9269 "ADC $dst.hi,$dst.hi\n\t" 9270 "ADD $dst.lo,$dst.lo\n\t" 9271 "ADC $dst.hi,$dst.hi" %} 9272 ins_encode %{ 9273 __ addl($dst$$Register,$dst$$Register); 9274 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9275 __ addl($dst$$Register,$dst$$Register); 9276 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9277 __ addl($dst$$Register,$dst$$Register); 9278 __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); 9279 %} 9280 ins_pipe( ialu_reg_long ); 9281 %} 9282 9283 // Shift Left Long by 1-31 9284 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9285 match(Set dst (LShiftL dst cnt)); 9286 effect(KILL cr); 9287 ins_cost(200); 9288 format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" 9289 "SHL $dst.lo,$cnt" %} 9290 opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ 9291 ins_encode( move_long_small_shift(dst,cnt) ); 9292 ins_pipe( ialu_reg_long ); 9293 %} 9294 9295 // Shift Left Long by 32-63 9296 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9297 match(Set dst (LShiftL dst cnt)); 9298 effect(KILL cr); 9299 ins_cost(300); 9300 
format %{ "MOV $dst.hi,$dst.lo\n" 9301 "\tSHL $dst.hi,$cnt-32\n" 9302 "\tXOR $dst.lo,$dst.lo" %} 9303 opcode(0xC1, 0x4); /* C1 /4 ib */ 9304 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9305 ins_pipe( ialu_reg_long ); 9306 %} 9307 9308 // Shift Left Long by variable 9309 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9310 match(Set dst (LShiftL dst shift)); 9311 effect(KILL cr); 9312 ins_cost(500+200); 9313 size(17); 9314 format %{ "TEST $shift,32\n\t" 9315 "JEQ,s small\n\t" 9316 "MOV $dst.hi,$dst.lo\n\t" 9317 "XOR $dst.lo,$dst.lo\n" 9318 "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" 9319 "SHL $dst.lo,$shift" %} 9320 ins_encode( shift_left_long( dst, shift ) ); 9321 ins_pipe( pipe_slow ); 9322 %} 9323 9324 // Shift Right Long by 1-31 9325 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9326 match(Set dst (URShiftL dst cnt)); 9327 effect(KILL cr); 9328 ins_cost(200); 9329 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9330 "SHR $dst.hi,$cnt" %} 9331 opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ 9332 ins_encode( move_long_small_shift(dst,cnt) ); 9333 ins_pipe( ialu_reg_long ); 9334 %} 9335 9336 // Shift Right Long by 32-63 9337 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9338 match(Set dst (URShiftL dst cnt)); 9339 effect(KILL cr); 9340 ins_cost(300); 9341 format %{ "MOV $dst.lo,$dst.hi\n" 9342 "\tSHR $dst.lo,$cnt-32\n" 9343 "\tXOR $dst.hi,$dst.hi" %} 9344 opcode(0xC1, 0x5); /* C1 /5 ib */ 9345 ins_encode( move_long_big_shift_clr(dst,cnt) ); 9346 ins_pipe( ialu_reg_long ); 9347 %} 9348 9349 // Shift Right Long by variable 9350 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9351 match(Set dst (URShiftL dst shift)); 9352 effect(KILL cr); 9353 ins_cost(600); 9354 size(17); 9355 format %{ "TEST $shift,32\n\t" 9356 "JEQ,s small\n\t" 9357 "MOV $dst.lo,$dst.hi\n\t" 9358 "XOR $dst.hi,$dst.hi\n" 9359 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9360 "SHR $dst.hi,$shift" %} 9361 ins_encode( 
shift_right_long( dst, shift ) ); 9362 ins_pipe( pipe_slow ); 9363 %} 9364 9365 // Shift Right Long by 1-31 9366 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ 9367 match(Set dst (RShiftL dst cnt)); 9368 effect(KILL cr); 9369 ins_cost(200); 9370 format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" 9371 "SAR $dst.hi,$cnt" %} 9372 opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ 9373 ins_encode( move_long_small_shift(dst,cnt) ); 9374 ins_pipe( ialu_reg_long ); 9375 %} 9376 9377 // Shift Right Long by 32-63 9378 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ 9379 match(Set dst (RShiftL dst cnt)); 9380 effect(KILL cr); 9381 ins_cost(300); 9382 format %{ "MOV $dst.lo,$dst.hi\n" 9383 "\tSAR $dst.lo,$cnt-32\n" 9384 "\tSAR $dst.hi,31" %} 9385 opcode(0xC1, 0x7); /* C1 /7 ib */ 9386 ins_encode( move_long_big_shift_sign(dst,cnt) ); 9387 ins_pipe( ialu_reg_long ); 9388 %} 9389 9390 // Shift Right arithmetic Long by variable 9391 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ 9392 match(Set dst (RShiftL dst shift)); 9393 effect(KILL cr); 9394 ins_cost(600); 9395 size(18); 9396 format %{ "TEST $shift,32\n\t" 9397 "JEQ,s small\n\t" 9398 "MOV $dst.lo,$dst.hi\n\t" 9399 "SAR $dst.hi,31\n" 9400 "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" 9401 "SAR $dst.hi,$shift" %} 9402 ins_encode( shift_right_arith_long( dst, shift ) ); 9403 ins_pipe( pipe_slow ); 9404 %} 9405 9406 9407 //----------Double Instructions------------------------------------------------ 9408 // Double Math 9409 9410 // Compare & branch 9411 9412 // P6 version of float compare, sets condition codes in EFLAGS 9413 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9414 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9415 match(Set cr (CmpD src1 src2)); 9416 effect(KILL rax); 9417 ins_cost(150); 9418 format %{ "FLD $src1\n\t" 9419 "FUCOMIP ST,$src2 // P6 instruction\n\t" 9420 "JNP exit\n\t" 9421 "MOV ah,1 // saw a NaN, set CF\n\t" 9422 
"SAHF\n" 9423 "exit:\tNOP // avoid branch to branch" %} 9424 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9425 ins_encode( Push_Reg_DPR(src1), 9426 OpcP, RegOpc(src2), 9427 cmpF_P6_fixup ); 9428 ins_pipe( pipe_slow ); 9429 %} 9430 9431 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ 9432 predicate(VM_Version::supports_cmov() && UseSSE <=1); 9433 match(Set cr (CmpD src1 src2)); 9434 ins_cost(150); 9435 format %{ "FLD $src1\n\t" 9436 "FUCOMIP ST,$src2 // P6 instruction" %} 9437 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 9438 ins_encode( Push_Reg_DPR(src1), 9439 OpcP, RegOpc(src2)); 9440 ins_pipe( pipe_slow ); 9441 %} 9442 9443 // Compare & branch 9444 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ 9445 predicate(UseSSE<=1); 9446 match(Set cr (CmpD src1 src2)); 9447 effect(KILL rax); 9448 ins_cost(200); 9449 format %{ "FLD $src1\n\t" 9450 "FCOMp $src2\n\t" 9451 "FNSTSW AX\n\t" 9452 "TEST AX,0x400\n\t" 9453 "JZ,s flags\n\t" 9454 "MOV AH,1\t# unordered treat as LT\n" 9455 "flags:\tSAHF" %} 9456 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9457 ins_encode( Push_Reg_DPR(src1), 9458 OpcP, RegOpc(src2), 9459 fpu_flags); 9460 ins_pipe( pipe_slow ); 9461 %} 9462 9463 // Compare vs zero into -1,0,1 9464 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 9465 predicate(UseSSE<=1); 9466 match(Set dst (CmpD3 src1 zero)); 9467 effect(KILL cr, KILL rax); 9468 ins_cost(280); 9469 format %{ "FTSTD $dst,$src1" %} 9470 opcode(0xE4, 0xD9); 9471 ins_encode( Push_Reg_DPR(src1), 9472 OpcS, OpcP, PopFPU, 9473 CmpF_Result(dst)); 9474 ins_pipe( pipe_slow ); 9475 %} 9476 9477 // Compare into -1,0,1 9478 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ 9479 predicate(UseSSE<=1); 9480 match(Set dst (CmpD3 src1 src2)); 9481 effect(KILL cr, KILL rax); 9482 ins_cost(300); 9483 format %{ "FCMPD $dst,$src1,$src2" %} 9484 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 9485 ins_encode( 
Push_Reg_DPR(src1), 9486 OpcP, RegOpc(src2), 9487 CmpF_Result(dst)); 9488 ins_pipe( pipe_slow ); 9489 %} 9490 9491 // float compare and set condition codes in EFLAGS by XMM regs 9492 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9493 predicate(UseSSE>=2); 9494 match(Set cr (CmpD src1 src2)); 9495 ins_cost(145); 9496 format %{ "UCOMISD $src1,$src2\n\t" 9497 "JNP,s exit\n\t" 9498 "PUSHF\t# saw NaN, set CF\n\t" 9499 "AND [rsp], #0xffffff2b\n\t" 9500 "POPF\n" 9501 "exit:" %} 9502 ins_encode %{ 9503 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9504 emit_cmpfp_fixup(_masm); 9505 %} 9506 ins_pipe( pipe_slow ); 9507 %} 9508 9509 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9510 predicate(UseSSE>=2); 9511 match(Set cr (CmpD src1 src2)); 9512 ins_cost(100); 9513 format %{ "UCOMISD $src1,$src2" %} 9514 ins_encode %{ 9515 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9516 %} 9517 ins_pipe( pipe_slow ); 9518 %} 9519 9520 // float compare and set condition codes in EFLAGS by XMM regs 9521 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9522 predicate(UseSSE>=2); 9523 match(Set cr (CmpD src1 (LoadD src2))); 9524 ins_cost(145); 9525 format %{ "UCOMISD $src1,$src2\n\t" 9526 "JNP,s exit\n\t" 9527 "PUSHF\t# saw NaN, set CF\n\t" 9528 "AND [rsp], #0xffffff2b\n\t" 9529 "POPF\n" 9530 "exit:" %} 9531 ins_encode %{ 9532 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9533 emit_cmpfp_fixup(_masm); 9534 %} 9535 ins_pipe( pipe_slow ); 9536 %} 9537 9538 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9539 predicate(UseSSE>=2); 9540 match(Set cr (CmpD src1 (LoadD src2))); 9541 ins_cost(100); 9542 format %{ "UCOMISD $src1,$src2" %} 9543 ins_encode %{ 9544 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9545 %} 9546 ins_pipe( pipe_slow ); 9547 %} 9548 9549 // Compare into -1,0,1 in XMM 9550 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9551 predicate(UseSSE>=2); 9552 match(Set dst (CmpD3 src1 src2)); 
9553 effect(KILL cr); 9554 ins_cost(255); 9555 format %{ "UCOMISD $src1, $src2\n\t" 9556 "MOV $dst, #-1\n\t" 9557 "JP,s done\n\t" 9558 "JB,s done\n\t" 9559 "SETNE $dst\n\t" 9560 "MOVZB $dst, $dst\n" 9561 "done:" %} 9562 ins_encode %{ 9563 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9564 emit_cmpfp3(_masm, $dst$$Register); 9565 %} 9566 ins_pipe( pipe_slow ); 9567 %} 9568 9569 // Compare into -1,0,1 in XMM and memory 9570 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9571 predicate(UseSSE>=2); 9572 match(Set dst (CmpD3 src1 (LoadD src2))); 9573 effect(KILL cr); 9574 ins_cost(275); 9575 format %{ "UCOMISD $src1, $src2\n\t" 9576 "MOV $dst, #-1\n\t" 9577 "JP,s done\n\t" 9578 "JB,s done\n\t" 9579 "SETNE $dst\n\t" 9580 "MOVZB $dst, $dst\n" 9581 "done:" %} 9582 ins_encode %{ 9583 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9584 emit_cmpfp3(_masm, $dst$$Register); 9585 %} 9586 ins_pipe( pipe_slow ); 9587 %} 9588 9589 9590 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9591 predicate (UseSSE <=1); 9592 match(Set dst (SubD dst src)); 9593 9594 format %{ "FLD $src\n\t" 9595 "DSUBp $dst,ST" %} 9596 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9597 ins_cost(150); 9598 ins_encode( Push_Reg_DPR(src), 9599 OpcP, RegOpc(dst) ); 9600 ins_pipe( fpu_reg_reg ); 9601 %} 9602 9603 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9604 predicate (UseSSE <=1); 9605 match(Set dst (RoundDouble (SubD src1 src2))); 9606 ins_cost(250); 9607 9608 format %{ "FLD $src2\n\t" 9609 "DSUB ST,$src1\n\t" 9610 "FSTP_D $dst\t# D-round" %} 9611 opcode(0xD8, 0x5); 9612 ins_encode( Push_Reg_DPR(src2), 9613 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9614 ins_pipe( fpu_mem_reg_reg ); 9615 %} 9616 9617 9618 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9619 predicate (UseSSE <=1); 9620 match(Set dst (SubD dst (LoadD src))); 9621 ins_cost(150); 9622 9623 format %{ "FLD $src\n\t" 9624 "DSUBp $dst,ST" %} 9625 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9626 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9627 OpcP, RegOpc(dst) ); 9628 ins_pipe( fpu_reg_mem ); 9629 %} 9630 9631 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9632 predicate (UseSSE<=1); 9633 match(Set dst (AbsD src)); 9634 ins_cost(100); 9635 format %{ "FABS" %} 9636 opcode(0xE1, 0xD9); 9637 ins_encode( OpcS, OpcP ); 9638 ins_pipe( fpu_reg_reg ); 9639 %} 9640 9641 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9642 predicate(UseSSE<=1); 9643 match(Set dst (NegD src)); 9644 ins_cost(100); 9645 format %{ "FCHS" %} 9646 opcode(0xE0, 0xD9); 9647 ins_encode( OpcS, OpcP ); 9648 ins_pipe( fpu_reg_reg ); 9649 %} 9650 9651 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9652 predicate(UseSSE<=1); 9653 match(Set dst (AddD dst src)); 9654 format %{ "FLD $src\n\t" 9655 "DADD $dst,ST" %} 9656 size(4); 9657 ins_cost(150); 9658 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9659 ins_encode( Push_Reg_DPR(src), 9660 OpcP, RegOpc(dst) ); 9661 ins_pipe( fpu_reg_reg ); 9662 %} 9663 9664 9665 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9666 predicate(UseSSE<=1); 9667 match(Set dst (RoundDouble (AddD src1 src2))); 9668 ins_cost(250); 9669 9670 format %{ "FLD $src2\n\t" 9671 "DADD ST,$src1\n\t" 9672 "FSTP_D $dst\t# D-round" %} 9673 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9674 ins_encode( Push_Reg_DPR(src2), 9675 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9676 ins_pipe( fpu_mem_reg_reg ); 9677 %} 9678 9679 9680 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9681 predicate(UseSSE<=1); 9682 match(Set dst (AddD dst (LoadD src))); 9683 ins_cost(150); 9684 9685 format %{ "FLD $src\n\t" 9686 "DADDp $dst,ST" %} 9687 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9688 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9689 OpcP, RegOpc(dst) ); 9690 ins_pipe( fpu_reg_mem ); 9691 %} 9692 9693 // add-to-memory 9694 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9695 predicate(UseSSE<=1); 9696 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9697 ins_cost(150); 9698 9699 format %{ "FLD_D $dst\n\t" 9700 "DADD ST,$src\n\t" 9701 "FST_D $dst" %} 9702 opcode(0xDD, 0x0); 9703 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9704 Opcode(0xD8), RegOpc(src), 9705 set_instruction_start, 9706 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9707 ins_pipe( fpu_reg_mem ); 9708 %} 9709 9710 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9711 predicate(UseSSE<=1); 9712 match(Set dst (AddD dst con)); 9713 ins_cost(125); 9714 format %{ "FLD1\n\t" 9715 "DADDp $dst,ST" %} 9716 ins_encode %{ 9717 __ fld1(); 9718 __ faddp($dst$$reg); 9719 %} 9720 ins_pipe(fpu_reg); 9721 %} 9722 9723 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9724 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9725 match(Set dst (AddD dst con)); 9726 ins_cost(200); 9727 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9728 "DADDp $dst,ST" %} 9729 ins_encode %{ 9730 __ fld_d($constantaddress($con)); 9731 __ faddp($dst$$reg); 9732 %} 9733 ins_pipe(fpu_reg_mem); 9734 %} 9735 9736 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9737 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9738 match(Set dst (RoundDouble (AddD src con))); 9739 ins_cost(200); 9740 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9741 "DADD ST,$src\n\t" 9742 "FSTP_D $dst\t# D-round" %} 9743 ins_encode %{ 9744 __ fld_d($constantaddress($con)); 9745 __ fadd($src$$reg); 9746 __ fstp_d(Address(rsp, $dst$$disp)); 9747 %} 9748 ins_pipe(fpu_mem_reg_con); 9749 %} 9750 9751 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9752 predicate(UseSSE<=1); 9753 match(Set dst (MulD dst src)); 9754 format %{ "FLD $src\n\t" 9755 "DMULp $dst,ST" %} 9756 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9757 ins_cost(150); 9758 ins_encode( Push_Reg_DPR(src), 9759 OpcP, RegOpc(dst) ); 9760 ins_pipe( 
fpu_reg_reg ); 9761 %} 9762 9763 // Strict FP instruction biases argument before multiply then 9764 // biases result to avoid double rounding of subnormals. 9765 // 9766 // scale arg1 by multiplying arg1 by 2^(-15360) 9767 // load arg2 9768 // multiply scaled arg1 by arg2 9769 // rescale product by 2^(15360) 9770 // 9771 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9772 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9773 match(Set dst (MulD dst src)); 9774 ins_cost(1); // Select this instruction for all strict FP double multiplies 9775 9776 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9777 "DMULp $dst,ST\n\t" 9778 "FLD $src\n\t" 9779 "DMULp $dst,ST\n\t" 9780 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9781 "DMULp $dst,ST\n\t" %} 9782 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9783 ins_encode( strictfp_bias1(dst), 9784 Push_Reg_DPR(src), 9785 OpcP, RegOpc(dst), 9786 strictfp_bias2(dst) ); 9787 ins_pipe( fpu_reg_reg ); 9788 %} 9789 9790 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9791 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9792 match(Set dst (MulD dst con)); 9793 ins_cost(200); 9794 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9795 "DMULp $dst,ST" %} 9796 ins_encode %{ 9797 __ fld_d($constantaddress($con)); 9798 __ fmulp($dst$$reg); 9799 %} 9800 ins_pipe(fpu_reg_mem); 9801 %} 9802 9803 9804 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9805 predicate( UseSSE<=1 ); 9806 match(Set dst (MulD dst (LoadD src))); 9807 ins_cost(200); 9808 format %{ "FLD_D $src\n\t" 9809 "DMULp $dst,ST" %} 9810 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9811 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9812 OpcP, RegOpc(dst) ); 9813 ins_pipe( fpu_reg_mem ); 9814 %} 9815 9816 // 9817 // Cisc-alternate to reg-reg multiply 9818 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9819 predicate( UseSSE<=1 ); 9820 match(Set dst (MulD src (LoadD mem))); 9821 ins_cost(250); 9822 format %{ "FLD_D $mem\n\t" 9823 "DMUL ST,$src\n\t" 9824 "FSTP_D $dst" %} 9825 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9826 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9827 OpcReg_FPR(src), 9828 Pop_Reg_DPR(dst) ); 9829 ins_pipe( fpu_reg_reg_mem ); 9830 %} 9831 9832 9833 // MACRO3 -- addDPR a mulDPR 9834 // This instruction is a '2-address' instruction in that the result goes 9835 // back to src2. This eliminates a move from the macro; possibly the 9836 // register allocator will have to add it back (and maybe not). 9837 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9838 predicate( UseSSE<=1 ); 9839 match(Set src2 (AddD (MulD src0 src1) src2)); 9840 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9841 "DMUL ST,$src1\n\t" 9842 "DADDp $src2,ST" %} 9843 ins_cost(250); 9844 opcode(0xDD); /* LoadD DD /0 */ 9845 ins_encode( Push_Reg_FPR(src0), 9846 FMul_ST_reg(src1), 9847 FAddP_reg_ST(src2) ); 9848 ins_pipe( fpu_reg_reg_reg ); 9849 %} 9850 9851 9852 // MACRO3 -- subDPR a mulDPR 9853 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9854 predicate( UseSSE<=1 ); 9855 match(Set src2 (SubD (MulD src0 src1) src2)); 9856 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9857 "DMUL ST,$src1\n\t" 9858 "DSUBRp $src2,ST" %} 9859 ins_cost(250); 9860 ins_encode( Push_Reg_FPR(src0), 9861 FMul_ST_reg(src1), 9862 Opcode(0xDE), Opc_plus(0xE0,src2)); 9863 ins_pipe( fpu_reg_reg_reg ); 9864 %} 9865 9866 9867 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9868 predicate( UseSSE<=1 ); 9869 match(Set dst (DivD dst src)); 9870 9871 format %{ "FLD $src\n\t" 9872 "FDIVp $dst,ST" %} 9873 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9874 ins_cost(150); 9875 ins_encode( Push_Reg_DPR(src), 9876 OpcP, RegOpc(dst) ); 9877 ins_pipe( fpu_reg_reg ); 9878 %} 9879 9880 // Strict FP instruction biases argument before division then 9881 // biases 
// result, to avoid double rounding of subnormals.
//
// scale dividend by multiplying dividend by 2^(-15360)
// load divisor
// divide scaled dividend by divisor
// rescale quotient by 2^(15360)
//
// Strict-FP x87 double divide. Must be the TOS register (regDPR1) because the
// bias/rescale sequence operates on ST(0).
instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  // Single predicate only: ADLC recognizes one predicate() per instruct, so the
  // stray duplicate "predicate(UseSSE<=1);" that preceded match() has been
  // removed. The strict-FP condition below already implies UseSSE<=1.
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1); // Select this instruction for all strict FP double divides
               // (was octal "01"; value unchanged, matches strictfp_mulDPR_reg)

  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "FDIVp  $dst,ST\n\t"
            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  // Bias dividend, divide, then rescale so subnormal results are not
  // double-rounded (see the comment block above).
  ins_encode( strictfp_bias1(dst),
              Push_Reg_DPR(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Non-strict double divide with explicit rounding to a stack slot.
instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD    $src1\n\t"
            "FDIV   ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}

// x87 double remainder (DMOD). The emitModDPR() helper loops on FPREM and
// inspects the FPU status word via EAX/EFLAGS, hence the kills.
instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_DPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 double remainder: round-trips the XMM operands through the x87 stack
// because FPREM has no SSE equivalent (continues on the following lines).
instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
9939 effect(KILL rax, KILL cr); 9940 9941 format %{ "SUB ESP,8\t # DMOD\n" 9942 "\tMOVSD [ESP+0],$src1\n" 9943 "\tFLD_D [ESP+0]\n" 9944 "\tMOVSD [ESP+0],$src0\n" 9945 "\tFLD_D [ESP+0]\n" 9946 "loop:\tFPREM\n" 9947 "\tFWAIT\n" 9948 "\tFNSTSW AX\n" 9949 "\tSAHF\n" 9950 "\tJP loop\n" 9951 "\tFSTP_D [ESP+0]\n" 9952 "\tMOVSD $dst,[ESP+0]\n" 9953 "\tADD ESP,8\n" 9954 "\tFSTP ST0\t # Restore FPU Stack" 9955 %} 9956 ins_cost(250); 9957 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9958 ins_pipe( pipe_slow ); 9959 %} 9960 9961 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9962 predicate (UseSSE<=1); 9963 match(Set dst(AtanD dst src)); 9964 format %{ "DATA $dst,$src" %} 9965 opcode(0xD9, 0xF3); 9966 ins_encode( Push_Reg_DPR(src), 9967 OpcP, OpcS, RegOpc(dst) ); 9968 ins_pipe( pipe_slow ); 9969 %} 9970 9971 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9972 predicate (UseSSE>=2); 9973 match(Set dst(AtanD dst src)); 9974 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9975 format %{ "DATA $dst,$src" %} 9976 opcode(0xD9, 0xF3); 9977 ins_encode( Push_SrcD(src), 9978 OpcP, OpcS, Push_ResultD(dst) ); 9979 ins_pipe( pipe_slow ); 9980 %} 9981 9982 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9983 predicate (UseSSE<=1); 9984 match(Set dst (SqrtD src)); 9985 format %{ "DSQRT $dst,$src" %} 9986 opcode(0xFA, 0xD9); 9987 ins_encode( Push_Reg_DPR(src), 9988 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9989 ins_pipe( pipe_slow ); 9990 %} 9991 9992 //-------------Float Instructions------------------------------- 9993 // Float Math 9994 9995 // Code for float compare: 9996 // fcompp(); 9997 // fwait(); fnstsw_ax(); 9998 // sahf(); 9999 // movl(dst, unordered_result); 10000 // jcc(Assembler::parity, exit); 10001 // movl(dst, less_result); 10002 // jcc(Assembler::below, exit); 10003 // movl(dst, equal_result); 10004 // jcc(Assembler::equal, exit); 10005 // movl(dst, greater_result); 10006 // exit: 10007 10008 // P6 version of float 
compare, sets condition codes in EFLAGS 10009 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10010 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10011 match(Set cr (CmpF src1 src2)); 10012 effect(KILL rax); 10013 ins_cost(150); 10014 format %{ "FLD $src1\n\t" 10015 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10016 "JNP exit\n\t" 10017 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10018 "SAHF\n" 10019 "exit:\tNOP // avoid branch to branch" %} 10020 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10021 ins_encode( Push_Reg_DPR(src1), 10022 OpcP, RegOpc(src2), 10023 cmpF_P6_fixup ); 10024 ins_pipe( pipe_slow ); 10025 %} 10026 10027 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10028 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10029 match(Set cr (CmpF src1 src2)); 10030 ins_cost(100); 10031 format %{ "FLD $src1\n\t" 10032 "FUCOMIP ST,$src2 // P6 instruction" %} 10033 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10034 ins_encode( Push_Reg_DPR(src1), 10035 OpcP, RegOpc(src2)); 10036 ins_pipe( pipe_slow ); 10037 %} 10038 10039 10040 // Compare & branch 10041 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10042 predicate(UseSSE == 0); 10043 match(Set cr (CmpF src1 src2)); 10044 effect(KILL rax); 10045 ins_cost(200); 10046 format %{ "FLD $src1\n\t" 10047 "FCOMp $src2\n\t" 10048 "FNSTSW AX\n\t" 10049 "TEST AX,0x400\n\t" 10050 "JZ,s flags\n\t" 10051 "MOV AH,1\t# unordered treat as LT\n" 10052 "flags:\tSAHF" %} 10053 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10054 ins_encode( Push_Reg_DPR(src1), 10055 OpcP, RegOpc(src2), 10056 fpu_flags); 10057 ins_pipe( pipe_slow ); 10058 %} 10059 10060 // Compare vs zero into -1,0,1 10061 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10062 predicate(UseSSE == 0); 10063 match(Set dst (CmpF3 src1 zero)); 10064 effect(KILL cr, KILL rax); 10065 ins_cost(280); 10066 format %{ "FTSTF $dst,$src1" %} 10067 
opcode(0xE4, 0xD9); 10068 ins_encode( Push_Reg_DPR(src1), 10069 OpcS, OpcP, PopFPU, 10070 CmpF_Result(dst)); 10071 ins_pipe( pipe_slow ); 10072 %} 10073 10074 // Compare into -1,0,1 10075 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10076 predicate(UseSSE == 0); 10077 match(Set dst (CmpF3 src1 src2)); 10078 effect(KILL cr, KILL rax); 10079 ins_cost(300); 10080 format %{ "FCMPF $dst,$src1,$src2" %} 10081 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10082 ins_encode( Push_Reg_DPR(src1), 10083 OpcP, RegOpc(src2), 10084 CmpF_Result(dst)); 10085 ins_pipe( pipe_slow ); 10086 %} 10087 10088 // float compare and set condition codes in EFLAGS by XMM regs 10089 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10090 predicate(UseSSE>=1); 10091 match(Set cr (CmpF src1 src2)); 10092 ins_cost(145); 10093 format %{ "UCOMISS $src1,$src2\n\t" 10094 "JNP,s exit\n\t" 10095 "PUSHF\t# saw NaN, set CF\n\t" 10096 "AND [rsp], #0xffffff2b\n\t" 10097 "POPF\n" 10098 "exit:" %} 10099 ins_encode %{ 10100 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10101 emit_cmpfp_fixup(_masm); 10102 %} 10103 ins_pipe( pipe_slow ); 10104 %} 10105 10106 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10107 predicate(UseSSE>=1); 10108 match(Set cr (CmpF src1 src2)); 10109 ins_cost(100); 10110 format %{ "UCOMISS $src1,$src2" %} 10111 ins_encode %{ 10112 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10113 %} 10114 ins_pipe( pipe_slow ); 10115 %} 10116 10117 // float compare and set condition codes in EFLAGS by XMM regs 10118 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10119 predicate(UseSSE>=1); 10120 match(Set cr (CmpF src1 (LoadF src2))); 10121 ins_cost(165); 10122 format %{ "UCOMISS $src1,$src2\n\t" 10123 "JNP,s exit\n\t" 10124 "PUSHF\t# saw NaN, set CF\n\t" 10125 "AND [rsp], #0xffffff2b\n\t" 10126 "POPF\n" 10127 "exit:" %} 10128 ins_encode %{ 10129 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10130 
emit_cmpfp_fixup(_masm); 10131 %} 10132 ins_pipe( pipe_slow ); 10133 %} 10134 10135 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10136 predicate(UseSSE>=1); 10137 match(Set cr (CmpF src1 (LoadF src2))); 10138 ins_cost(100); 10139 format %{ "UCOMISS $src1,$src2" %} 10140 ins_encode %{ 10141 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10142 %} 10143 ins_pipe( pipe_slow ); 10144 %} 10145 10146 // Compare into -1,0,1 in XMM 10147 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10148 predicate(UseSSE>=1); 10149 match(Set dst (CmpF3 src1 src2)); 10150 effect(KILL cr); 10151 ins_cost(255); 10152 format %{ "UCOMISS $src1, $src2\n\t" 10153 "MOV $dst, #-1\n\t" 10154 "JP,s done\n\t" 10155 "JB,s done\n\t" 10156 "SETNE $dst\n\t" 10157 "MOVZB $dst, $dst\n" 10158 "done:" %} 10159 ins_encode %{ 10160 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10161 emit_cmpfp3(_masm, $dst$$Register); 10162 %} 10163 ins_pipe( pipe_slow ); 10164 %} 10165 10166 // Compare into -1,0,1 in XMM and memory 10167 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10168 predicate(UseSSE>=1); 10169 match(Set dst (CmpF3 src1 (LoadF src2))); 10170 effect(KILL cr); 10171 ins_cost(275); 10172 format %{ "UCOMISS $src1, $src2\n\t" 10173 "MOV $dst, #-1\n\t" 10174 "JP,s done\n\t" 10175 "JB,s done\n\t" 10176 "SETNE $dst\n\t" 10177 "MOVZB $dst, $dst\n" 10178 "done:" %} 10179 ins_encode %{ 10180 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10181 emit_cmpfp3(_masm, $dst$$Register); 10182 %} 10183 ins_pipe( pipe_slow ); 10184 %} 10185 10186 // Spill to obtain 24-bit precision 10187 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10188 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10189 match(Set dst (SubF src1 src2)); 10190 10191 format %{ "FSUB $dst,$src1 - $src2" %} 10192 opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ 10193 ins_encode( Push_Reg_FPR(src1), 10194 OpcReg_FPR(src2), 
10195 Pop_Mem_FPR(dst) ); 10196 ins_pipe( fpu_mem_reg_reg ); 10197 %} 10198 // 10199 // This instruction does not round to 24-bits 10200 instruct subFPR_reg(regFPR dst, regFPR src) %{ 10201 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10202 match(Set dst (SubF dst src)); 10203 10204 format %{ "FSUB $dst,$src" %} 10205 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 10206 ins_encode( Push_Reg_FPR(src), 10207 OpcP, RegOpc(dst) ); 10208 ins_pipe( fpu_reg_reg ); 10209 %} 10210 10211 // Spill to obtain 24-bit precision 10212 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ 10213 predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); 10214 match(Set dst (AddF src1 src2)); 10215 10216 format %{ "FADD $dst,$src1,$src2" %} 10217 opcode(0xD8, 0x0); /* D8 C0+i */ 10218 ins_encode( Push_Reg_FPR(src2), 10219 OpcReg_FPR(src1), 10220 Pop_Mem_FPR(dst) ); 10221 ins_pipe( fpu_mem_reg_reg ); 10222 %} 10223 // 10224 // This instruction does not round to 24-bits 10225 instruct addFPR_reg(regFPR dst, regFPR src) %{ 10226 predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); 10227 match(Set dst (AddF dst src)); 10228 10229 format %{ "FLD $src\n\t" 10230 "FADDp $dst,ST" %} 10231 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 10232 ins_encode( Push_Reg_FPR(src), 10233 OpcP, RegOpc(dst) ); 10234 ins_pipe( fpu_reg_reg ); 10235 %} 10236 10237 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ 10238 predicate(UseSSE==0); 10239 match(Set dst (AbsF src)); 10240 ins_cost(100); 10241 format %{ "FABS" %} 10242 opcode(0xE1, 0xD9); 10243 ins_encode( OpcS, OpcP ); 10244 ins_pipe( fpu_reg_reg ); 10245 %} 10246 10247 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ 10248 predicate(UseSSE==0); 10249 match(Set dst (NegF src)); 10250 ins_cost(100); 10251 format %{ "FCHS" %} 10252 opcode(0xE0, 0xD9); 10253 ins_encode( OpcS, OpcP ); 10254 ins_pipe( fpu_reg_reg ); 10255 %} 10256 10257 // Cisc-alternate to addFPR_reg 10258 // Spill to obtain 24-bit precision 
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  // Result goes to a stack slot; the 32-bit store (FSTP_S) performs the
  // rounding to single precision.
  format %{ "FLD    $src2\n\t"
            "FADD   ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD   $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD   $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary),
              RMopc_Mem(0x00,src2),
              // set_instruction_start marks where the second (cisc-spilled)
              // memory operand's instruction begins for relocation purposes.
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD    $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD    ST(i-1)
    // The float constant is materialized in the method's constant table.
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD    $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD    ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);      // pop to register, no 24-bit rounding store
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD    $src1\n\t"
            "FMUL   $src2\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// NOTE(review): format shows FSTP_S but the encoding pops to a register
// (Pop_Reg_FPR) — display text only; verify against the encoding classes.
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD    $src1\n\t"
            "FMUL   $src2\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S  $src2\n\t"
            "FMUL   $src1\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL   $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL   $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD    $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD    ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD    $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);   // FLD    ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD    $mem1    ===MACRO1===\n\t"
            "FMUL   ST,$src\n\t"
            "FSTP   $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD    $mem1     ===MACRO2===\n\t"
            "FMUL   ST,$src1  subsume mulFPR left load\n\t"
            "FADD   ST,$src2\n\t"
            "FSTP   $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits.  It is a '2-address'
// instruction in that the result goes back to src2.  This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  // 2-address form: the AddF result is written back into src2 (see the
  // MACRO3 comment above) — note src2 is both an input and the Set target.
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD    $src0     ===MACRO3===\n\t"
            "FMUL   ST,$src1\n\t"
            "FADDP  $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD    $src2   ===MACRO4===\n\t"
            "FSUB   ST,$src1\n\t"
            "FDIV   ST,$src3\n\t"
            "FSTP  $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  // 2-address form: dst is also the dividend input.
  match(Set dst (DivF dst src));

  format %{ "FDIV   $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD   $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  // 2-address form: dst is also the dividend input.
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD   $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

// SSE float remainder: operands are bounced through a 4-byte stack temp onto
// the x87 stack, the FPREM loop computes the remainder (retrying while the
// C2 status bit says "partial", tested via FNSTSW/SAHF/JP), and the result
// is copied back into the XMM destination.
instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr); // FNSTSW writes AX; SAHF writes EFLAGS

  format %{ "SUB    ESP,4\t # FMOD\n"
          "\tMOVSS  [ESP+0],$src1\n"
          "\tFLD_S  [ESP+0]\n"
          "\tMOVSS  [ESP+0],$src0\n"
          "\tFLD_S  [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS  $dst,[ESP+0]\n"
          "\tADD    ESP,4\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted.  Please keep it that way!

// Round an x87 register down to float precision by storing it to a stack slot.
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S  $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round an x87 register to double precision by storing it to a stack slot.
instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D  $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S  $dst,$src\t# F-round" %}
  // Expands into the roundFloat store above rather than emitting code itself.
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB    ESP,4\n\t"
            "FST_S  [ESP],$src\t# F-round\n\t"
            "MOVSS  $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // If src is not already at top-of-stack (FPR1), load it first and use a
    // popping store; otherwise store directly from ST(0).
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Float -> double entirely on the x87 stack (UseSSE==0).
instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S  $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D  $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// SSE float -> x87 double: bounce the value through a 4-byte stack temp.
instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB    ESP,4\n\t"
            "MOVSS  [ESP] $src\n\t"
            "FLD_S  [ESP]\n\t"
            "ADD    ESP,4\n\t"
            "FSTP   $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr ); // tmp (EDX) is clobbered by the encoding
  // 0x80000000 is the x87 "integer indefinite" value stored by FIST on
  // overflow/NaN; seeing it forces the slow d2i_wrapper call for exact
  // Java corner-case semantics.
  format %{ "FLD    $src\t# Convert double to int \n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,4\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "CMP    EAX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "FLD_D  $src\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP    $dst,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP, 8\n\t"
            "MOVSD  [ESP], $src\n\t"
            "FLD_D  [ESP]\n\t"
            "ADD    ESP, 8\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSD2SI returns 0x80000000 on overflow/NaN; only then take the
    // slow path through the runtime wrapper.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));   // wrapper expects the value on the FPU stack
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  // Slow path taken only when the 64-bit result equals 0x8000000000000000
  // (EDX==0x80000000 and EAX==0), the FISTp overflow/NaN marker.
  format %{ "FLD    $src\t# Convert double to long\n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,8\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB    ESP,8\t# Convert double to long\n\t"
            "MOVSD  [ESP],$src\n\t"
            "FLD_D  [ESP]\n\t"
            "FLDCW  trunc mode\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP,8\n\t"
            "MOVSD  [ESP],$src\n\t"
            "FLD_D  [ESP]\n\t"
            "ADD    ESP,8\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    // Switch the FPU to truncating (round-toward-zero) mode for the store.
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Overflow/NaN marker seen: redo via the runtime wrapper.
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD    $src\t# Convert float to int \n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,4\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "CMP    EAX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP    $dst,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP, 4\n\t"
            "MOVSS  [ESP], $src\n\t"
            "FLD    [ESP]\n\t"
            "ADD    ESP, 4\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSS2SI returns 0x80000000 on overflow/NaN; only then call the
    // runtime wrapper (shared with the double case) for Java semantics.
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));   // wrapper expects the value on the FPU stack
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  // Same overflow/NaN protocol as the double->long variant: the marker
  // value 0x8000000000000000 routes to d2l_wrapper.
  format %{ "FLD    $src\t# Convert float to long\n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,8\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB    ESP,8\t# Convert float to long\n\t"
            "MOVSS  [ESP],$src\n\t"
            "FLD_S  [ESP]\n\t"
            "FLDCW  trunc mode\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP,4\t# Convert float to long\n\t"
            "MOVSS  [ESP],$src\n\t"
            "FLD_S  [ESP]\n\t"
            "ADD    ESP,4\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode %{
    Label fast;
    // Spill the XMM float, convert on the x87 stack in truncating mode
    // (FISTp writes 8 bytes), then check for the overflow/NaN marker.
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    // Marker seen: redo the conversion through the runtime wrapper.
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert int (from a stack slot) to x87 double via FILD.
instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// i2d staying entirely in the XMM domain (MOVD + CVTDQ2PD), selected by
// the UseXmmI2D flag.
instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD  $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD   $mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
11041 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ 11042 predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); 11043 match(Set dst (ConvI2F src)); 11044 format %{ "FILD $src\n\t" 11045 "FSTP $dst" %} 11046 11047 opcode(0xDB, 0x0); /* DB /0 */ 11048 ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); 11049 ins_pipe( fpu_reg_mem ); 11050 %} 11051 11052 // In 24-bit mode, force exponent rounding by storing back out 11053 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ 11054 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11055 match(Set dst (ConvI2F src)); 11056 ins_cost(200); 11057 format %{ "FILD $src\n\t" 11058 "FSTP_S $dst" %} 11059 opcode(0xDB, 0x0); /* DB /0 */ 11060 ins_encode( Push_Mem_I(src), 11061 Pop_Mem_FPR(dst)); 11062 ins_pipe( fpu_mem_mem ); 11063 %} 11064 11065 // In 24-bit mode, force exponent rounding by storing back out 11066 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ 11067 predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); 11068 match(Set dst (ConvI2F (LoadI mem))); 11069 ins_cost(200); 11070 format %{ "FILD $mem\n\t" 11071 "FSTP_S $dst" %} 11072 opcode(0xDB); /* DB /0 */ 11073 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11074 Pop_Mem_FPR(dst)); 11075 ins_pipe( fpu_mem_mem ); 11076 %} 11077 11078 // This instruction does not round to 24-bits 11079 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ 11080 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11081 match(Set dst (ConvI2F src)); 11082 format %{ "FILD $src\n\t" 11083 "FSTP $dst" %} 11084 opcode(0xDB, 0x0); /* DB /0 */ 11085 ins_encode( Push_Mem_I(src), 11086 Pop_Reg_FPR(dst)); 11087 ins_pipe( fpu_reg_mem ); 11088 %} 11089 11090 // This instruction does not round to 24-bits 11091 instruct convI2FPR_mem(regFPR dst, memory mem) %{ 11092 predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); 11093 match(Set dst (ConvI2F (LoadI 
mem))); 11094 format %{ "FILD $mem\n\t" 11095 "FSTP $dst" %} 11096 opcode(0xDB); /* DB /0 */ 11097 ins_encode( OpcP, RMopc_Mem(0x00,mem), 11098 Pop_Reg_FPR(dst)); 11099 ins_pipe( fpu_reg_mem ); 11100 %} 11101 11102 // Convert an int to a float in xmm; no rounding step needed. 11103 instruct convI2F_reg(regF dst, rRegI src) %{ 11104 predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F ); 11105 match(Set dst (ConvI2F src)); 11106 format %{ "CVTSI2SS $dst, $src" %} 11107 ins_encode %{ 11108 __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); 11109 %} 11110 ins_pipe( pipe_slow ); 11111 %} 11112 11113 instruct convXI2F_reg(regF dst, rRegI src) 11114 %{ 11115 predicate( UseSSE>=2 && UseXmmI2F ); 11116 match(Set dst (ConvI2F src)); 11117 11118 format %{ "MOVD $dst,$src\n\t" 11119 "CVTDQ2PS $dst,$dst\t# i2f" %} 11120 ins_encode %{ 11121 __ movdl($dst$$XMMRegister, $src$$Register); 11122 __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); 11123 %} 11124 ins_pipe(pipe_slow); // XXX 11125 %} 11126 11127 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{ 11128 match(Set dst (ConvI2L src)); 11129 effect(KILL cr); 11130 ins_cost(375); 11131 format %{ "MOV $dst.lo,$src\n\t" 11132 "MOV $dst.hi,$src\n\t" 11133 "SAR $dst.hi,31" %} 11134 ins_encode(convert_int_long(dst,src)); 11135 ins_pipe( ialu_reg_reg_long ); 11136 %} 11137 11138 // Zero-extend convert int to long 11139 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{ 11140 match(Set dst (AndL (ConvI2L src) mask) ); 11141 effect( KILL flags ); 11142 ins_cost(250); 11143 format %{ "MOV $dst.lo,$src\n\t" 11144 "XOR $dst.hi,$dst.hi" %} 11145 opcode(0x33); // XOR 11146 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11147 ins_pipe( ialu_reg_reg_long ); 11148 %} 11149 11150 // Zero-extend long 11151 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{ 11152 match(Set dst (AndL src mask) ); 11153 effect( KILL flags ); 11154 ins_cost(250); 11155 format %{ "MOV 
$dst.lo,$src.lo\n\t" 11156 "XOR $dst.hi,$dst.hi\n\t" %} 11157 opcode(0x33); // XOR 11158 ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); 11159 ins_pipe( ialu_reg_reg_long ); 11160 %} 11161 11162 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ 11163 predicate (UseSSE<=1); 11164 match(Set dst (ConvL2D src)); 11165 effect( KILL cr ); 11166 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11167 "PUSH $src.lo\n\t" 11168 "FILD ST,[ESP + #0]\n\t" 11169 "ADD ESP,8\n\t" 11170 "FSTP_D $dst\t# D-round" %} 11171 opcode(0xDF, 0x5); /* DF /5 */ 11172 ins_encode(convert_long_double(src), Pop_Mem_DPR(dst)); 11173 ins_pipe( pipe_slow ); 11174 %} 11175 11176 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{ 11177 predicate (UseSSE>=2); 11178 match(Set dst (ConvL2D src)); 11179 effect( KILL cr ); 11180 format %{ "PUSH $src.hi\t# Convert long to double\n\t" 11181 "PUSH $src.lo\n\t" 11182 "FILD_D [ESP]\n\t" 11183 "FSTP_D [ESP]\n\t" 11184 "MOVSD $dst,[ESP]\n\t" 11185 "ADD ESP,8" %} 11186 opcode(0xDF, 0x5); /* DF /5 */ 11187 ins_encode(convert_long_double2(src), Push_ResultD(dst)); 11188 ins_pipe( pipe_slow ); 11189 %} 11190 11191 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{ 11192 predicate (UseSSE>=1); 11193 match(Set dst (ConvL2F src)); 11194 effect( KILL cr ); 11195 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11196 "PUSH $src.lo\n\t" 11197 "FILD_D [ESP]\n\t" 11198 "FSTP_S [ESP]\n\t" 11199 "MOVSS $dst,[ESP]\n\t" 11200 "ADD ESP,8" %} 11201 opcode(0xDF, 0x5); /* DF /5 */ 11202 ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8)); 11203 ins_pipe( pipe_slow ); 11204 %} 11205 11206 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ 11207 match(Set dst (ConvL2F src)); 11208 effect( KILL cr ); 11209 format %{ "PUSH $src.hi\t# Convert long to single float\n\t" 11210 "PUSH $src.lo\n\t" 11211 "FILD ST,[ESP + #0]\n\t" 11212 "ADD ESP,8\n\t" 11213 "FSTP_S $dst\t# F-round" %} 11214 opcode(0xDF, 
0x5); /* DF /5 */ 11215 ins_encode(convert_long_double(src), Pop_Mem_FPR(dst)); 11216 ins_pipe( pipe_slow ); 11217 %} 11218 11219 instruct convL2I_reg( rRegI dst, eRegL src ) %{ 11220 match(Set dst (ConvL2I src)); 11221 effect( DEF dst, USE src ); 11222 format %{ "MOV $dst,$src.lo" %} 11223 ins_encode(enc_CopyL_Lo(dst,src)); 11224 ins_pipe( ialu_reg_reg ); 11225 %} 11226 11227 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ 11228 match(Set dst (MoveF2I src)); 11229 effect( DEF dst, USE src ); 11230 ins_cost(100); 11231 format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} 11232 ins_encode %{ 11233 __ movl($dst$$Register, Address(rsp, $src$$disp)); 11234 %} 11235 ins_pipe( ialu_reg_mem ); 11236 %} 11237 11238 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{ 11239 predicate(UseSSE==0); 11240 match(Set dst (MoveF2I src)); 11241 effect( DEF dst, USE src ); 11242 11243 ins_cost(125); 11244 format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} 11245 ins_encode( Pop_Mem_Reg_FPR(dst, src) ); 11246 ins_pipe( fpu_mem_reg ); 11247 %} 11248 11249 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{ 11250 predicate(UseSSE>=1); 11251 match(Set dst (MoveF2I src)); 11252 effect( DEF dst, USE src ); 11253 11254 ins_cost(95); 11255 format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} 11256 ins_encode %{ 11257 __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); 11258 %} 11259 ins_pipe( pipe_slow ); 11260 %} 11261 11262 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{ 11263 predicate(UseSSE>=2); 11264 match(Set dst (MoveF2I src)); 11265 effect( DEF dst, USE src ); 11266 ins_cost(85); 11267 format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} 11268 ins_encode %{ 11269 __ movdl($dst$$Register, $src$$XMMRegister); 11270 %} 11271 ins_pipe( pipe_slow ); 11272 %} 11273 11274 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ 11275 match(Set dst (MoveI2F src)); 11276 effect( DEF dst, USE src ); 11277 11278 ins_cost(100); 11279 format %{ "MOV $dst,$src\t# 
MoveI2F_reg_stack" %} 11280 ins_encode %{ 11281 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11282 %} 11283 ins_pipe( ialu_mem_reg ); 11284 %} 11285 11286 11287 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11288 predicate(UseSSE==0); 11289 match(Set dst (MoveI2F src)); 11290 effect(DEF dst, USE src); 11291 11292 ins_cost(125); 11293 format %{ "FLD_S $src\n\t" 11294 "FSTP $dst\t# MoveI2F_stack_reg" %} 11295 opcode(0xD9); /* D9 /0, FLD m32real */ 11296 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11297 Pop_Reg_FPR(dst) ); 11298 ins_pipe( fpu_reg_mem ); 11299 %} 11300 11301 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11302 predicate(UseSSE>=1); 11303 match(Set dst (MoveI2F src)); 11304 effect( DEF dst, USE src ); 11305 11306 ins_cost(95); 11307 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11308 ins_encode %{ 11309 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11310 %} 11311 ins_pipe( pipe_slow ); 11312 %} 11313 11314 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11315 predicate(UseSSE>=2); 11316 match(Set dst (MoveI2F src)); 11317 effect( DEF dst, USE src ); 11318 11319 ins_cost(85); 11320 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11321 ins_encode %{ 11322 __ movdl($dst$$XMMRegister, $src$$Register); 11323 %} 11324 ins_pipe( pipe_slow ); 11325 %} 11326 11327 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11328 match(Set dst (MoveD2L src)); 11329 effect(DEF dst, USE src); 11330 11331 ins_cost(250); 11332 format %{ "MOV $dst.lo,$src\n\t" 11333 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11334 opcode(0x8B, 0x8B); 11335 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11336 ins_pipe( ialu_mem_long_reg ); 11337 %} 11338 11339 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11340 predicate(UseSSE<=1); 11341 match(Set dst (MoveD2L src)); 11342 effect(DEF dst, USE src); 11343 11344 ins_cost(125); 11345 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11346 
ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11347 ins_pipe( fpu_mem_reg ); 11348 %} 11349 11350 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11351 predicate(UseSSE>=2); 11352 match(Set dst (MoveD2L src)); 11353 effect(DEF dst, USE src); 11354 ins_cost(95); 11355 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11356 ins_encode %{ 11357 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11358 %} 11359 ins_pipe( pipe_slow ); 11360 %} 11361 11362 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11363 predicate(UseSSE>=2); 11364 match(Set dst (MoveD2L src)); 11365 effect(DEF dst, USE src, TEMP tmp); 11366 ins_cost(85); 11367 format %{ "MOVD $dst.lo,$src\n\t" 11368 "PSHUFLW $tmp,$src,0x4E\n\t" 11369 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11370 ins_encode %{ 11371 __ movdl($dst$$Register, $src$$XMMRegister); 11372 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11373 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11374 %} 11375 ins_pipe( pipe_slow ); 11376 %} 11377 11378 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11379 match(Set dst (MoveL2D src)); 11380 effect(DEF dst, USE src); 11381 11382 ins_cost(200); 11383 format %{ "MOV $dst,$src.lo\n\t" 11384 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11385 opcode(0x89, 0x89); 11386 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11387 ins_pipe( ialu_mem_long_reg ); 11388 %} 11389 11390 11391 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11392 predicate(UseSSE<=1); 11393 match(Set dst (MoveL2D src)); 11394 effect(DEF dst, USE src); 11395 ins_cost(125); 11396 11397 format %{ "FLD_D $src\n\t" 11398 "FSTP $dst\t# MoveL2D_stack_reg" %} 11399 opcode(0xDD); /* DD /0, FLD m64real */ 11400 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11401 Pop_Reg_DPR(dst) ); 11402 ins_pipe( fpu_reg_mem ); 11403 %} 11404 11405 11406 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11407 predicate(UseSSE>=2 && 
UseXmmLoadAndClearUpper); 11408 match(Set dst (MoveL2D src)); 11409 effect(DEF dst, USE src); 11410 11411 ins_cost(95); 11412 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11413 ins_encode %{ 11414 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11415 %} 11416 ins_pipe( pipe_slow ); 11417 %} 11418 11419 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11420 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11421 match(Set dst (MoveL2D src)); 11422 effect(DEF dst, USE src); 11423 11424 ins_cost(95); 11425 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11426 ins_encode %{ 11427 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11428 %} 11429 ins_pipe( pipe_slow ); 11430 %} 11431 11432 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11433 predicate(UseSSE>=2); 11434 match(Set dst (MoveL2D src)); 11435 effect(TEMP dst, USE src, TEMP tmp); 11436 ins_cost(85); 11437 format %{ "MOVD $dst,$src.lo\n\t" 11438 "MOVD $tmp,$src.hi\n\t" 11439 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11440 ins_encode %{ 11441 __ movdl($dst$$XMMRegister, $src$$Register); 11442 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11443 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11444 %} 11445 ins_pipe( pipe_slow ); 11446 %} 11447 11448 11449 // ======================================================================= 11450 // fast clearing of an array 11451 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11452 predicate(!((ClearArrayNode*)n)->is_large()); 11453 match(Set dummy (ClearArray cnt base)); 11454 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11455 11456 format %{ $$template 11457 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11458 $$emit$$"CMP InitArrayShortSize,rcx\n\t" 11459 $$emit$$"JG LARGE\n\t" 11460 $$emit$$"SHL ECX, 1\n\t" 11461 $$emit$$"DEC ECX\n\t" 11462 $$emit$$"JS DONE\t# Zero length\n\t" 11463 $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" 11464 
$$emit$$"DEC ECX\n\t" 11465 $$emit$$"JGE LOOP\n\t" 11466 $$emit$$"JMP DONE\n\t" 11467 $$emit$$"# LARGE:\n\t" 11468 if (UseFastStosb) { 11469 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11470 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11471 } else { 11472 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11473 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11474 } 11475 $$emit$$"# DONE" 11476 %} 11477 ins_encode %{ 11478 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false); 11479 %} 11480 ins_pipe( pipe_slow ); 11481 %} 11482 11483 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11484 predicate(((ClearArrayNode*)n)->is_large()); 11485 match(Set dummy (ClearArray cnt base)); 11486 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11487 format %{ $$template 11488 $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" 11489 if (UseFastStosb) { 11490 $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" 11491 $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" 11492 } else { 11493 $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" 11494 $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" 11495 } 11496 $$emit$$"# DONE" 11497 %} 11498 ins_encode %{ 11499 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true); 11500 %} 11501 ins_pipe( pipe_slow ); 11502 %} 11503 11504 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11505 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11506 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); 11507 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11508 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11509 11510 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11511 ins_encode %{ 11512 __ string_compare($str1$$Register, $str2$$Register, 11513 
$cnt1$$Register, $cnt2$$Register, $result$$Register, 11514 $tmp1$$XMMRegister, StrIntrinsicNode::LL); 11515 %} 11516 ins_pipe( pipe_slow ); 11517 %} 11518 11519 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11520 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11521 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); 11522 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11523 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11524 11525 format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11526 ins_encode %{ 11527 __ string_compare($str1$$Register, $str2$$Register, 11528 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11529 $tmp1$$XMMRegister, StrIntrinsicNode::UU); 11530 %} 11531 ins_pipe( pipe_slow ); 11532 %} 11533 11534 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11535 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11536 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); 11537 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11538 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11539 11540 format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11541 ins_encode %{ 11542 __ string_compare($str1$$Register, $str2$$Register, 11543 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11544 $tmp1$$XMMRegister, StrIntrinsicNode::LU); 11545 %} 11546 ins_pipe( pipe_slow ); 11547 %} 11548 11549 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, 11550 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11551 predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); 11552 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11553 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11554 11555 format %{ "String Compare 
byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11556 ins_encode %{ 11557 __ string_compare($str2$$Register, $str1$$Register, 11558 $cnt2$$Register, $cnt1$$Register, $result$$Register, 11559 $tmp1$$XMMRegister, StrIntrinsicNode::UL); 11560 %} 11561 ins_pipe( pipe_slow ); 11562 %} 11563 11564 // fast string equals 11565 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11566 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11567 match(Set result (StrEquals (Binary str1 str2) cnt)); 11568 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11569 11570 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11571 ins_encode %{ 11572 __ arrays_equals(false, $str1$$Register, $str2$$Register, 11573 $cnt$$Register, $result$$Register, $tmp3$$Register, 11574 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */); 11575 %} 11576 11577 ins_pipe( pipe_slow ); 11578 %} 11579 11580 // fast search of substring with known size. 11581 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11582 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11583 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11584 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11585 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11586 11587 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11588 ins_encode %{ 11589 int icnt2 = (int)$int_cnt2$$constant; 11590 if (icnt2 >= 16) { 11591 // IndexOf for constant substrings with size >= 16 elements 11592 // which don't need to be loaded through stack. 
11593 __ string_indexofC8($str1$$Register, $str2$$Register, 11594 $cnt1$$Register, $cnt2$$Register, 11595 icnt2, $result$$Register, 11596 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11597 } else { 11598 // Small strings are loaded through stack if they cross page boundary. 11599 __ string_indexof($str1$$Register, $str2$$Register, 11600 $cnt1$$Register, $cnt2$$Register, 11601 icnt2, $result$$Register, 11602 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11603 } 11604 %} 11605 ins_pipe( pipe_slow ); 11606 %} 11607 11608 // fast search of substring with known size. 11609 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11610 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11611 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11612 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11613 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11614 11615 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11616 ins_encode %{ 11617 int icnt2 = (int)$int_cnt2$$constant; 11618 if (icnt2 >= 8) { 11619 // IndexOf for constant substrings with size >= 8 elements 11620 // which don't need to be loaded through stack. 11621 __ string_indexofC8($str1$$Register, $str2$$Register, 11622 $cnt1$$Register, $cnt2$$Register, 11623 icnt2, $result$$Register, 11624 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11625 } else { 11626 // Small strings are loaded through stack if they cross page boundary. 11627 __ string_indexof($str1$$Register, $str2$$Register, 11628 $cnt1$$Register, $cnt2$$Register, 11629 icnt2, $result$$Register, 11630 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11631 } 11632 %} 11633 ins_pipe( pipe_slow ); 11634 %} 11635 11636 // fast search of substring with known size. 
11637 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, 11638 eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ 11639 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11640 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); 11641 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); 11642 11643 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %} 11644 ins_encode %{ 11645 int icnt2 = (int)$int_cnt2$$constant; 11646 if (icnt2 >= 8) { 11647 // IndexOf for constant substrings with size >= 8 elements 11648 // which don't need to be loaded through stack. 11649 __ string_indexofC8($str1$$Register, $str2$$Register, 11650 $cnt1$$Register, $cnt2$$Register, 11651 icnt2, $result$$Register, 11652 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11653 } else { 11654 // Small strings are loaded through stack if they cross page boundary. 
11655 __ string_indexof($str1$$Register, $str2$$Register, 11656 $cnt1$$Register, $cnt2$$Register, 11657 icnt2, $result$$Register, 11658 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11659 } 11660 %} 11661 ins_pipe( pipe_slow ); 11662 %} 11663 11664 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11665 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11666 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); 11667 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11668 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11669 11670 format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11671 ins_encode %{ 11672 __ string_indexof($str1$$Register, $str2$$Register, 11673 $cnt1$$Register, $cnt2$$Register, 11674 (-1), $result$$Register, 11675 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); 11676 %} 11677 ins_pipe( pipe_slow ); 11678 %} 11679 11680 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11681 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11682 predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); 11683 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11684 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11685 11686 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11687 ins_encode %{ 11688 __ string_indexof($str1$$Register, $str2$$Register, 11689 $cnt1$$Register, $cnt2$$Register, 11690 (-1), $result$$Register, 11691 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); 11692 %} 11693 ins_pipe( pipe_slow ); 11694 %} 11695 11696 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, 11697 eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{ 11698 
predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); 11699 match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); 11700 effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); 11701 11702 format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} 11703 ins_encode %{ 11704 __ string_indexof($str1$$Register, $str2$$Register, 11705 $cnt1$$Register, $cnt2$$Register, 11706 (-1), $result$$Register, 11707 $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); 11708 %} 11709 ins_pipe( pipe_slow ); 11710 %} 11711 11712 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, 11713 eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ 11714 predicate(UseSSE42Intrinsics); 11715 match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); 11716 effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); 11717 format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} 11718 ins_encode %{ 11719 __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, 11720 $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); 11721 %} 11722 ins_pipe( pipe_slow ); 11723 %} 11724 11725 // fast array equals 11726 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11727 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11728 %{ 11729 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); 11730 match(Set result (AryEq ary1 ary2)); 11731 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11732 //ins_cost(300); 11733 11734 format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11735 ins_encode %{ 11736 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11737 $tmp3$$Register, $result$$Register, $tmp4$$Register, 
11738 $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */); 11739 %} 11740 ins_pipe( pipe_slow ); 11741 %} 11742 11743 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, 11744 regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) 11745 %{ 11746 predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); 11747 match(Set result (AryEq ary1 ary2)); 11748 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); 11749 //ins_cost(300); 11750 11751 format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} 11752 ins_encode %{ 11753 __ arrays_equals(true, $ary1$$Register, $ary2$$Register, 11754 $tmp3$$Register, $result$$Register, $tmp4$$Register, 11755 $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */); 11756 %} 11757 ins_pipe( pipe_slow ); 11758 %} 11759 11760 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result, 11761 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) 11762 %{ 11763 match(Set result (HasNegatives ary1 len)); 11764 effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); 11765 11766 format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11767 ins_encode %{ 11768 __ has_negatives($ary1$$Register, $len$$Register, 11769 $result$$Register, $tmp3$$Register, 11770 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11771 %} 11772 ins_pipe( pipe_slow ); 11773 %} 11774 11775 // fast char[] to byte[] compression 11776 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11777 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11778 match(Set result (StrCompressedCopy src (Binary dst len))); 11779 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11780 11781 format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} 11782 ins_encode %{ 11783 __ 
char_array_compress($src$$Register, $dst$$Register, $len$$Register, 11784 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11785 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11786 %} 11787 ins_pipe( pipe_slow ); 11788 %} 11789 11790 // fast byte[] to char[] inflation 11791 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, 11792 regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ 11793 match(Set dummy (StrInflatedCopy src (Binary dst len))); 11794 effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); 11795 11796 format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} 11797 ins_encode %{ 11798 __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, 11799 $tmp1$$XMMRegister, $tmp2$$Register); 11800 %} 11801 ins_pipe( pipe_slow ); 11802 %} 11803 11804 // encode char[] to byte[] in ISO_8859_1 11805 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, 11806 regD tmp1, regD tmp2, regD tmp3, regD tmp4, 11807 eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ 11808 match(Set result (EncodeISOArray src (Binary dst len))); 11809 effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); 11810 11811 format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} 11812 ins_encode %{ 11813 __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, 11814 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, 11815 $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register); 11816 %} 11817 ins_pipe( pipe_slow ); 11818 %} 11819 11820 11821 //----------Control Flow Instructions------------------------------------------ 11822 // Signed compare Instructions 11823 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ 11824 match(Set cr (CmpI op1 op2)); 11825 effect( DEF cr, USE op1, USE op2 ); 11826 format %{ "CMP $op1,$op2" %} 11827 opcode(0x3B); /* Opcode 3B /r */ 11828 
ins_encode( OpcP, RegReg( op1, op2) ); 11829 ins_pipe( ialu_cr_reg_reg ); 11830 %} 11831 11832 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ 11833 match(Set cr (CmpI op1 op2)); 11834 effect( DEF cr, USE op1 ); 11835 format %{ "CMP $op1,$op2" %} 11836 opcode(0x81,0x07); /* Opcode 81 /7 */ 11837 // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ 11838 ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); 11839 ins_pipe( ialu_cr_reg_imm ); 11840 %} 11841 11842 // Cisc-spilled version of cmpI_eReg 11843 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ 11844 match(Set cr (CmpI op1 (LoadI op2))); 11845 11846 format %{ "CMP $op1,$op2" %} 11847 ins_cost(500); 11848 opcode(0x3B); /* Opcode 3B /r */ 11849 ins_encode( OpcP, RegMem( op1, op2) ); 11850 ins_pipe( ialu_cr_reg_mem ); 11851 %} 11852 11853 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{ 11854 match(Set cr (CmpI src zero)); 11855 effect( DEF cr, USE src ); 11856 11857 format %{ "TEST $src,$src" %} 11858 opcode(0x85); 11859 ins_encode( OpcP, RegReg( src, src ) ); 11860 ins_pipe( ialu_cr_reg_imm ); 11861 %} 11862 11863 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{ 11864 match(Set cr (CmpI (AndI src con) zero)); 11865 11866 format %{ "TEST $src,$con" %} 11867 opcode(0xF7,0x00); 11868 ins_encode( OpcP, RegOpc(src), Con32(con) ); 11869 ins_pipe( ialu_cr_reg_imm ); 11870 %} 11871 11872 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{ 11873 match(Set cr (CmpI (AndI src mem) zero)); 11874 11875 format %{ "TEST $src,$mem" %} 11876 opcode(0x85); 11877 ins_encode( OpcP, RegMem( src, mem ) ); 11878 ins_pipe( ialu_cr_reg_mem ); 11879 %} 11880 11881 // Unsigned compare Instructions; really, same as signed except they 11882 // produce an eFlagsRegU instead of eFlagsReg. 
// Unsigned int compare, register-register.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Unsigned int compare, register-immediate.
instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned test against zero.
instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Pointer compare against pointer immediate.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// Test a loaded pointer against zero by ANDing it with all-ones in memory.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7); /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.

//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    // NOTE(review): m1 is computed but never used below — confirm and remove upstream.
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(!n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(400);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  predicate(n->has_vector_mask_set());
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  size(10);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
    __ restorevectmask();
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Unordered-or-condition branch: the parity flag (set on unordered FP compare)
// must be folded into the eq/ne decision, hence the two-jump sequences.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100); // slightly larger than the next version
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: EDI not-zero\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR $result,$result\t\t Hit: EDI zero\n\t"
            "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// Variant used when only the flags result is consumed (compare vs null);
// skips zeroing EDI on a hit, so it is slightly cheaper than the one above.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s miss\t\t# Missed: flags NZ\n\t"
            "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
            "miss:\t" %}

  opcode(0x0); // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions -- short offset versions
//
// These instructions are used to replace jumps of a long offset (the default
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Short variant of jmpConUCF2: both jumps in the sequence are short (jccb).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // High halves: signed compare decides unless equal ...
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // ... then low halves break the tie with an unsigned compare.
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): '&&' binds tighter than '||', so this predicate parses as
// (UseSSE<=1 && lt) || ge, unlike the parenthesized cmovLL/II/PP forms above.
// Harmless in practice only if operand classes disambiguate — confirm upstream.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  // Requires hardware CMOV; only EQ/NE long-compare results are legal here.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 stack form, UseSSE<=1)
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  // Fixed: the eq/ne disjunction is now parenthesized so the UseSSE<=1 gate
  // applies to BOTH tests.  The previous form parsed as
  // (UseSSE<=1 && eq) || ne, letting this x87 rule claim NE regardless of
  // UseSSE; the integer cmov rules above show the intended (eq || ne) grouping.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form, UseSSE>=2)
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // Same parenthesization fix as cmovDDPR_reg_EQNE above.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 stack form, UseSSE==0)
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  // Same parenthesization fix as cmovDDPR_reg_EQNE above.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM form, UseSSE>=1)
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // Same parenthesization fix as cmovDDPR_reg_EQNE above.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );  // tmp is zeroed and used as the scratch minuend
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );  // tmp holds src2.hi while the borrow chain runs
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Flags were produced with swapped operands (see cmpL_reg_flags_LEGT), so
  // only the commuted GT/LE tests are legal consumers.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  // Requires hardware CMOV; only the commuted LE/GT tests are legal here.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);  // CMOVcc r32,r/m32; condition folded in by enc_cmov
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same as above, but the long source comes from memory (LoadL folded in).
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);  // memory form is costlier than the reg-reg form (400)
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  // Requires hardware CMOV; only the commuted LE/GT tests are legal here.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);  // CMOVcc r32,r/m32; condition folded in by enc_cmov
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above, but the int source comes from memory (LoadI folded in).
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);  // memory form is slightly costlier than reg-reg (200)
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  // Requires hardware CMOV; only the commuted LE/GT tests are legal here.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles (x87 stack form, UseSSE<=1)
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  // Fixed: the le/gt disjunction is now parenthesized so the UseSSE<=1 gate
  // applies to BOTH tests.  The previous form parsed as
  // (UseSSE<=1 && le) || gt, letting this x87 rule claim GT regardless of
  // UseSSE; the integer cmov rules above show the intended (le || gt) grouping.
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles (XMM form, UseSSE>=2)
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // Same parenthesization fix as cmovDDPR_reg_LEGT above.
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 stack form, UseSSE==0)
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  // Same parenthesization fix as cmovDDPR_reg_LEGT above.
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (XMM form, UseSSE>=1)
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // Same parenthesization fix as cmovDDPR_reg_LEGT above.
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd -- CALL rel32 */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // Per the format, EAX is loaded with (oop)-1 before the call — presumably
  // an inline-cache placeholder filled in by Java_Dynamic_Call; confirm in
  // the Java_Dynamic_Call encoding.
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd -- CALL rel32 */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Also clears the x87 stack, and verifies FPU state on return (leaf-only check).
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call with no FPU bookkeeping at all: just the direct call encoding.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);  // RET near
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  // The method oop is pinned to EBX by the eBXRegP operand class.
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 -- indirect JMP r/m32 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // Return address is popped into EDX (a throwaway) before the indirect jump;
  // the exception oop is pinned to EAX by the eAXRegP operand class.
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 -- indirect JMP r/m32 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);  // zero-length: the oop is already in EAX when we get here
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast lock using RTM (Restricted Transactional Memory); chosen only when
// this compilation enabled RTM.  Needs extra temps (cx1, cx2) and passes the
// RTM profiling counters plus the method's MDO so fast_lock can profile and
// deoptimize-on-abort decisions can be made.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast lock: same FastLock node, no RTM temps/counters.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

// Fast unlock; the RTM flag is passed through so fast_unlock can handle a
// transactional lock if RTM was in use.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;  // fixed 6-byte encoding; see Safepoint_Poll
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);  // get_thread clobbers flags, hence KILL cr

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active rule: a store followed by a reload of the same value from the same
// location is collapsed to just the store (the register already holds the
// value, per the 1.src == 0.dst constraint).
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.