//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
// ideal register type, encoding );
// Register Save Types:
//
// NS = No-Save: The register allocator assumes that these registers
// can be used without saving upon entry to the method, &
// that they do not need to be saved at call sites.
42 // 43 // SOC = Save-On-Call: The register allocator assumes that these registers 44 // can be used without saving upon entry to the method, 45 // but that they must be saved at call sites. 46 // 47 // SOE = Save-On-Entry: The register allocator assumes that these registers 48 // must be saved before using them upon entry to the 49 // method, but they do not need to be saved at call 50 // sites. 51 // 52 // AS = Always-Save: The register allocator assumes that these registers 53 // must be saved before using them upon entry to the 54 // method, & that they must be saved at call sites. 55 // 56 // Ideal Register Type is used to determine how to save & restore a 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59 // 60 // The encoding number is the actual bit-pattern placed into the opcodes. 61 62 // General Registers 63 // Previously set EBX, ESI, and EDI as save-on-entry for java code 64 // Turn off SOE in java-code due to frequent use of uncommon-traps. 65 // Now that allocator is better, turn on ESI and EDI as SOE registers. 66 67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()); 68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()); 69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()); 70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()); 71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code 72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg()); 73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()); 74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg()); 75 reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg()); 76 77 // Float registers. We treat TOS/FPR0 special. It is invisible to the 78 // allocator, and only shows up in the encodings. 
79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); 80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); 81 // Ok so here's the trick FPR1 is really st(0) except in the midst 82 // of emission of assembly for a machnode. During the emission the fpu stack 83 // is pushed making FPR1 == st(1) temporarily. However at any safepoint 84 // the stack will not have this element so FPR1 == st(0) from the 85 // oopMap viewpoint. This same weirdness with numbering causes 86 // instruction encoding to have to play games with the register 87 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation 88 // where it does flt->flt moves to see an example 89 // 90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()); 91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next()); 92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()); 93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next()); 94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()); 95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next()); 96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()); 97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next()); 98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()); 99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next()); 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()); 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next()); 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()); 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next()); 104 // 105 // Empty fill registers, which are never used, but supply alignment to xmm regs 106 // 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2)); 108 reg_def FILL1( SOC, SOC, 
Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3)); 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4)); 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5)); 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6)); 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7)); 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8)); 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9)); 115 116 // Specify priority of register selection within phases of register 117 // allocation. Highest priority is first. A useful heuristic is to 118 // give registers a low priority when they are required by machine 119 // instructions, like EAX and EDX. Registers which are used as 120 // pairs must fall on an even boundary (witness the FPR#L's in this list). 121 // For the Intel integer registers, the equivalent Long pairs are 122 // EDX:EAX, EBX:ECX, and EDI:EBP. 123 alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP, 124 FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H, 125 FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H, 126 FPR6L, FPR6H, FPR7L, FPR7H, 127 FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7); 128 129 130 //----------Architecture Description Register Classes-------------------------- 131 // Several register classes are automatically defined based upon information in 132 // this architecture description. 
133 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) 134 // 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ ) 135 // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) 136 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) 137 // 138 // Class for all registers 139 reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP); 140 // Class for general registers 141 reg_class int_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX); 142 // Class for general registers which may be used for implicit null checks on win95 143 // Also safe for use by tailjump. We don't want to allocate in rbp, 144 reg_class int_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX); 145 // Class of "X" registers 146 reg_class int_x_reg(EBX, ECX, EDX, EAX); 147 // Class of registers that can appear in an address with no offset. 148 // EBP and ESP require an extra instruction byte for zero offset. 149 // Used in fast-unlock 150 reg_class p_reg(EDX, EDI, ESI, EBX); 151 // Class for general registers not including ECX 152 reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX); 153 // Class for general registers not including EAX 154 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX); 155 // Class for general registers not including EAX or EBX. 
156 reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP); 157 // Class of EAX (for multiply and divide operations) 158 reg_class eax_reg(EAX); 159 // Class of EBX (for atomic add) 160 reg_class ebx_reg(EBX); 161 // Class of ECX (for shift and JCXZ operations and cmpLTMask) 162 reg_class ecx_reg(ECX); 163 // Class of EDX (for multiply and divide operations) 164 reg_class edx_reg(EDX); 165 // Class of EDI (for synchronization) 166 reg_class edi_reg(EDI); 167 // Class of ESI (for synchronization) 168 reg_class esi_reg(ESI); 169 // Singleton class for interpreter's stack pointer 170 reg_class ebp_reg(EBP); 171 // Singleton class for stack pointer 172 reg_class sp_reg(ESP); 173 // Singleton class for instruction pointer 174 // reg_class ip_reg(EIP); 175 // Class of integer register pairs 176 reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI ); 177 // Class of integer register pairs that aligns with calling convention 178 reg_class eadx_reg( EAX,EDX ); 179 reg_class ebcx_reg( ECX,EBX ); 180 // Not AX or DX, used in divides 181 reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP ); 182 183 // Floating point registers. Notice FPR0 is not a choice. 184 // FPR0 is not ever allocated; we use clever encodings to fake 185 // a 2-address instructions out of Intels FP stack. 
186 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L ); 187 188 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H, 189 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, 190 FPR7L,FPR7H ); 191 192 reg_class fp_flt_reg0( FPR1L ); 193 reg_class fp_dbl_reg0( FPR1L,FPR1H ); 194 reg_class fp_dbl_reg1( FPR2L,FPR2H ); 195 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, 196 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H ); 197 198 %} 199 200 201 //----------SOURCE BLOCK------------------------------------------------------- 202 // This is a block of C++ code which provides values, functions, and 203 // definitions necessary in the rest of the architecture description 204 source_hpp %{ 205 // Must be visible to the DFA in dfa_x86_32.cpp 206 extern bool is_operand_hi32_zero(Node* n); 207 %} 208 209 source %{ 210 #define RELOC_IMM32 Assembler::imm_operand 211 #define RELOC_DISP32 Assembler::disp32_operand 212 213 #define __ _masm. 214 215 // How to find the high register of a Long pair, given the low register 216 #define HIGH_FROM_LOW(x) ((x)+2) 217 218 // These masks are used to provide 128-bit aligned bitmasks to the XMM 219 // instructions, to allow sign-masking or sign-bit flipping. They allow 220 // fast versions of NegF/NegD and AbsF/AbsD. 221 222 // Note: 'double' and 'long long' have 32-bits alignment on x86. 223 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { 224 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address 225 // of 128-bits operands for SSE instructions. 226 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF))); 227 // Store the value to a 128-bits operand. 228 operand[0] = lo; 229 operand[1] = hi; 230 return operand; 231 } 232 233 // Buffer for 128-bits masks used by SSE instructions. 234 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment) 235 236 // Static initialization during VM startup. 
237 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF)); 238 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF)); 239 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000)); 240 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000)); 241 242 // Offset hacking within calls. 243 static int pre_call_resets_size() { 244 int size = 0; 245 Compile* C = Compile::current(); 246 if (C->in_24_bit_fp_mode()) { 247 size += 6; // fldcw 248 } 249 if (C->max_vector_size() > 16) { 250 if(UseAVX <= 2) { 251 size += 3; // vzeroupper 252 } 253 } 254 return size; 255 } 256 257 static int preserve_SP_size() { 258 return 2; // op, rm(reg/reg) 259 } 260 261 // !!!!! Special hack to get all type of calls to specify the byte offset 262 // from the start of the call to the point where the return address 263 // will point. 264 int MachCallStaticJavaNode::ret_addr_offset() { 265 int offset = 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points 266 if (_method_handle_invoke) 267 offset += preserve_SP_size(); 268 return offset; 269 } 270 271 int MachCallDynamicJavaNode::ret_addr_offset() { 272 return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points 273 } 274 275 static int sizeof_FFree_Float_Stack_All = -1; 276 277 int MachCallRuntimeNode::ret_addr_offset() { 278 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already"); 279 return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size(); 280 } 281 282 // Indicate if the safepoint node needs the polling page as an input. 283 // Since x86 does have absolute addressing, it doesn't. 
284 bool SafePointNode::needs_polling_address_input() { 285 return false; 286 } 287 288 // 289 // Compute padding required for nodes which need alignment 290 // 291 292 // The address of the call instruction needs to be 4-byte aligned to 293 // ensure that it does not span a cache line so that it can be patched. 294 int CallStaticJavaDirectNode::compute_padding(int current_offset) const { 295 current_offset += pre_call_resets_size(); // skip fldcw, if any 296 current_offset += 1; // skip call opcode byte 297 return round_to(current_offset, alignment_required()) - current_offset; 298 } 299 300 // The address of the call instruction needs to be 4-byte aligned to 301 // ensure that it does not span a cache line so that it can be patched. 302 int CallStaticJavaHandleNode::compute_padding(int current_offset) const { 303 current_offset += pre_call_resets_size(); // skip fldcw, if any 304 current_offset += preserve_SP_size(); // skip mov rbp, rsp 305 current_offset += 1; // skip call opcode byte 306 return round_to(current_offset, alignment_required()) - current_offset; 307 } 308 309 // The address of the call instruction needs to be 4-byte aligned to 310 // ensure that it does not span a cache line so that it can be patched. 
311 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { 312 current_offset += pre_call_resets_size(); // skip fldcw, if any 313 current_offset += 5; // skip MOV instruction 314 current_offset += 1; // skip call opcode byte 315 return round_to(current_offset, alignment_required()) - current_offset; 316 } 317 318 // EMIT_RM() 319 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) { 320 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3); 321 cbuf.insts()->emit_int8(c); 322 } 323 324 // EMIT_CC() 325 void emit_cc(CodeBuffer &cbuf, int f1, int f2) { 326 unsigned char c = (unsigned char)( f1 | f2 ); 327 cbuf.insts()->emit_int8(c); 328 } 329 330 // EMIT_OPCODE() 331 void emit_opcode(CodeBuffer &cbuf, int code) { 332 cbuf.insts()->emit_int8((unsigned char) code); 333 } 334 335 // EMIT_OPCODE() w/ relocation information 336 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) { 337 cbuf.relocate(cbuf.insts_mark() + offset, reloc); 338 emit_opcode(cbuf, code); 339 } 340 341 // EMIT_D8() 342 void emit_d8(CodeBuffer &cbuf, int d8) { 343 cbuf.insts()->emit_int8((unsigned char) d8); 344 } 345 346 // EMIT_D16() 347 void emit_d16(CodeBuffer &cbuf, int d16) { 348 cbuf.insts()->emit_int16(d16); 349 } 350 351 // EMIT_D32() 352 void emit_d32(CodeBuffer &cbuf, int d32) { 353 cbuf.insts()->emit_int32(d32); 354 } 355 356 // emit 32 bit value and construct relocation entry from relocInfo::relocType 357 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc, 358 int format) { 359 cbuf.relocate(cbuf.insts_mark(), reloc, format); 360 cbuf.insts()->emit_int32(d32); 361 } 362 363 // emit 32 bit value and construct relocation entry from RelocationHolder 364 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec, 365 int format) { 366 #ifdef ASSERT 367 if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) { 368 
assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code"); 369 } 370 #endif 371 cbuf.relocate(cbuf.insts_mark(), rspec, format); 372 cbuf.insts()->emit_int32(d32); 373 } 374 375 // Access stack slot for load or store 376 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) { 377 emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src]) 378 if( -128 <= disp && disp <= 127 ) { 379 emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte 380 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 381 emit_d8 (cbuf, disp); // Displacement // R/M byte 382 } else { 383 emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte 384 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte 385 emit_d32(cbuf, disp); // Displacement // R/M byte 386 } 387 } 388 389 // rRegI ereg, memory mem) %{ // emit_reg_mem 390 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) { 391 // There is no index & no scale, use form without SIB byte 392 if ((index == 0x4) && 393 (scale == 0) && (base != ESP_enc)) { 394 // If no displacement, mode is 0x0; unless base is [EBP] 395 if ( (displace == 0) && (base != EBP_enc) ) { 396 emit_rm(cbuf, 0x0, reg_encoding, base); 397 } 398 else { // If 8-bit displacement, mode 0x1 399 if ((displace >= -128) && (displace <= 127) 400 && (disp_reloc == relocInfo::none) ) { 401 emit_rm(cbuf, 0x1, reg_encoding, base); 402 emit_d8(cbuf, displace); 403 } 404 else { // If 32-bit displacement 405 if (base == -1) { // Special flag for absolute address 406 emit_rm(cbuf, 0x0, reg_encoding, 0x5); 407 // (manual lies; no SIB needed here) 408 if ( disp_reloc != relocInfo::none ) { 409 emit_d32_reloc(cbuf, displace, disp_reloc, 1); 410 } else { 411 emit_d32 (cbuf, displace); 412 } 413 } 414 else { // Normal base + offset 415 emit_rm(cbuf, 0x2, reg_encoding, base); 416 if ( disp_reloc != relocInfo::none ) { 
417 emit_d32_reloc(cbuf, displace, disp_reloc, 1); 418 } else { 419 emit_d32 (cbuf, displace); 420 } 421 } 422 } 423 } 424 } 425 else { // Else, encode with the SIB byte 426 // If no displacement, mode is 0x0; unless base is [EBP] 427 if (displace == 0 && (base != EBP_enc)) { // If no displacement 428 emit_rm(cbuf, 0x0, reg_encoding, 0x4); 429 emit_rm(cbuf, scale, index, base); 430 } 431 else { // If 8-bit displacement, mode 0x1 432 if ((displace >= -128) && (displace <= 127) 433 && (disp_reloc == relocInfo::none) ) { 434 emit_rm(cbuf, 0x1, reg_encoding, 0x4); 435 emit_rm(cbuf, scale, index, base); 436 emit_d8(cbuf, displace); 437 } 438 else { // If 32-bit displacement 439 if (base == 0x04 ) { 440 emit_rm(cbuf, 0x2, reg_encoding, 0x4); 441 emit_rm(cbuf, scale, index, 0x04); 442 } else { 443 emit_rm(cbuf, 0x2, reg_encoding, 0x4); 444 emit_rm(cbuf, scale, index, base); 445 } 446 if ( disp_reloc != relocInfo::none ) { 447 emit_d32_reloc(cbuf, displace, disp_reloc, 1); 448 } else { 449 emit_d32 (cbuf, displace); 450 } 451 } 452 } 453 } 454 } 455 456 457 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) { 458 if( dst_encoding == src_encoding ) { 459 // reg-reg copy, use an empty encoding 460 } else { 461 emit_opcode( cbuf, 0x8B ); 462 emit_rm(cbuf, 0x3, dst_encoding, src_encoding ); 463 } 464 } 465 466 void emit_cmpfp_fixup(MacroAssembler& _masm) { 467 Label exit; 468 __ jccb(Assembler::noParity, exit); 469 __ pushf(); 470 // 471 // comiss/ucomiss instructions set ZF,PF,CF flags and 472 // zero OF,AF,SF for NaN values. 473 // Fixup flags by zeroing ZF,PF so that compare of NaN 474 // values returns 'less than' result (CF is set). 475 // Leave the rest of flags unchanged. 
476 // 477 // 7 6 5 4 3 2 1 0 478 // |S|Z|r|A|r|P|r|C| (r - reserved bit) 479 // 0 0 1 0 1 0 1 1 (0x2B) 480 // 481 __ andl(Address(rsp, 0), 0xffffff2b); 482 __ popf(); 483 __ bind(exit); 484 } 485 486 void emit_cmpfp3(MacroAssembler& _masm, Register dst) { 487 Label done; 488 __ movl(dst, -1); 489 __ jcc(Assembler::parity, done); 490 __ jcc(Assembler::below, done); 491 __ setb(Assembler::notEqual, dst); 492 __ movzbl(dst, dst); 493 __ bind(done); 494 } 495 496 497 //============================================================================= 498 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; 499 500 int Compile::ConstantTable::calculate_table_base_offset() const { 501 return 0; // absolute addressing, no offset 502 } 503 504 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } 505 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) { 506 ShouldNotReachHere(); 507 } 508 509 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { 510 // Empty encoding 511 } 512 513 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { 514 return 0; 515 } 516 517 #ifndef PRODUCT 518 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { 519 st->print("# MachConstantBaseNode (empty encoding)"); 520 } 521 #endif 522 523 524 //============================================================================= 525 #ifndef PRODUCT 526 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { 527 Compile* C = ra_->C; 528 529 int framesize = C->frame_size_in_bytes(); 530 int bangsize = C->bang_size_in_bytes(); 531 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 532 // Remove wordSize for return addr which is already pushed. 
533 framesize -= wordSize; 534 535 if (C->need_stack_bang(bangsize)) { 536 framesize -= wordSize; 537 st->print("# stack bang (%d bytes)", bangsize); 538 st->print("\n\t"); 539 st->print("PUSH EBP\t# Save EBP"); 540 if (framesize) { 541 st->print("\n\t"); 542 st->print("SUB ESP, #%d\t# Create frame",framesize); 543 } 544 } else { 545 st->print("SUB ESP, #%d\t# Create frame",framesize); 546 st->print("\n\t"); 547 framesize -= wordSize; 548 st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize); 549 } 550 551 if (VerifyStackAtCalls) { 552 st->print("\n\t"); 553 framesize -= wordSize; 554 st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize); 555 } 556 557 if( C->in_24_bit_fp_mode() ) { 558 st->print("\n\t"); 559 st->print("FLDCW \t# load 24 bit fpu control word"); 560 } 561 if (UseSSE >= 2 && VerifyFPU) { 562 st->print("\n\t"); 563 st->print("# verify FPU stack (must be clean on entry)"); 564 } 565 566 #ifdef ASSERT 567 if (VerifyStackAtCalls) { 568 st->print("\n\t"); 569 st->print("# stack alignment check"); 570 } 571 #endif 572 st->cr(); 573 } 574 #endif 575 576 577 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 578 Compile* C = ra_->C; 579 MacroAssembler _masm(&cbuf); 580 581 int framesize = C->frame_size_in_bytes(); 582 int bangsize = C->bang_size_in_bytes(); 583 584 __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode()); 585 586 C->set_frame_complete(cbuf.insts_size()); 587 588 if (C->has_mach_constant_base_node()) { 589 // NOTE: We set the table base offset here because users might be 590 // emitted before MachConstantBaseNode. 
591 Compile::ConstantTable& constant_table = C->constant_table(); 592 constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); 593 } 594 } 595 596 uint MachPrologNode::size(PhaseRegAlloc *ra_) const { 597 return MachNode::size(ra_); // too many variables; just compute it the hard way 598 } 599 600 int MachPrologNode::reloc() const { 601 return 0; // a large enough number 602 } 603 604 //============================================================================= 605 #ifndef PRODUCT 606 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 607 Compile *C = ra_->C; 608 int framesize = C->frame_size_in_bytes(); 609 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 610 // Remove two words for return addr and rbp, 611 framesize -= 2*wordSize; 612 613 if (C->max_vector_size() > 16) { 614 st->print("VZEROUPPER"); 615 st->cr(); st->print("\t"); 616 } 617 if (C->in_24_bit_fp_mode()) { 618 st->print("FLDCW standard control word"); 619 st->cr(); st->print("\t"); 620 } 621 if (framesize) { 622 st->print("ADD ESP,%d\t# Destroy frame",framesize); 623 st->cr(); st->print("\t"); 624 } 625 st->print_cr("POPL EBP"); st->print("\t"); 626 if (do_polling() && C->is_method_compilation()) { 627 st->print("TEST PollPage,EAX\t! Poll Safepoint"); 628 st->cr(); st->print("\t"); 629 } 630 } 631 #endif 632 633 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 634 Compile *C = ra_->C; 635 636 if (C->max_vector_size() > 16) { 637 // Clear upper bits of YMM registers when current compiled code uses 638 // wide vectors to avoid AVX <-> SSE transition penalty during call. 
639 MacroAssembler masm(&cbuf); 640 masm.vzeroupper(); 641 } 642 // If method set FPU control word, restore to standard control word 643 if (C->in_24_bit_fp_mode()) { 644 MacroAssembler masm(&cbuf); 645 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 646 } 647 648 int framesize = C->frame_size_in_bytes(); 649 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 650 // Remove two words for return addr and rbp, 651 framesize -= 2*wordSize; 652 653 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here 654 655 if (framesize >= 128) { 656 emit_opcode(cbuf, 0x81); // add SP, #framesize 657 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 658 emit_d32(cbuf, framesize); 659 } else if (framesize) { 660 emit_opcode(cbuf, 0x83); // add SP, #framesize 661 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 662 emit_d8(cbuf, framesize); 663 } 664 665 emit_opcode(cbuf, 0x58 | EBP_enc); 666 667 if (do_polling() && C->is_method_compilation()) { 668 cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0); 669 emit_opcode(cbuf,0x85); 670 emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX 671 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 672 } 673 } 674 675 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { 676 Compile *C = ra_->C; 677 // If method set FPU control word, restore to standard control word 678 int size = C->in_24_bit_fp_mode() ? 6 : 0; 679 if (C->max_vector_size() > 16) size += 3; // vzeroupper 680 if (do_polling() && C->is_method_compilation()) size += 6; 681 682 int framesize = C->frame_size_in_bytes(); 683 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); 684 // Remove two words for return addr and rbp, 685 framesize -= 2*wordSize; 686 687 size++; // popl rbp, 688 689 if (framesize >= 128) { 690 size += 6; 691 } else { 692 size += framesize ? 
3 : 0; 693 } 694 return size; 695 } 696 697 int MachEpilogNode::reloc() const { 698 return 0; // a large enough number 699 } 700 701 const Pipeline * MachEpilogNode::pipeline() const { 702 return MachNode::pipeline_class(); 703 } 704 705 int MachEpilogNode::safepoint_offset() const { return 0; } 706 707 //============================================================================= 708 709 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack }; 710 static enum RC rc_class( OptoReg::Name reg ) { 711 712 if( !OptoReg::is_valid(reg) ) return rc_bad; 713 if (OptoReg::is_stack(reg)) return rc_stack; 714 715 VMReg r = OptoReg::as_VMReg(reg); 716 if (r->is_Register()) return rc_int; 717 if (r->is_FloatRegister()) { 718 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); 719 return rc_float; 720 } 721 assert(r->is_XMMRegister(), "must be"); 722 return rc_xmm; 723 } 724 725 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg, 726 int opcode, const char *op_str, int size, outputStream* st ) { 727 if( cbuf ) { 728 emit_opcode (*cbuf, opcode ); 729 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); 730 #ifndef PRODUCT 731 } else if( !do_size ) { 732 if( size != 0 ) st->print("\n\t"); 733 if( opcode == 0x8B || opcode == 0x89 ) { // MOV 734 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); 735 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); 736 } else { // FLD, FST, PUSH, POP 737 st->print("%s [ESP + #%d]",op_str,offset); 738 } 739 #endif 740 } 741 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 742 return size+3+offset_size; 743 } 744 745 // Helper for XMM registers. Extra opcode bits, limited syntax. 
// Spill-copy helpers. Each helper has three modes selected by its arguments:
//  - cbuf != NULL:           emit the actual machine code into cbuf;
//  - cbuf == NULL, !do_size: print an assembly listing to st (non-PRODUCT);
//  - cbuf == NULL, do_size:  neither emit nor print — only compute size.
// In all modes the return value is the accumulated code size in bytes
// (input 'size' plus the bytes this helper accounts for); the size
// arithmetic must stay in exact agreement with what MacroAssembler emits.

// Copy between an XMM register and the stack slot [ESP + offset] with
// MOVSS (32-bit) or MOVSD (64-bit).  A 64-bit move is requested by passing
// an adjacent register pair (reg_lo+1 == reg_hi).  is_load selects the
// direction (stack -> register when true).
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  // Pre-compute the EVEX operand-size attributes used below to decide
  // whether an EVEX compressed disp8 covers 'offset'.
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                             UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS %s,[ESP + #%d]",
                             Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS [ESP + #%d],%s",
                             offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Size calculation.  With EVEX (UseAVX > 2) a displacement may be
  // compressed to one byte even when > 127, so ask the assembler.
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


// XMM-to-XMM register copy: MOVSD/MOVSS, or MOVAPD/MOVAPS when
// UseXmmRegToRegMoveAll is set.  A 64-bit (double) move is requested by
// passing adjacent pairs for both source and destination.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                             int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

// Copy a 32-bit value from a general-purpose register into an XMM
// register using MOVD.  32-bit only (hi halves are unused here).
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4; // EVEX adds two prefix bytes
}


// Copy a 32-bit value from an XMM register into a general-purpose
// register using MOVD.  32-bit only (hi halves are unused here).
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4; // EVEX adds two prefix bytes
}

// Integer register-to-register copy: MOV r32,r/m32 (opcode 0x8B + ModRM).
// Always 2 bytes, hence the fixed size+2 return.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

// Store an x87 float register to the stack.  If the value is not already
// in ST(0) (FPR1L), first FLD it to the top of stack and use the
// store-and-pop form (FSTP); otherwise use the plain store (FST).
// The actual memory store is delegated to impl_helper via the 0xDD/0xD9
// opcode and a reg-field selector (EBX_num = /3 FSTP, EDX_num = /2 FST).
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;                     // FLD ST(i) is a 2-byte instruction
  }

  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                         // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
914 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, 915 int src_hi, int dst_hi, uint ireg, outputStream* st); 916 917 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, 918 int stack_offset, int reg, uint ireg, outputStream* st); 919 920 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset, 921 int dst_offset, uint ireg, outputStream* st) { 922 int calc_size = 0; 923 int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 924 int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 925 switch (ireg) { 926 case Op_VecS: 927 calc_size = 3+src_offset_size + 3+dst_offset_size; 928 break; 929 case Op_VecD: 930 calc_size = 3+src_offset_size + 3+dst_offset_size; 931 src_offset += 4; 932 dst_offset += 4; 933 src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4); 934 dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4); 935 calc_size += 3+src_offset_size + 3+dst_offset_size; 936 break; 937 case Op_VecX: 938 case Op_VecY: 939 case Op_VecZ: 940 calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size; 941 break; 942 default: 943 ShouldNotReachHere(); 944 } 945 if (cbuf) { 946 MacroAssembler _masm(cbuf); 947 int offset = __ offset(); 948 switch (ireg) { 949 case Op_VecS: 950 __ pushl(Address(rsp, src_offset)); 951 __ popl (Address(rsp, dst_offset)); 952 break; 953 case Op_VecD: 954 __ pushl(Address(rsp, src_offset)); 955 __ popl (Address(rsp, dst_offset)); 956 __ pushl(Address(rsp, src_offset+4)); 957 __ popl (Address(rsp, dst_offset+4)); 958 break; 959 case Op_VecX: 960 __ movdqu(Address(rsp, -16), xmm0); 961 __ movdqu(xmm0, Address(rsp, src_offset)); 962 __ movdqu(Address(rsp, dst_offset), xmm0); 963 __ movdqu(xmm0, Address(rsp, -16)); 964 break; 965 case Op_VecY: 966 __ vmovdqu(Address(rsp, -32), xmm0); 967 __ vmovdqu(xmm0, Address(rsp, src_offset)); 968 __ vmovdqu(Address(rsp, dst_offset), xmm0); 969 __ 
vmovdqu(xmm0, Address(rsp, -32)); 970 case Op_VecZ: 971 __ evmovdqu(Address(rsp, -64), xmm0, 2); 972 __ evmovdqu(xmm0, Address(rsp, src_offset), 2); 973 __ evmovdqu(Address(rsp, dst_offset), xmm0, 2); 974 __ evmovdqu(xmm0, Address(rsp, -64), 2); 975 break; 976 default: 977 ShouldNotReachHere(); 978 } 979 int size = __ offset() - offset; 980 assert(size == calc_size, "incorrect size calculattion"); 981 return size; 982 #ifndef PRODUCT 983 } else if (!do_size) { 984 switch (ireg) { 985 case Op_VecS: 986 st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" 987 "popl [rsp + #%d]", 988 src_offset, dst_offset); 989 break; 990 case Op_VecD: 991 st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" 992 "popq [rsp + #%d]\n\t" 993 "pushl [rsp + #%d]\n\t" 994 "popq [rsp + #%d]", 995 src_offset, dst_offset, src_offset+4, dst_offset+4); 996 break; 997 case Op_VecX: 998 st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" 999 "movdqu xmm0, [rsp + #%d]\n\t" 1000 "movdqu [rsp + #%d], xmm0\n\t" 1001 "movdqu xmm0, [rsp - #16]", 1002 src_offset, dst_offset); 1003 break; 1004 case Op_VecY: 1005 st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" 1006 "vmovdqu xmm0, [rsp + #%d]\n\t" 1007 "vmovdqu [rsp + #%d], xmm0\n\t" 1008 "vmovdqu xmm0, [rsp - #32]", 1009 src_offset, dst_offset); 1010 case Op_VecZ: 1011 st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" 1012 "vmovdqu xmm0, [rsp + #%d]\n\t" 1013 "vmovdqu [rsp + #%d], xmm0\n\t" 1014 "vmovdqu xmm0, [rsp - #64]", 1015 src_offset, dst_offset); 1016 break; 1017 default: 1018 ShouldNotReachHere(); 1019 } 1020 #endif 1021 } 1022 return calc_size; 1023 } 1024 1025 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { 1026 // Get registers to move 1027 OptoReg::Name src_second = ra_->get_reg_second(in(1)); 1028 OptoReg::Name src_first = ra_->get_reg_first(in(1)); 1029 OptoReg::Name dst_second = ra_->get_reg_second(this ); 1030 
OptoReg::Name dst_first = ra_->get_reg_first(this ); 1031 1032 enum RC src_second_rc = rc_class(src_second); 1033 enum RC src_first_rc = rc_class(src_first); 1034 enum RC dst_second_rc = rc_class(dst_second); 1035 enum RC dst_first_rc = rc_class(dst_first); 1036 1037 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); 1038 1039 // Generate spill code! 1040 int size = 0; 1041 1042 if( src_first == dst_first && src_second == dst_second ) 1043 return size; // Self copy, no move 1044 1045 if (bottom_type()->isa_vect() != NULL) { 1046 uint ireg = ideal_reg(); 1047 assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); 1048 assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); 1049 assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); 1050 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1051 // mem -> mem 1052 int src_offset = ra_->reg2offset(src_first); 1053 int dst_offset = ra_->reg2offset(dst_first); 1054 return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st); 1055 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1056 return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st); 1057 } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1058 int stack_offset = ra_->reg2offset(dst_first); 1059 return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st); 1060 } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { 1061 int stack_offset = ra_->reg2offset(src_first); 1062 return vec_spill_helper(cbuf, do_size, true, stack_offset, dst_first, ireg, st); 1063 } else { 1064 ShouldNotReachHere(); 1065 } 1066 } 1067 1068 // -------------------------------------- 1069 // Check for mem-mem move. push/pop to move. 
1070 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { 1071 if( src_second == dst_first ) { // overlapping stack copy ranges 1072 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); 1073 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1074 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1075 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits 1076 } 1077 // move low bits 1078 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); 1079 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); 1080 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits 1081 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); 1082 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); 1083 } 1084 return size; 1085 } 1086 1087 // -------------------------------------- 1088 // Check for integer reg-reg copy 1089 if( src_first_rc == rc_int && dst_first_rc == rc_int ) 1090 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st); 1091 1092 // Check for integer store 1093 if( src_first_rc == rc_int && dst_first_rc == rc_stack ) 1094 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); 1095 1096 // Check for integer load 1097 if( dst_first_rc == rc_int && src_first_rc == rc_stack ) 1098 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); 1099 1100 // Check for integer reg-xmm reg copy 1101 if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { 1102 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1103 "no 64 bit integer-float reg moves" ); 1104 return 
impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1105 } 1106 // -------------------------------------- 1107 // Check for float reg-reg copy 1108 if( src_first_rc == rc_float && dst_first_rc == rc_float ) { 1109 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1110 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); 1111 if( cbuf ) { 1112 1113 // Note the mucking with the register encode to compensate for the 0/1 1114 // indexing issue mentioned in a comment in the reg_def sections 1115 // for FPR registers many lines above here. 1116 1117 if( src_first != FPR1L_num ) { 1118 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i) 1119 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 ); 1120 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1121 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1122 } else { 1123 emit_opcode (*cbuf, 0xDD ); // FST ST(i) 1124 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 ); 1125 } 1126 #ifndef PRODUCT 1127 } else if( !do_size ) { 1128 if( size != 0 ) st->print("\n\t"); 1129 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); 1130 else st->print( "FST %s", Matcher::regName[dst_first]); 1131 #endif 1132 } 1133 return size + ((src_first != FPR1L_num) ? 2+2 : 2); 1134 } 1135 1136 // Check for float store 1137 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { 1138 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); 1139 } 1140 1141 // Check for float load 1142 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { 1143 int offset = ra_->reg2offset(src_first); 1144 const char *op_str; 1145 int op; 1146 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
1147 op_str = "FLD_D"; 1148 op = 0xDD; 1149 } else { // 32-bit load 1150 op_str = "FLD_S"; 1151 op = 0xD9; 1152 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); 1153 } 1154 if( cbuf ) { 1155 emit_opcode (*cbuf, op ); 1156 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); 1157 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i) 1158 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] ); 1159 #ifndef PRODUCT 1160 } else if( !do_size ) { 1161 if( size != 0 ) st->print("\n\t"); 1162 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); 1163 #endif 1164 } 1165 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); 1166 return size + 3+offset_size+2; 1167 } 1168 1169 // Check for xmm reg-reg copy 1170 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { 1171 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || 1172 (src_first+1 == src_second && dst_first+1 == dst_second), 1173 "no non-adjacent float-moves" ); 1174 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1175 } 1176 1177 // Check for xmm reg-integer reg copy 1178 if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { 1179 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), 1180 "no 64 bit float-integer reg moves" ); 1181 return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st); 1182 } 1183 1184 // Check for xmm store 1185 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { 1186 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st); 1187 } 1188 1189 // Check for float xmm load 1190 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) { 1191 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); 1192 } 1193 1194 // Copy from float reg to xmm reg 1195 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) { 1196 
// copy to the top of stack from floating point reg 1197 // and use LEA to preserve flags 1198 if( cbuf ) { 1199 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8] 1200 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1201 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1202 emit_d8(*cbuf,0xF8); 1203 #ifndef PRODUCT 1204 } else if( !do_size ) { 1205 if( size != 0 ) st->print("\n\t"); 1206 st->print("LEA ESP,[ESP-8]"); 1207 #endif 1208 } 1209 size += 4; 1210 1211 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st); 1212 1213 // Copy from the temp memory to the xmm reg. 1214 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st); 1215 1216 if( cbuf ) { 1217 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8] 1218 emit_rm(*cbuf, 0x1, ESP_enc, 0x04); 1219 emit_rm(*cbuf, 0x0, 0x04, ESP_enc); 1220 emit_d8(*cbuf,0x08); 1221 #ifndef PRODUCT 1222 } else if( !do_size ) { 1223 if( size != 0 ) st->print("\n\t"); 1224 st->print("LEA ESP,[ESP+8]"); 1225 #endif 1226 } 1227 size += 4; 1228 return size; 1229 } 1230 1231 assert( size > 0, "missed a case" ); 1232 1233 // -------------------------------------------------------------------- 1234 // Check for second bits still needing moving. 
1235 if( src_second == dst_second ) 1236 return size; // Self copy; no move 1237 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); 1238 1239 // Check for second word int-int move 1240 if( src_second_rc == rc_int && dst_second_rc == rc_int ) 1241 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st); 1242 1243 // Check for second word integer store 1244 if( src_second_rc == rc_int && dst_second_rc == rc_stack ) 1245 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); 1246 1247 // Check for second word integer load 1248 if( dst_second_rc == rc_int && src_second_rc == rc_stack ) 1249 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); 1250 1251 1252 Unimplemented(); 1253 return 0; // Mute compiler 1254 } 1255 1256 #ifndef PRODUCT 1257 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { 1258 implementation( NULL, ra_, false, st ); 1259 } 1260 #endif 1261 1262 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1263 implementation( &cbuf, ra_, false, NULL ); 1264 } 1265 1266 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { 1267 return implementation( NULL, ra_, true, NULL ); 1268 } 1269 1270 1271 //============================================================================= 1272 #ifndef PRODUCT 1273 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1274 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1275 int reg = ra_->get_reg_first(this); 1276 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); 1277 } 1278 #endif 1279 1280 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1281 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1282 int reg = ra_->get_encode(this); 1283 if( offset >= 128 ) { 1284 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1285 emit_rm(cbuf, 0x2, reg, 
0x04); 1286 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1287 emit_d32(cbuf, offset); 1288 } 1289 else { 1290 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset] 1291 emit_rm(cbuf, 0x1, reg, 0x04); 1292 emit_rm(cbuf, 0x0, 0x04, ESP_enc); 1293 emit_d8(cbuf, offset); 1294 } 1295 } 1296 1297 uint BoxLockNode::size(PhaseRegAlloc *ra_) const { 1298 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); 1299 if( offset >= 128 ) { 1300 return 7; 1301 } 1302 else { 1303 return 4; 1304 } 1305 } 1306 1307 //============================================================================= 1308 #ifndef PRODUCT 1309 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { 1310 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); 1311 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); 1312 st->print_cr("\tNOP"); 1313 st->print_cr("\tNOP"); 1314 if( !OptoBreakpoint ) 1315 st->print_cr("\tNOP"); 1316 } 1317 #endif 1318 1319 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { 1320 MacroAssembler masm(&cbuf); 1321 #ifdef ASSERT 1322 uint insts_size = cbuf.insts_size(); 1323 #endif 1324 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); 1325 masm.jump_cc(Assembler::notEqual, 1326 RuntimeAddress(SharedRuntime::get_ic_miss_stub())); 1327 /* WARNING these NOPs are critical so that verified entry point is properly 1328 aligned for patching by NativeJump::patch_verified_entry() */ 1329 int nops_cnt = 2; 1330 if( !OptoBreakpoint ) // Leave space for int3 1331 nops_cnt += 1; 1332 masm.nop(nops_cnt); 1333 1334 assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node"); 1335 } 1336 1337 uint MachUEPNode::size(PhaseRegAlloc *ra_) const { 1338 return OptoBreakpoint ? 
11 : 12; 1339 } 1340 1341 1342 //============================================================================= 1343 1344 int Matcher::regnum_to_fpu_offset(int regnum) { 1345 return regnum - 32; // The FP registers are in the second chunk 1346 } 1347 1348 // This is UltraSparc specific, true just means we have fast l2f conversion 1349 const bool Matcher::convL2FSupported(void) { 1350 return true; 1351 } 1352 1353 // Is this branch offset short enough that a short branch can be used? 1354 // 1355 // NOTE: If the platform does not provide any short branch variants, then 1356 // this method should return false for offset 0. 1357 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { 1358 // The passed offset is relative to address of the branch. 1359 // On 86 a branch displacement is calculated relative to address 1360 // of a next instruction. 1361 offset -= br_size; 1362 1363 // the short version of jmpConUCF2 contains multiple branches, 1364 // making the reach slightly less 1365 if (rule == jmpConUCF2_rule) 1366 return (-126 <= offset && offset <= 125); 1367 return (-128 <= offset && offset <= 127); 1368 } 1369 1370 const bool Matcher::isSimpleConstant64(jlong value) { 1371 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. 1372 return false; 1373 } 1374 1375 // The ecx parameter to rep stos for the ClearArray node is in dwords. 1376 const bool Matcher::init_array_count_is_in_bytes = false; 1377 1378 // Threshold size for cleararray. 1379 const int Matcher::init_array_short_size = 8 * BytesPerLong; 1380 1381 // Needs 2 CMOV's for longs. 1382 const int Matcher::long_cmove_cost() { return 1; } 1383 1384 // No CMOVF/CMOVD with SSE/SSE2 1385 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; } 1386 1387 // Does the CPU require late expand (see block.cpp for description of late expand)? 
const bool Matcher::require_postalloc_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers? True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Narrow-oop addressing queries.  Guarded by ShouldNotCallThis(): not
// expected to be reached on this 32-bit port (presumably only relevant
// with compressed oops on 64-bit — confirm against shared Matcher code).
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}

bool Matcher::narrow_klass_use_complex_address() {
  ShouldNotCallThis();
  return true;
}


// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers. Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed. Else we split the double into 2 integer pieces and move it
// piece-by-piece. Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;


// Rewrite the memory operand of a MachNode so that an implicit null check
// cannot fault at an unintended address (win95-safe operand variants).
// 'idx' is the input edge that carries the null-checked pointer; the loop
// walks the operand array to find which operand owns that edge.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();       // Virtual call for number of operands
  uint skipped = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt = 1;                          // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                               // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  // Replace the operand with the matching *_win95_safe variant, preserving
  // its displacement and/or scale.
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;

// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers not be available to the callee.
bool Matcher::can_be_java_arg( int reg ) {
  if( reg == ECX_num || reg == EDX_num ) return true;
  if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true;
  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
  return false;
}

bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}

bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use hardware integer DIV instruction when
  // it is faster than a code which use multiply.
  // Only when constant divisor fits into 32 bit
  // (min_jint is excluded to get only correct
  // positive 32 bit values from negative).
  return VM_Version::has_fast_idiv() &&
         (divisor == (int)divisor && divisor != min_jint);
}

// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask();
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask();
}

// Register for DIVL projection of divmodL
// (no divmodL on 32-bit x86 — must not be reached)
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return EBP_REG_mask();
}

// Returns true if the high 32 bits of the value is known to be zero.
// True for (AndL x conK) with a constant mask whose high word is zero,
// and for a ConL constant whose high word is zero.
bool is_operand_hi32_zero(Node* n) {
  int opc = n->Opcode();
  if (opc == Op_AndL) {
    Node* o2 = n->in(2);
    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
      return true;
    }
  }
  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
    return true;
  }
  return false;
}

%}

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams. Encoding classes generate functions which are called by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction. Operands specify their base encoding interface with the
// interface keyword. There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
// operand to generate a function which returns its register number when
// queried. CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried. MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried. COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
// Instructions specify two basic values for encoding. They use the
// ins_encode keyword to specify their encoding class (which must be one of
// the class names specified in the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode. Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Build emit functions for each basic byte or larger field in the intel
  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
  // code in the enc_class source block. Emit functions will live in the
  // main source block for now. In future, we can generalize this by
  // adding a syntax that specifies the sizes of fields in an order,
  // so that the adlc can build the emit functions automagically

  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Operand-size override prefix (16-bit operand size)
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOV r32,0 — note: not XOR, so condition flags are preserved
  enc_class mov_r32_imm0( rRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
    emit_d32   ( cbuf, 0x0  );             // imm32==0x0
  %}

  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  enc_class Opc_plus(immI opcode, rRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02); }
    else { // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else { // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // Low 32 bits of a long immediate op (secondary opcode in the reg field)
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // High 32 bits of a long immediate op (tertiary opcode, HIGH half register)
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  enc_class OpcSReg (rRegI dst) %{ // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a long: BSWAP both halves, then exchange them
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // x87 conditional move (FCMOV family, 0xDA/0xDB opcodes)
  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  // Slow-path subtype check; on a miss, $primary selects whether EDI is
  // zeroed to signal failure.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
MacroAssembler masm(&cbuf); 1776 int start = masm.offset(); 1777 if (UseSSE >= 2) { 1778 if (VerifyFPU) { 1779 masm.verify_FPU(0, "must be empty in SSE2+ mode"); 1780 } 1781 } else { 1782 // External c_calling_convention expects the FPU stack to be 'clean'. 1783 // Compiled code leaves it dirty. Do cleanup now. 1784 masm.empty_FPU_stack(); 1785 } 1786 if (sizeof_FFree_Float_Stack_All == -1) { 1787 sizeof_FFree_Float_Stack_All = masm.offset() - start; 1788 } else { 1789 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); 1790 } 1791 %} 1792 1793 enc_class Verify_FPU_For_Leaf %{ 1794 if( VerifyFPU ) { 1795 MacroAssembler masm(&cbuf); 1796 masm.verify_FPU( -3, "Returning from Runtime Leaf call"); 1797 } 1798 %} 1799 1800 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf 1801 // This is the instruction starting address for relocation info. 1802 cbuf.set_insts_mark(); 1803 $$$emit8$primary; 1804 // CALL directly to the runtime 1805 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1806 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1807 1808 if (UseSSE >= 2) { 1809 MacroAssembler _masm(&cbuf); 1810 BasicType rt = tf()->return_type(); 1811 1812 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { 1813 // A C runtime call where the return value is unused. In SSE2+ 1814 // mode the result needs to be removed from the FPU stack. It's 1815 // likely that this function call could be removed by the 1816 // optimizer if the C function is a pure function. 
1817 __ ffree(0); 1818 } else if (rt == T_FLOAT) { 1819 __ lea(rsp, Address(rsp, -4)); 1820 __ fstp_s(Address(rsp, 0)); 1821 __ movflt(xmm0, Address(rsp, 0)); 1822 __ lea(rsp, Address(rsp, 4)); 1823 } else if (rt == T_DOUBLE) { 1824 __ lea(rsp, Address(rsp, -8)); 1825 __ fstp_d(Address(rsp, 0)); 1826 __ movdbl(xmm0, Address(rsp, 0)); 1827 __ lea(rsp, Address(rsp, 8)); 1828 } 1829 } 1830 %} 1831 1832 1833 enc_class pre_call_resets %{ 1834 // If method sets FPU control word restore it here 1835 debug_only(int off0 = cbuf.insts_size()); 1836 if (ra_->C->in_24_bit_fp_mode()) { 1837 MacroAssembler _masm(&cbuf); 1838 __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); 1839 } 1840 if (ra_->C->max_vector_size() > 16) { 1841 // Clear upper bits of YMM registers when current compiled code uses 1842 // wide vectors to avoid AVX <-> SSE transition penalty during call. 1843 MacroAssembler _masm(&cbuf); 1844 __ vzeroupper(); 1845 } 1846 debug_only(int off1 = cbuf.insts_size()); 1847 assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); 1848 %} 1849 1850 enc_class post_call_FPU %{ 1851 // If method sets FPU control word do it here also 1852 if (Compile::current()->in_24_bit_fp_mode()) { 1853 MacroAssembler masm(&cbuf); 1854 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); 1855 } 1856 %} 1857 1858 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL 1859 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine 1860 // who we intended to call. 
1861 cbuf.set_insts_mark(); 1862 $$$emit8$primary; 1863 if (!_method) { 1864 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1865 runtime_call_Relocation::spec(), RELOC_IMM32 ); 1866 } else if (_optimized_virtual) { 1867 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1868 opt_virtual_call_Relocation::spec(), RELOC_IMM32 ); 1869 } else { 1870 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4), 1871 static_call_Relocation::spec(), RELOC_IMM32 ); 1872 } 1873 if (_method) { // Emit stub for static call. 1874 CompiledStaticCall::emit_to_interp_stub(cbuf); 1875 } 1876 %} 1877 1878 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL 1879 MacroAssembler _masm(&cbuf); 1880 __ ic_call((address)$meth$$method); 1881 %} 1882 1883 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL 1884 int disp = in_bytes(Method::from_compiled_offset()); 1885 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); 1886 1887 // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] 1888 cbuf.set_insts_mark(); 1889 $$$emit8$primary; 1890 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte 1891 emit_d8(cbuf, disp); // Displacement 1892 1893 %} 1894 1895 // Following encoding is no longer used, but may be restored if calling 1896 // convention changes significantly. 
  // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
  //
  // enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
  //   // int ic_reg     = Matcher::inline_cache_reg();
  //   // int ic_encode  = Matcher::_regEncode[ic_reg];
  //   // int imo_reg    = Matcher::interpreter_method_oop_reg();
  //   // int imo_encode = Matcher::_regEncode[imo_reg];
  //
  //   // // Interpreter expects method_oop in EBX, currently a callee-saved register,
  //   // // so we load it immediately before the call
  //   // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
  //   // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
  //
  //   // xor rbp,ebp
  //   emit_opcode(cbuf, 0x33);
  //   emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
  //
  //   // CALL to interpreter.
  //   cbuf.set_insts_mark();
  //   $$$emit8$primary;
  //   emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
  //               runtime_call_Relocation::spec(), RELOC_IMM32 );
  // %}

  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the low word of a long immediate; a zero constant is emitted as the
  // shorter XOR dst,dst.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the high word of a long immediate into the paired (encoding+2)
  // register; zero again becomes XOR dst,dst.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}


  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( rRegI dst, rRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}

  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx  (restore the swap done above)
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}

  // Materialize the ZF==0 result of a preceding compare-exchange as a 0/1
  // boolean in 'res'.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
  %}

  // Double-word shift by a 1..31 count: SHLD/SHRD ($tertiary) moves bits
  // across the register pair, then $primary/$secondary shifts the other half.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long by 32..63: move hi into lo, shift lo by
  // cnt-32, then sign-fill hi with a 31-bit shift.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Shift of a long by 32..63 with zero fill: move one half into the other,
  // shift it by cnt-32, then clear the vacated half.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}

  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant; // select lo/hi half of the double
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
  %}

  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
  %}

  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    relocInfo::relocType disp_reloc = relocInfo::none;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class enc_FPR_store(memory mem, regDPR src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
  %}

  enc_class neg_reg(rRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Branch-free p += (p < q) ? y : 0 using SBB to build an all-ones/zero mask.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
    // small:
    // SHLD $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
    // small:
    // SHRD $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}


  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_DPR( regDPR dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Multiply by the first strictfp subnormal bias constant to scale the
  // value before a strict FP operation.
  enc_class strictfp_bias1( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Multiply by the second strictfp subnormal bias constant to undo the
  // scaling applied by strictfp_bias1.
  enc_class strictfp_bias2( regDPR dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);      // SIB byte
    emit_d32(cbuf, $dst$$disp);                  // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  enc_class Pop_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class Push_Reg_FPR( regFPR dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}

  // Push FPU's float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}


  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}

  // Push both XMM doubles onto the x87 stack (src1 first, then src0 on top)
  // via an 8-byte stack temporary.
  enc_class Push_ModD_encoding(regD src0, regD src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  // Float version of Push_ModD_encoding (4-byte stack temporary).
  enc_class Push_ModF_encoding(regF src0, regF src1) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
    __ fld_s(Address(rsp, 0));
  %}

  // Pop the x87 TOS double into an XMM register and release the temporary.
  enc_class Push_ResultD(regD dst) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_d(Address(rsp, 0));
    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 8);
  %}

  // Pop the x87 TOS float into an XMM register; 'd8' is the number of bytes
  // of stack temporary to release.
  enc_class Push_ResultF(regF dst, immI d8) %{
    MacroAssembler _masm(&cbuf);
    __ fstp_s(Address(rsp, 0));
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, $d8$$constant);
  %}

  enc_class Push_SrcD(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class push_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ subptr(rsp, 8);
  %}

  enc_class pop_stack_temp_qword() %{
    MacroAssembler _masm(&cbuf);
    __ addptr(rsp, 8);
  %}

  // Copy an XMM double through the existing stack temporary onto the x87 stack.
  enc_class push_xmm_to_fpr1(regD src) %{
    MacroAssembler _masm(&cbuf);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
  %}

  enc_class Push_Result_Mod_DPR( regDPR src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}

  enc_class emitModDPR() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp  ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}

  //     fnstsw_ax();
  //     sahf();
  //     movl(dst, nan_result);
  //     jcc(Assembler::parity, exit);
  //     movl(dst, less_result);
  //     jcc(Assembler::below, exit);
  //     movl(dst, equal_result);
  //     jcc(Assembler::equal, exit);
  //     movl(dst, greater_result);

  // less_result     =  1;
  // greater_result  = -1;
  // equal_result    = 0;
  // nan_result      = -1;

  enc_class CmpF_Result(rRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // done:
  %}

  // Sign-extend a 32-bit int into a long register pair.
  enc_class convert_int_long( regL dst, rRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add  SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}

  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL   EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR    EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}

  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,ESI
    emit_opcode(
cbuf, 0x03 ); 2783 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg ); 2784 %} 2785 2786 enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ 2787 // Basic idea: lo(result) = lo(src * y_lo) 2788 // hi(result) = hi(src * y_lo) + lo(src * y_hi) 2789 // IMUL $tmp,EDX,$src 2790 emit_opcode( cbuf, 0x6B ); 2791 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) ); 2792 emit_d8( cbuf, (int)$src$$constant ); 2793 // MOV EDX,$src 2794 emit_opcode(cbuf, 0xB8 + EDX_enc); 2795 emit_d32( cbuf, (int)$src$$constant ); 2796 // MUL EDX:EAX,EDX 2797 emit_opcode( cbuf, 0xF7 ); 2798 emit_rm( cbuf, 0x3, 0x4, EDX_enc ); 2799 // ADD EDX,ESI 2800 emit_opcode( cbuf, 0x03 ); 2801 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg ); 2802 %} 2803 2804 enc_class long_div( eRegL src1, eRegL src2 ) %{ 2805 // PUSH src1.hi 2806 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2807 // PUSH src1.lo 2808 emit_opcode(cbuf, 0x50+$src1$$reg ); 2809 // PUSH src2.hi 2810 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2811 // PUSH src2.lo 2812 emit_opcode(cbuf, 0x50+$src2$$reg ); 2813 // CALL directly to the runtime 2814 cbuf.set_insts_mark(); 2815 emit_opcode(cbuf,0xE8); // Call into runtime 2816 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2817 // Restore stack 2818 emit_opcode(cbuf, 0x83); // add SP, #framesize 2819 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2820 emit_d8(cbuf, 4*4); 2821 %} 2822 2823 enc_class long_mod( eRegL src1, eRegL src2 ) %{ 2824 // PUSH src1.hi 2825 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) ); 2826 // PUSH src1.lo 2827 emit_opcode(cbuf, 0x50+$src1$$reg ); 2828 // PUSH src2.hi 2829 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) ); 2830 // PUSH src2.lo 2831 emit_opcode(cbuf, 0x50+$src2$$reg ); 2832 // CALL directly to the runtime 2833 cbuf.set_insts_mark(); 2834 emit_opcode(cbuf,0xE8); // Call into runtime 2835 emit_d32_reloc(cbuf, 
(CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2836 // Restore stack 2837 emit_opcode(cbuf, 0x83); // add SP, #framesize 2838 emit_rm(cbuf, 0x3, 0x00, ESP_enc); 2839 emit_d8(cbuf, 4*4); 2840 %} 2841 2842 enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ 2843 // MOV $tmp,$src.lo 2844 emit_opcode(cbuf, 0x8B); 2845 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg); 2846 // OR $tmp,$src.hi 2847 emit_opcode(cbuf, 0x0B); 2848 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg)); 2849 %} 2850 2851 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ 2852 // CMP $src1.lo,$src2.lo 2853 emit_opcode( cbuf, 0x3B ); 2854 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2855 // JNE,s skip 2856 emit_cc(cbuf, 0x70, 0x5); 2857 emit_d8(cbuf,2); 2858 // CMP $src1.hi,$src2.hi 2859 emit_opcode( cbuf, 0x3B ); 2860 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) ); 2861 %} 2862 2863 enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ 2864 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits 2865 emit_opcode( cbuf, 0x3B ); 2866 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg ); 2867 // MOV $tmp,$src1.hi 2868 emit_opcode( cbuf, 0x8B ); 2869 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) ); 2870 // SBB $tmp,$src2.hi\t! Compute flags for long compare 2871 emit_opcode( cbuf, 0x1B ); 2872 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) ); 2873 %} 2874 2875 enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ 2876 // XOR $tmp,$tmp 2877 emit_opcode(cbuf,0x33); // XOR 2878 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg); 2879 // CMP $tmp,$src.lo 2880 emit_opcode( cbuf, 0x3B ); 2881 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg ); 2882 // SBB $tmp,$src.hi 2883 emit_opcode( cbuf, 0x1B ); 2884 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) ); 2885 %} 2886 2887 // Sniff, sniff... 
smells like Gnu Superoptimizer 2888 enc_class neg_long( eRegL dst ) %{ 2889 emit_opcode(cbuf,0xF7); // NEG hi 2890 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2891 emit_opcode(cbuf,0xF7); // NEG lo 2892 emit_rm (cbuf,0x3, 0x3, $dst$$reg ); 2893 emit_opcode(cbuf,0x83); // SBB hi,0 2894 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg)); 2895 emit_d8 (cbuf,0 ); 2896 %} 2897 2898 enc_class enc_pop_rdx() %{ 2899 emit_opcode(cbuf,0x5A); 2900 %} 2901 2902 enc_class enc_rethrow() %{ 2903 cbuf.set_insts_mark(); 2904 emit_opcode(cbuf, 0xE9); // jmp entry 2905 emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4, 2906 runtime_call_Relocation::spec(), RELOC_IMM32 ); 2907 %} 2908 2909 2910 // Convert a double to an int. Java semantics require we do complex 2911 // manglelations in the corner cases. So we set the rounding mode to 2912 // 'zero', store the darned double down as an int, and reset the 2913 // rounding mode to 'nearest'. The hardware throws an exception which 2914 // patches up the correct value directly to the stack. 2915 enc_class DPR2I_encoding( regDPR src ) %{ 2916 // Flip to round-to-zero mode. We attempted to allow invalid-op 2917 // exceptions here, so that a NAN or other corner-case value will 2918 // thrown an exception (but normal values get converted at full speed). 2919 // However, I2C adapters and other float-stack manglers leave pending 2920 // invalid-op exceptions hanging. We would have to clear them before 2921 // enabling them and that is more expensive than just testing for the 2922 // invalid value Intel stores down in the corner cases. 2923 emit_opcode(cbuf,0xD9); // FLDCW trunc 2924 emit_opcode(cbuf,0x2D); 2925 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 2926 // Allocate a word 2927 emit_opcode(cbuf,0x83); // SUB ESP,4 2928 emit_opcode(cbuf,0xEC); 2929 emit_d8(cbuf,0x04); 2930 // Encoding assumes a double has been pushed into FPR0. 
2931 // Store down the double as an int, popping the FPU stack 2932 emit_opcode(cbuf,0xDB); // FISTP [ESP] 2933 emit_opcode(cbuf,0x1C); 2934 emit_d8(cbuf,0x24); 2935 // Restore the rounding mode; mask the exception 2936 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2937 emit_opcode(cbuf,0x2D); 2938 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2939 ? (int)StubRoutines::addr_fpu_cntrl_wrd_24() 2940 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 2941 2942 // Load the converted int; adjust CPU stack 2943 emit_opcode(cbuf,0x58); // POP EAX 2944 emit_opcode(cbuf,0x3D); // CMP EAX,imm 2945 emit_d32 (cbuf,0x80000000); // 0x80000000 2946 emit_opcode(cbuf,0x75); // JNE around_slow_call 2947 emit_d8 (cbuf,0x07); // Size of slow_call 2948 // Push src onto stack slow-path 2949 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2950 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2951 // CALL directly to the runtime 2952 cbuf.set_insts_mark(); 2953 emit_opcode(cbuf,0xE8); // Call into runtime 2954 emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2955 // Carry on here... 2956 %} 2957 2958 enc_class DPR2L_encoding( regDPR src ) %{ 2959 emit_opcode(cbuf,0xD9); // FLDCW trunc 2960 emit_opcode(cbuf,0x2D); 2961 emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc()); 2962 // Allocate a word 2963 emit_opcode(cbuf,0x83); // SUB ESP,8 2964 emit_opcode(cbuf,0xEC); 2965 emit_d8(cbuf,0x08); 2966 // Encoding assumes a double has been pushed into FPR0. 2967 // Store down the double as a long, popping the FPU stack 2968 emit_opcode(cbuf,0xDF); // FISTP [ESP] 2969 emit_opcode(cbuf,0x3C); 2970 emit_d8(cbuf,0x24); 2971 // Restore the rounding mode; mask the exception 2972 emit_opcode(cbuf,0xD9); // FLDCW std/24-bit mode 2973 emit_opcode(cbuf,0x2D); 2974 emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode() 2975 ? 
(int)StubRoutines::addr_fpu_cntrl_wrd_24() 2976 : (int)StubRoutines::addr_fpu_cntrl_wrd_std()); 2977 2978 // Load the converted int; adjust CPU stack 2979 emit_opcode(cbuf,0x58); // POP EAX 2980 emit_opcode(cbuf,0x5A); // POP EDX 2981 emit_opcode(cbuf,0x81); // CMP EDX,imm 2982 emit_d8 (cbuf,0xFA); // rdx 2983 emit_d32 (cbuf,0x80000000); // 0x80000000 2984 emit_opcode(cbuf,0x75); // JNE around_slow_call 2985 emit_d8 (cbuf,0x07+4); // Size of slow_call 2986 emit_opcode(cbuf,0x85); // TEST EAX,EAX 2987 emit_opcode(cbuf,0xC0); // 2/rax,/rax, 2988 emit_opcode(cbuf,0x75); // JNE around_slow_call 2989 emit_d8 (cbuf,0x07); // Size of slow_call 2990 // Push src onto stack slow-path 2991 emit_opcode(cbuf,0xD9 ); // FLD ST(i) 2992 emit_d8 (cbuf,0xC0-1+$src$$reg ); 2993 // CALL directly to the runtime 2994 cbuf.set_insts_mark(); 2995 emit_opcode(cbuf,0xE8); // Call into runtime 2996 emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); 2997 // Carry on here... 
2998 %} 2999 3000 enc_class FMul_ST_reg( eRegFPR src1 ) %{ 3001 // Operand was loaded from memory into fp ST (stack top) 3002 // FMUL ST,$src /* D8 C8+i */ 3003 emit_opcode(cbuf, 0xD8); 3004 emit_opcode(cbuf, 0xC8 + $src1$$reg); 3005 %} 3006 3007 enc_class FAdd_ST_reg( eRegFPR src2 ) %{ 3008 // FADDP ST,src2 /* D8 C0+i */ 3009 emit_opcode(cbuf, 0xD8); 3010 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3011 //could use FADDP src2,fpST /* DE C0+i */ 3012 %} 3013 3014 enc_class FAddP_reg_ST( eRegFPR src2 ) %{ 3015 // FADDP src2,ST /* DE C0+i */ 3016 emit_opcode(cbuf, 0xDE); 3017 emit_opcode(cbuf, 0xC0 + $src2$$reg); 3018 %} 3019 3020 enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ 3021 // Operand has been loaded into fp ST (stack top) 3022 // FSUB ST,$src1 3023 emit_opcode(cbuf, 0xD8); 3024 emit_opcode(cbuf, 0xE0 + $src1$$reg); 3025 3026 // FDIV 3027 emit_opcode(cbuf, 0xD8); 3028 emit_opcode(cbuf, 0xF0 + $src2$$reg); 3029 %} 3030 3031 enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ 3032 // Operand was loaded from memory into fp ST (stack top) 3033 // FADD ST,$src /* D8 C0+i */ 3034 emit_opcode(cbuf, 0xD8); 3035 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3036 3037 // FMUL ST,src2 /* D8 C*+i */ 3038 emit_opcode(cbuf, 0xD8); 3039 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3040 %} 3041 3042 3043 enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ 3044 // Operand was loaded from memory into fp ST (stack top) 3045 // FADD ST,$src /* D8 C0+i */ 3046 emit_opcode(cbuf, 0xD8); 3047 emit_opcode(cbuf, 0xC0 + $src1$$reg); 3048 3049 // FMULP src2,ST /* DE C8+i */ 3050 emit_opcode(cbuf, 0xDE); 3051 emit_opcode(cbuf, 0xC8 + $src2$$reg); 3052 %} 3053 3054 // Atomically load the volatile long 3055 enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ 3056 emit_opcode(cbuf,0xDF); 3057 int rm_byte_opcode = 0x05; 3058 int base = $mem$$base; 3059 int index = $mem$$index; 3060 int scale = $mem$$scale; 3061 int displace = $mem$$disp; 3062 relocInfo::relocType disp_reloc = 
$mem->disp_reloc(); // disp-as-oop when working with static globals 3063 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3064 store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp ); 3065 %} 3066 3067 // Volatile Store Long. Must be atomic, so move it into 3068 // the FP TOS and then do a 64-bit FIST. Has to probe the 3069 // target address before the store (for null-ptr checks) 3070 // so the memory operand is used twice in the encoding. 3071 enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ 3072 store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp ); 3073 cbuf.set_insts_mark(); // Mark start of FIST in case $mem has an oop 3074 emit_opcode(cbuf,0xDF); 3075 int rm_byte_opcode = 0x07; 3076 int base = $mem$$base; 3077 int index = $mem$$index; 3078 int scale = $mem$$scale; 3079 int displace = $mem$$disp; 3080 relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals 3081 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc); 3082 %} 3083 3084 // Safepoint Poll. This polls the safepoint page, and causes an 3085 // exception if it is not readable. Unfortunately, it kills the condition code 3086 // in the process 3087 // We current use TESTL [spp],EDI 3088 // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0 3089 3090 enc_class Safepoint_Poll() %{ 3091 cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0); 3092 emit_opcode(cbuf,0x85); 3093 emit_rm (cbuf, 0x0, 0x7, 0x5); 3094 emit_d32(cbuf, (intptr_t)os::get_polling_page()); 3095 %} 3096 %} 3097 3098 3099 //----------FRAME-------------------------------------------------------------- 3100 // Definition of frame structure and management information. 
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                                   (to get allocators register number
//  G  Owned by      |        |      v    add OptoReg::stack0())
//  r   CALLER       |        |
//  o     |          +--------+      pad to even-align allocators stack-slot
//  w     V          |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^          |   in   |  5
//        |          |  args  |  4   Holes in incoming args owned by SELF
//  |     |          |        |  3
//  |     |          +--------+
//  V     |          | old out|      Empty on Intel, window on Sparc
//        |    old   |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, even aligned
//        |          |   in   |  3   area for Intel ret address
//     Owned by      |preserve|      Empty on Sparc.
//       SELF        +--------+
//        |          |  pad2  |  2   pad to align old SP
//        |          +--------+  1
//        |          |  locks |  0
//        |          +--------+----> OptoReg::stack0(), even aligned
//        |          |  pad1  | 11   pad to align new SP
//        |          +--------+
//        |          |        | 10
//        |          |  spills|  9   spills
//        V          |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^          |  out   |  7
//        |          |  args  |  6   Holes in outgoing args owned by CALLEE
//   Owned by        +--------+
//     CALLEE        | new out|  6   Empty on Intel, window on Sparc
//        |    new   |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |          |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

frame %{
  // What direction does stack grow in (assumed to be same for C & Java)
  stack_direction(TOWARDS_LOW);

  // These three registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  return_addr(STACK - 1 +
              round_to((Compile::current()->in_preserve_stack_slots() +
                        Compile::current()->fixed_slots()),
                       stack_alignment_in_slots()));

  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}


  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of C & interpreter return values
  // Tables are indexed by ideal register number; FP results live on the
  // x87 stack (FPR1) for C callees unless SSE2 pretends they are in XMM0.
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

  // Location of return values
  // NOTE: unlike c_return_value above, floats use XMM0 already at UseSSE>=1
  // (compiled Java code keeps floats in XMM registers from SSE1 on) --
  // the asymmetry with the C convention is deliberate.
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}

%}

//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);           // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate

// Any 32-bit integer constant.
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant that fits in a signed 8-bit immediate.
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
// Integer constant that fits in a signed 16-bit immediate.
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Int Immediate non-negative
operand immU31()
%{
  predicate(n->get_int() >= 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in the 1..31 range (low word of a long shift).
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in the 32..63 range (shifts entirely into the high word).
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant 1.
operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant 2.
operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant 3.
operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus-one
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate that fits in a signed 32-bit value
// (sign-extending the low 32 bits reproduces the long).
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}

//Double Immediate zero
operand immDPR0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one
operand immDPR1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 form, pre-SSE2 only)
operand immDPR() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 form)
operand immD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero
operand immFPR0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one
operand immFPR1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 form, no-SSE only)
operand immFPR() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE form)
operand immF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0
operand immF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Register Operands
// Integer Register
operand rRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register
operand xRegI(rRegI reg) %{
  constraint(ALLOC_IN_RC(int_x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(rRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(rRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(rRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(rRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(rRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register excluding EAX (and, per the match list, EBX);
// see the nax_reg register class for the authoritative membership.
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding EAX and EDX (the IMUL/IDIV implicit pair);
// see the nadx_reg register class for the authoritative membership.
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding ECX (the implicit shift-count register);
// see the ncx_reg register class for the authoritative membership.
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(esi_reg));
   match(reg);
   match(rRegI);

   format %{ "ESI" %}
   interface(REG_INTER);
%}

// Pointer Register
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

operand eRegP() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

//
On windows95, EBP is not safe to use for implicit null tests. 3777 operand eRegP_no_EBP() %{ 3778 constraint(ALLOC_IN_RC(int_reg_no_rbp)); 3779 match(RegP); 3780 match(eAXRegP); 3781 match(eBXRegP); 3782 match(eCXRegP); 3783 match(eDIRegP); 3784 3785 op_cost(100); 3786 format %{ %} 3787 interface(REG_INTER); 3788 %} 3789 3790 operand naxRegP() %{ 3791 constraint(ALLOC_IN_RC(nax_reg)); 3792 match(RegP); 3793 match(eBXRegP); 3794 match(eDXRegP); 3795 match(eCXRegP); 3796 match(eSIRegP); 3797 match(eDIRegP); 3798 3799 format %{ %} 3800 interface(REG_INTER); 3801 %} 3802 3803 operand nabxRegP() %{ 3804 constraint(ALLOC_IN_RC(nabx_reg)); 3805 match(RegP); 3806 match(eCXRegP); 3807 match(eDXRegP); 3808 match(eSIRegP); 3809 match(eDIRegP); 3810 3811 format %{ %} 3812 interface(REG_INTER); 3813 %} 3814 3815 operand pRegP() %{ 3816 constraint(ALLOC_IN_RC(p_reg)); 3817 match(RegP); 3818 match(eBXRegP); 3819 match(eDXRegP); 3820 match(eSIRegP); 3821 match(eDIRegP); 3822 3823 format %{ %} 3824 interface(REG_INTER); 3825 %} 3826 3827 // Special Registers 3828 // Return a pointer value 3829 operand eAXRegP(eRegP reg) %{ 3830 constraint(ALLOC_IN_RC(eax_reg)); 3831 match(reg); 3832 format %{ "EAX" %} 3833 interface(REG_INTER); 3834 %} 3835 3836 // Used in AtomicAdd 3837 operand eBXRegP(eRegP reg) %{ 3838 constraint(ALLOC_IN_RC(ebx_reg)); 3839 match(reg); 3840 format %{ "EBX" %} 3841 interface(REG_INTER); 3842 %} 3843 3844 // Tail-call (interprocedural jump) to interpreter 3845 operand eCXRegP(eRegP reg) %{ 3846 constraint(ALLOC_IN_RC(ecx_reg)); 3847 match(reg); 3848 format %{ "ECX" %} 3849 interface(REG_INTER); 3850 %} 3851 3852 operand eSIRegP(eRegP reg) %{ 3853 constraint(ALLOC_IN_RC(esi_reg)); 3854 match(reg); 3855 format %{ "ESI" %} 3856 interface(REG_INTER); 3857 %} 3858 3859 // Used in rep stosw 3860 operand eDIRegP(eRegP reg) %{ 3861 constraint(ALLOC_IN_RC(edi_reg)); 3862 match(reg); 3863 format %{ "EDI" %} 3864 interface(REG_INTER); 3865 %} 3866 3867 operand eBPRegP() %{ 
3868 constraint(ALLOC_IN_RC(ebp_reg)); 3869 match(RegP); 3870 format %{ "EBP" %} 3871 interface(REG_INTER); 3872 %} 3873 3874 operand eRegL() %{ 3875 constraint(ALLOC_IN_RC(long_reg)); 3876 match(RegL); 3877 match(eADXRegL); 3878 3879 format %{ %} 3880 interface(REG_INTER); 3881 %} 3882 3883 operand eADXRegL( eRegL reg ) %{ 3884 constraint(ALLOC_IN_RC(eadx_reg)); 3885 match(reg); 3886 3887 format %{ "EDX:EAX" %} 3888 interface(REG_INTER); 3889 %} 3890 3891 operand eBCXRegL( eRegL reg ) %{ 3892 constraint(ALLOC_IN_RC(ebcx_reg)); 3893 match(reg); 3894 3895 format %{ "EBX:ECX" %} 3896 interface(REG_INTER); 3897 %} 3898 3899 // Special case for integer high multiply 3900 operand eADXRegL_low_only() %{ 3901 constraint(ALLOC_IN_RC(eadx_reg)); 3902 match(RegL); 3903 3904 format %{ "EAX" %} 3905 interface(REG_INTER); 3906 %} 3907 3908 // Flags register, used as output of compare instructions 3909 operand eFlagsReg() %{ 3910 constraint(ALLOC_IN_RC(int_flags)); 3911 match(RegFlags); 3912 3913 format %{ "EFLAGS" %} 3914 interface(REG_INTER); 3915 %} 3916 3917 // Flags register, used as output of FLOATING POINT compare instructions 3918 operand eFlagsRegU() %{ 3919 constraint(ALLOC_IN_RC(int_flags)); 3920 match(RegFlags); 3921 3922 format %{ "EFLAGS_U" %} 3923 interface(REG_INTER); 3924 %} 3925 3926 operand eFlagsRegUCF() %{ 3927 constraint(ALLOC_IN_RC(int_flags)); 3928 match(RegFlags); 3929 predicate(false); 3930 3931 format %{ "EFLAGS_U_CF" %} 3932 interface(REG_INTER); 3933 %} 3934 3935 // Condition Code Register used by long compare 3936 operand flagsReg_long_LTGE() %{ 3937 constraint(ALLOC_IN_RC(int_flags)); 3938 match(RegFlags); 3939 format %{ "FLAGS_LTGE" %} 3940 interface(REG_INTER); 3941 %} 3942 operand flagsReg_long_EQNE() %{ 3943 constraint(ALLOC_IN_RC(int_flags)); 3944 match(RegFlags); 3945 format %{ "FLAGS_EQNE" %} 3946 interface(REG_INTER); 3947 %} 3948 operand flagsReg_long_LEGT() %{ 3949 constraint(ALLOC_IN_RC(int_flags)); 3950 match(RegFlags); 3951 format %{ 
"FLAGS_LEGT" %} 3952 interface(REG_INTER); 3953 %} 3954 3955 // Float register operands 3956 operand regDPR() %{ 3957 predicate( UseSSE < 2 ); 3958 constraint(ALLOC_IN_RC(fp_dbl_reg)); 3959 match(RegD); 3960 match(regDPR1); 3961 match(regDPR2); 3962 format %{ %} 3963 interface(REG_INTER); 3964 %} 3965 3966 operand regDPR1(regDPR reg) %{ 3967 predicate( UseSSE < 2 ); 3968 constraint(ALLOC_IN_RC(fp_dbl_reg0)); 3969 match(reg); 3970 format %{ "FPR1" %} 3971 interface(REG_INTER); 3972 %} 3973 3974 operand regDPR2(regDPR reg) %{ 3975 predicate( UseSSE < 2 ); 3976 constraint(ALLOC_IN_RC(fp_dbl_reg1)); 3977 match(reg); 3978 format %{ "FPR2" %} 3979 interface(REG_INTER); 3980 %} 3981 3982 operand regnotDPR1(regDPR reg) %{ 3983 predicate( UseSSE < 2 ); 3984 constraint(ALLOC_IN_RC(fp_dbl_notreg0)); 3985 match(reg); 3986 format %{ %} 3987 interface(REG_INTER); 3988 %} 3989 3990 // Float register operands 3991 operand regFPR() %{ 3992 predicate( UseSSE < 2 ); 3993 constraint(ALLOC_IN_RC(fp_flt_reg)); 3994 match(RegF); 3995 match(regFPR1); 3996 format %{ %} 3997 interface(REG_INTER); 3998 %} 3999 4000 // Float register operands 4001 operand regFPR1(regFPR reg) %{ 4002 predicate( UseSSE < 2 ); 4003 constraint(ALLOC_IN_RC(fp_flt_reg0)); 4004 match(reg); 4005 format %{ "FPR1" %} 4006 interface(REG_INTER); 4007 %} 4008 4009 // XMM Float register operands 4010 operand regF() %{ 4011 predicate( UseSSE>=1 ); 4012 constraint(ALLOC_IN_RC(float_reg_legacy)); 4013 match(RegF); 4014 format %{ %} 4015 interface(REG_INTER); 4016 %} 4017 4018 // XMM Double register operands 4019 operand regD() %{ 4020 predicate( UseSSE>=2 ); 4021 constraint(ALLOC_IN_RC(double_reg_legacy)); 4022 match(RegD); 4023 format %{ %} 4024 interface(REG_INTER); 4025 %} 4026 4027 // Vectors 4028 operand vecS() %{ 4029 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 4030 match(VecS); 4031 4032 format %{ %} 4033 interface(REG_INTER); 4034 %} 4035 4036 operand vecD() %{ 4037 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 4038 
match(VecD); 4039 4040 format %{ %} 4041 interface(REG_INTER); 4042 %} 4043 4044 operand vecX() %{ 4045 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 4046 match(VecX); 4047 4048 format %{ %} 4049 interface(REG_INTER); 4050 %} 4051 4052 operand vecY() %{ 4053 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 4054 match(VecY); 4055 4056 format %{ %} 4057 interface(REG_INTER); 4058 %} 4059 4060 //----------Memory Operands---------------------------------------------------- 4061 // Direct Memory Operand 4062 operand direct(immP addr) %{ 4063 match(addr); 4064 4065 format %{ "[$addr]" %} 4066 interface(MEMORY_INTER) %{ 4067 base(0xFFFFFFFF); 4068 index(0x4); 4069 scale(0x0); 4070 disp($addr); 4071 %} 4072 %} 4073 4074 // Indirect Memory Operand 4075 operand indirect(eRegP reg) %{ 4076 constraint(ALLOC_IN_RC(int_reg)); 4077 match(reg); 4078 4079 format %{ "[$reg]" %} 4080 interface(MEMORY_INTER) %{ 4081 base($reg); 4082 index(0x4); 4083 scale(0x0); 4084 disp(0x0); 4085 %} 4086 %} 4087 4088 // Indirect Memory Plus Short Offset Operand 4089 operand indOffset8(eRegP reg, immI8 off) %{ 4090 match(AddP reg off); 4091 4092 format %{ "[$reg + $off]" %} 4093 interface(MEMORY_INTER) %{ 4094 base($reg); 4095 index(0x4); 4096 scale(0x0); 4097 disp($off); 4098 %} 4099 %} 4100 4101 // Indirect Memory Plus Long Offset Operand 4102 operand indOffset32(eRegP reg, immI off) %{ 4103 match(AddP reg off); 4104 4105 format %{ "[$reg + $off]" %} 4106 interface(MEMORY_INTER) %{ 4107 base($reg); 4108 index(0x4); 4109 scale(0x0); 4110 disp($off); 4111 %} 4112 %} 4113 4114 // Indirect Memory Plus Long Offset Operand 4115 operand indOffset32X(rRegI reg, immP off) %{ 4116 match(AddP off reg); 4117 4118 format %{ "[$reg + $off]" %} 4119 interface(MEMORY_INTER) %{ 4120 base($reg); 4121 index(0x4); 4122 scale(0x0); 4123 disp($off); 4124 %} 4125 %} 4126 4127 // Indirect Memory Plus Index Register Plus Offset Operand 4128 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ 4129 match(AddP (AddP reg 
ireg) off); 4130 4131 op_cost(10); 4132 format %{"[$reg + $off + $ireg]" %} 4133 interface(MEMORY_INTER) %{ 4134 base($reg); 4135 index($ireg); 4136 scale(0x0); 4137 disp($off); 4138 %} 4139 %} 4140 4141 // Indirect Memory Plus Index Register Plus Offset Operand 4142 operand indIndex(eRegP reg, rRegI ireg) %{ 4143 match(AddP reg ireg); 4144 4145 op_cost(10); 4146 format %{"[$reg + $ireg]" %} 4147 interface(MEMORY_INTER) %{ 4148 base($reg); 4149 index($ireg); 4150 scale(0x0); 4151 disp(0x0); 4152 %} 4153 %} 4154 4155 // // ------------------------------------------------------------------------- 4156 // // 486 architecture doesn't support "scale * index + offset" without a base 4157 // // ------------------------------------------------------------------------- 4158 // // Scaled Memory Operands 4159 // // Indirect Memory Times Scale Plus Offset Operand 4160 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ 4161 // match(AddP off (LShiftI ireg scale)); 4162 // 4163 // op_cost(10); 4164 // format %{"[$off + $ireg << $scale]" %} 4165 // interface(MEMORY_INTER) %{ 4166 // base(0x4); 4167 // index($ireg); 4168 // scale($scale); 4169 // disp($off); 4170 // %} 4171 // %} 4172 4173 // Indirect Memory Times Scale Plus Index Register 4174 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ 4175 match(AddP reg (LShiftI ireg scale)); 4176 4177 op_cost(10); 4178 format %{"[$reg + $ireg << $scale]" %} 4179 interface(MEMORY_INTER) %{ 4180 base($reg); 4181 index($ireg); 4182 scale($scale); 4183 disp(0x0); 4184 %} 4185 %} 4186 4187 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4188 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ 4189 match(AddP (AddP reg (LShiftI ireg scale)) off); 4190 4191 op_cost(10); 4192 format %{"[$reg + $off + $ireg << $scale]" %} 4193 interface(MEMORY_INTER) %{ 4194 base($reg); 4195 index($ireg); 4196 scale($scale); 4197 disp($off); 4198 %} 4199 %} 4200 4201 //----------Load Long
Memory Operands------------------------------------------ 4202 // The load-long idiom will use its address expression again after loading 4203 // the first word of the long. If the load-long destination overlaps with 4204 // registers used in the addressing expression, the 2nd half will be loaded 4205 // from a clobbered address. Fix this by requiring that load-long use 4206 // address registers that do not overlap with the load-long target. 4207 4208 // load-long support 4209 operand load_long_RegP() %{ 4210 constraint(ALLOC_IN_RC(esi_reg)); 4211 match(RegP); 4212 match(eSIRegP); 4213 op_cost(100); 4214 format %{ %} 4215 interface(REG_INTER); 4216 %} 4217 4218 // Indirect Memory Operand Long 4219 operand load_long_indirect(load_long_RegP reg) %{ 4220 constraint(ALLOC_IN_RC(esi_reg)); 4221 match(reg); 4222 4223 format %{ "[$reg]" %} 4224 interface(MEMORY_INTER) %{ 4225 base($reg); 4226 index(0x4); 4227 scale(0x0); 4228 disp(0x0); 4229 %} 4230 %} 4231 4232 // Indirect Memory Plus Long Offset Operand 4233 operand load_long_indOffset32(load_long_RegP reg, immI off) %{ 4234 match(AddP reg off); 4235 4236 format %{ "[$reg + $off]" %} 4237 interface(MEMORY_INTER) %{ 4238 base($reg); 4239 index(0x4); 4240 scale(0x0); 4241 disp($off); 4242 %} 4243 %} 4244 4245 opclass load_long_memory(load_long_indirect, load_long_indOffset32); 4246 4247 4248 //----------Special Memory Operands-------------------------------------------- 4249 // Stack Slot Operand - This operand is used for loading and storing temporary 4250 // values on the stack where a match requires a value to 4251 // flow through memory. 
4252 operand stackSlotP(sRegP reg) %{ 4253 constraint(ALLOC_IN_RC(stack_slots)); 4254 // No match rule because this operand is only generated in matching 4255 format %{ "[$reg]" %} 4256 interface(MEMORY_INTER) %{ 4257 base(0x4); // ESP 4258 index(0x4); // No Index 4259 scale(0x0); // No Scale 4260 disp($reg); // Stack Offset 4261 %} 4262 %} 4263 4264 operand stackSlotI(sRegI reg) %{ 4265 constraint(ALLOC_IN_RC(stack_slots)); 4266 // No match rule because this operand is only generated in matching 4267 format %{ "[$reg]" %} 4268 interface(MEMORY_INTER) %{ 4269 base(0x4); // ESP 4270 index(0x4); // No Index 4271 scale(0x0); // No Scale 4272 disp($reg); // Stack Offset 4273 %} 4274 %} 4275 4276 operand stackSlotF(sRegF reg) %{ 4277 constraint(ALLOC_IN_RC(stack_slots)); 4278 // No match rule because this operand is only generated in matching 4279 format %{ "[$reg]" %} 4280 interface(MEMORY_INTER) %{ 4281 base(0x4); // ESP 4282 index(0x4); // No Index 4283 scale(0x0); // No Scale 4284 disp($reg); // Stack Offset 4285 %} 4286 %} 4287 4288 operand stackSlotD(sRegD reg) %{ 4289 constraint(ALLOC_IN_RC(stack_slots)); 4290 // No match rule because this operand is only generated in matching 4291 format %{ "[$reg]" %} 4292 interface(MEMORY_INTER) %{ 4293 base(0x4); // ESP 4294 index(0x4); // No Index 4295 scale(0x0); // No Scale 4296 disp($reg); // Stack Offset 4297 %} 4298 %} 4299 4300 operand stackSlotL(sRegL reg) %{ 4301 constraint(ALLOC_IN_RC(stack_slots)); 4302 // No match rule because this operand is only generated in matching 4303 format %{ "[$reg]" %} 4304 interface(MEMORY_INTER) %{ 4305 base(0x4); // ESP 4306 index(0x4); // No Index 4307 scale(0x0); // No Scale 4308 disp($reg); // Stack Offset 4309 %} 4310 %} 4311 4312 //----------Memory Operands - Win95 Implicit Null Variants---------------- 4313 // Indirect Memory Operand 4314 operand indirect_win95_safe(eRegP_no_EBP reg) 4315 %{ 4316 constraint(ALLOC_IN_RC(int_reg)); 4317 match(reg); 4318 4319 op_cost(100); 4320 
format %{ "[$reg]" %} 4321 interface(MEMORY_INTER) %{ 4322 base($reg); 4323 index(0x4); 4324 scale(0x0); 4325 disp(0x0); 4326 %} 4327 %} 4328 4329 // Indirect Memory Plus Short Offset Operand 4330 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off) 4331 %{ 4332 match(AddP reg off); 4333 4334 op_cost(100); 4335 format %{ "[$reg + $off]" %} 4336 interface(MEMORY_INTER) %{ 4337 base($reg); 4338 index(0x4); 4339 scale(0x0); 4340 disp($off); 4341 %} 4342 %} 4343 4344 // Indirect Memory Plus Long Offset Operand 4345 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off) 4346 %{ 4347 match(AddP reg off); 4348 4349 op_cost(100); 4350 format %{ "[$reg + $off]" %} 4351 interface(MEMORY_INTER) %{ 4352 base($reg); 4353 index(0x4); 4354 scale(0x0); 4355 disp($off); 4356 %} 4357 %} 4358 4359 // Indirect Memory Plus Index Register Plus Offset Operand 4360 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off) 4361 %{ 4362 match(AddP (AddP reg ireg) off); 4363 4364 op_cost(100); 4365 format %{"[$reg + $off + $ireg]" %} 4366 interface(MEMORY_INTER) %{ 4367 base($reg); 4368 index($ireg); 4369 scale(0x0); 4370 disp($off); 4371 %} 4372 %} 4373 4374 // Indirect Memory Times Scale Plus Index Register 4375 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale) 4376 %{ 4377 match(AddP reg (LShiftI ireg scale)); 4378 4379 op_cost(100); 4380 format %{"[$reg + $ireg << $scale]" %} 4381 interface(MEMORY_INTER) %{ 4382 base($reg); 4383 index($ireg); 4384 scale($scale); 4385 disp(0x0); 4386 %} 4387 %} 4388 4389 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand 4390 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale) 4391 %{ 4392 match(AddP (AddP reg (LShiftI ireg scale)) off); 4393 4394 op_cost(100); 4395 format %{"[$reg + $off + $ireg << $scale]" %} 4396 interface(MEMORY_INTER) %{ 4397 base($reg); 4398 index($ireg); 4399 scale($scale); 4400 disp($off); 4401 %} 4402 %} 4403 4404 
//----------Conditional Branch Operands---------------------------------------- 4405 // Comparison Op - This is the operation of the comparison, and is limited to 4406 // the following set of codes: 4407 // L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) 4408 // 4409 // Other attributes of the comparison, such as unsignedness, are specified 4410 // by the comparison instruction that sets a condition code flags register. 4411 // That result is represented by a flags operand whose subtype is appropriate 4412 // to the unsignedness (etc.) of the comparison. 4413 // 4414 // Later, the instruction which matches both the Comparison Op (a Bool) and 4415 // the flags (produced by the Cmp) specifies the coding of the comparison op 4416 // by matching a specific subtype of Bool operand below, such as cmpOpU. 4417 4418 // Comparison Code 4419 operand cmpOp() %{ 4420 match(Bool); 4421 4422 format %{ "" %} 4423 interface(COND_INTER) %{ 4424 equal(0x4, "e"); 4425 not_equal(0x5, "ne"); 4426 less(0xC, "l"); 4427 greater_equal(0xD, "ge"); 4428 less_equal(0xE, "le"); 4429 greater(0xF, "g"); 4430 overflow(0x0, "o"); 4431 no_overflow(0x1, "no"); 4432 %} 4433 %} 4434 4435 // Comparison Code, unsigned compare. Used by FP also, with 4436 // C2 (unordered) turned into GT or LT already. The other bits 4437 // C0 and C3 are turned into Carry & Zero flags. 
4438 operand cmpOpU() %{ 4439 match(Bool); 4440 4441 format %{ "" %} 4442 interface(COND_INTER) %{ 4443 equal(0x4, "e"); 4444 not_equal(0x5, "ne"); 4445 less(0x2, "b"); 4446 greater_equal(0x3, "nb"); 4447 less_equal(0x6, "be"); 4448 greater(0x7, "nbe"); 4449 overflow(0x0, "o"); 4450 no_overflow(0x1, "no"); 4451 %} 4452 %} 4453 4454 // Floating comparisons that don't require any fixup for the unordered case 4455 operand cmpOpUCF() %{ 4456 match(Bool); 4457 predicate(n->as_Bool()->_test._test == BoolTest::lt || 4458 n->as_Bool()->_test._test == BoolTest::ge || 4459 n->as_Bool()->_test._test == BoolTest::le || 4460 n->as_Bool()->_test._test == BoolTest::gt); 4461 format %{ "" %} 4462 interface(COND_INTER) %{ 4463 equal(0x4, "e"); 4464 not_equal(0x5, "ne"); 4465 less(0x2, "b"); 4466 greater_equal(0x3, "nb"); 4467 less_equal(0x6, "be"); 4468 greater(0x7, "nbe"); 4469 overflow(0x0, "o"); 4470 no_overflow(0x1, "no"); 4471 %} 4472 %} 4473 4474 4475 // Floating comparisons that can be fixed up with extra conditional jumps 4476 operand cmpOpUCF2() %{ 4477 match(Bool); 4478 predicate(n->as_Bool()->_test._test == BoolTest::ne || 4479 n->as_Bool()->_test._test == BoolTest::eq); 4480 format %{ "" %} 4481 interface(COND_INTER) %{ 4482 equal(0x4, "e"); 4483 not_equal(0x5, "ne"); 4484 less(0x2, "b"); 4485 greater_equal(0x3, "nb"); 4486 less_equal(0x6, "be"); 4487 greater(0x7, "nbe"); 4488 overflow(0x0, "o"); 4489 no_overflow(0x1, "no"); 4490 %} 4491 %} 4492 4493 // Comparison Code for FP conditional move 4494 operand cmpOp_fcmov() %{ 4495 match(Bool); 4496 4497 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 4498 n->as_Bool()->_test._test != BoolTest::no_overflow); 4499 format %{ "" %} 4500 interface(COND_INTER) %{ 4501 equal (0x0C8); 4502 not_equal (0x1C8); 4503 less (0x0C0); 4504 greater_equal(0x1C0); 4505 less_equal (0x0D0); 4506 greater (0x1D0); 4507 overflow(0x0, "o"); // not really supported by the instruction 4508 no_overflow(0x1, "no"); // not really supported 
by the instruction 4509 %} 4510 %} 4511 4512 // Comparison Code used in long compares 4513 operand cmpOp_commute() %{ 4514 match(Bool); 4515 4516 format %{ "" %} 4517 interface(COND_INTER) %{ 4518 equal(0x4, "e"); 4519 not_equal(0x5, "ne"); 4520 less(0xF, "g"); 4521 greater_equal(0xE, "le"); 4522 less_equal(0xD, "ge"); 4523 greater(0xC, "l"); 4524 overflow(0x0, "o"); 4525 no_overflow(0x1, "no"); 4526 %} 4527 %} 4528 4529 //----------OPERAND CLASSES---------------------------------------------------- 4530 // Operand Classes are groups of operands that are used to simplify 4531 // instruction definitions by not requiring the AD writer to specify separate 4532 // instructions for every form of operand when the instruction accepts 4533 // multiple operand types with the same basic encoding and format. The classic 4534 // case of this is memory operands. 4535 4536 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, 4537 indIndex, indIndexScale, indIndexScaleOffset); 4538 4539 // Long memory operations are encoded in 2 instructions and a +4 offset. 4540 // This means some kind of offset is always required and you cannot use 4541 // an oop as the offset (done when working on static globals). 4542 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, 4543 indIndex, indIndexScale, indIndexScaleOffset); 4544 4545 4546 //----------PIPELINE----------------------------------------------------------- 4547 // Rules which define the behavior of the target architecture's pipeline. 
4548 pipeline %{ 4549 4550 //----------ATTRIBUTES--------------------------------------------------------- 4551 attributes %{ 4552 variable_size_instructions; // Variable size instructions 4553 max_instructions_per_bundle = 3; // Up to 3 instructions per bundle 4554 instruction_unit_size = 1; // An instruction is 1 byte long 4555 instruction_fetch_unit_size = 16; // The processor fetches one line 4556 instruction_fetch_units = 1; // of 16 bytes 4557 4558 // List of nop instructions 4559 nops( MachNop ); 4560 %} 4561 4562 //----------RESOURCES---------------------------------------------------------- 4563 // Resources are the functional units available to the machine 4564 4565 // Generic P2/P3 pipeline 4566 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of 4567 // 3 instructions decoded per cycle. 4568 // 2 load/store ops per cycle, 1 branch, 1 FPU, 4569 // 2 ALU op, only ALU0 handles mul/div instructions. 4570 resources( D0, D1, D2, DECODE = D0 | D1 | D2, 4571 MS0, MS1, MEM = MS0 | MS1, 4572 BR, FPU, 4573 ALU0, ALU1, ALU = ALU0 | ALU1 ); 4574 4575 //----------PIPELINE DESCRIPTION----------------------------------------------- 4576 // Pipeline Description specifies the stages in the machine's pipeline 4577 4578 // Generic P2/P3 pipeline 4579 pipe_desc(S0, S1, S2, S3, S4, S5); 4580 4581 //----------PIPELINE CLASSES--------------------------------------------------- 4582 // Pipeline Classes describe the stages in which input and output are 4583 // referenced by the hardware pipeline. 4584 4585 // Naming convention: ialu or fpu 4586 // Then: _reg 4587 // Then: _reg if there is a 2nd register 4588 // Then: _long if it's a pair of instructions implementing a long 4589 // Then: _fat if it requires the big decoder 4590 // Or: _mem if it requires the big decoder and a memory unit. 
4591 4592 // Integer ALU reg operation 4593 pipe_class ialu_reg(rRegI dst) %{ 4594 single_instruction; 4595 dst : S4(write); 4596 dst : S3(read); 4597 DECODE : S0; // any decoder 4598 ALU : S3; // any alu 4599 %} 4600 4601 // Long ALU reg operation 4602 pipe_class ialu_reg_long(eRegL dst) %{ 4603 instruction_count(2); 4604 dst : S4(write); 4605 dst : S3(read); 4606 DECODE : S0(2); // any 2 decoders 4607 ALU : S3(2); // both alus 4608 %} 4609 4610 // Integer ALU reg operation using big decoder 4611 pipe_class ialu_reg_fat(rRegI dst) %{ 4612 single_instruction; 4613 dst : S4(write); 4614 dst : S3(read); 4615 D0 : S0; // big decoder only 4616 ALU : S3; // any alu 4617 %} 4618 4619 // Long ALU reg operation using big decoder 4620 pipe_class ialu_reg_long_fat(eRegL dst) %{ 4621 instruction_count(2); 4622 dst : S4(write); 4623 dst : S3(read); 4624 D0 : S0(2); // big decoder only; twice 4625 ALU : S3(2); // any 2 alus 4626 %} 4627 4628 // Integer ALU reg-reg operation 4629 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ 4630 single_instruction; 4631 dst : S4(write); 4632 src : S3(read); 4633 DECODE : S0; // any decoder 4634 ALU : S3; // any alu 4635 %} 4636 4637 // Long ALU reg-reg operation 4638 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ 4639 instruction_count(2); 4640 dst : S4(write); 4641 src : S3(read); 4642 DECODE : S0(2); // any 2 decoders 4643 ALU : S3(2); // both alus 4644 %} 4645 4646 // Integer ALU reg-reg operation 4647 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ 4648 single_instruction; 4649 dst : S4(write); 4650 src : S3(read); 4651 D0 : S0; // big decoder only 4652 ALU : S3; // any alu 4653 %} 4654 4655 // Long ALU reg-reg operation 4656 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ 4657 instruction_count(2); 4658 dst : S4(write); 4659 src : S3(read); 4660 D0 : S0(2); // big decoder only; twice 4661 ALU : S3(2); // both alus 4662 %} 4663 4664 // Integer ALU reg-mem operation 4665 pipe_class ialu_reg_mem(rRegI dst, memory 
mem) %{ 4666 single_instruction; 4667 dst : S5(write); 4668 mem : S3(read); 4669 D0 : S0; // big decoder only 4670 ALU : S4; // any alu 4671 MEM : S3; // any mem 4672 %} 4673 4674 // Long ALU reg-mem operation 4675 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ 4676 instruction_count(2); 4677 dst : S5(write); 4678 mem : S3(read); 4679 D0 : S0(2); // big decoder only; twice 4680 ALU : S4(2); // any 2 alus 4681 MEM : S3(2); // both mems 4682 %} 4683 4684 // Integer mem operation (prefetch) 4685 pipe_class ialu_mem(memory mem) 4686 %{ 4687 single_instruction; 4688 mem : S3(read); 4689 D0 : S0; // big decoder only 4690 MEM : S3; // any mem 4691 %} 4692 4693 // Integer Store to Memory 4694 pipe_class ialu_mem_reg(memory mem, rRegI src) %{ 4695 single_instruction; 4696 mem : S3(read); 4697 src : S5(read); 4698 D0 : S0; // big decoder only 4699 ALU : S4; // any alu 4700 MEM : S3; 4701 %} 4702 4703 // Long Store to Memory 4704 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ 4705 instruction_count(2); 4706 mem : S3(read); 4707 src : S5(read); 4708 D0 : S0(2); // big decoder only; twice 4709 ALU : S4(2); // any 2 alus 4710 MEM : S3(2); // Both mems 4711 %} 4712 4713 // Integer Store to Memory 4714 pipe_class ialu_mem_imm(memory mem) %{ 4715 single_instruction; 4716 mem : S3(read); 4717 D0 : S0; // big decoder only 4718 ALU : S4; // any alu 4719 MEM : S3; 4720 %} 4721 4722 // Integer ALU0 reg-reg operation 4723 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ 4724 single_instruction; 4725 dst : S4(write); 4726 src : S3(read); 4727 D0 : S0; // Big decoder only 4728 ALU0 : S3; // only alu0 4729 %} 4730 4731 // Integer ALU0 reg-mem operation 4732 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ 4733 single_instruction; 4734 dst : S5(write); 4735 mem : S3(read); 4736 D0 : S0; // big decoder only 4737 ALU0 : S4; // ALU0 only 4738 MEM : S3; // any mem 4739 %} 4740 4741 // Integer ALU reg-reg operation 4742 pipe_class ialu_cr_reg_reg(eFlagsReg cr, 
rRegI src1, rRegI src2) %{ 4743 single_instruction; 4744 cr : S4(write); 4745 src1 : S3(read); 4746 src2 : S3(read); 4747 DECODE : S0; // any decoder 4748 ALU : S3; // any alu 4749 %} 4750 4751 // Integer ALU reg-imm operation 4752 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ 4753 single_instruction; 4754 cr : S4(write); 4755 src1 : S3(read); 4756 DECODE : S0; // any decoder 4757 ALU : S3; // any alu 4758 %} 4759 4760 // Integer ALU reg-mem operation 4761 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ 4762 single_instruction; 4763 cr : S4(write); 4764 src1 : S3(read); 4765 src2 : S3(read); 4766 D0 : S0; // big decoder only 4767 ALU : S4; // any alu 4768 MEM : S3; 4769 %} 4770 4771 // Conditional move reg-reg 4772 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ 4773 instruction_count(4); 4774 y : S4(read); 4775 q : S3(read); 4776 p : S3(read); 4777 DECODE : S0(4); // any decoder 4778 %} 4779 4780 // Conditional move reg-reg 4781 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ 4782 single_instruction; 4783 dst : S4(write); 4784 src : S3(read); 4785 cr : S3(read); 4786 DECODE : S0; // any decoder 4787 %} 4788 4789 // Conditional move reg-mem 4790 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ 4791 single_instruction; 4792 dst : S4(write); 4793 src : S3(read); 4794 cr : S3(read); 4795 DECODE : S0; // any decoder 4796 MEM : S3; 4797 %} 4798 4799 // Conditional move reg-reg long 4800 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ 4801 single_instruction; 4802 dst : S4(write); 4803 src : S3(read); 4804 cr : S3(read); 4805 DECODE : S0(2); // any 2 decoders 4806 %} 4807 4808 // Conditional move double reg-reg 4809 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ 4810 single_instruction; 4811 dst : S4(write); 4812 src : S3(read); 4813 cr : S3(read); 4814 DECODE : S0; // any decoder 4815 %} 4816 4817 // Float reg-reg operation 4818 pipe_class fpu_reg(regDPR 
dst) %{ 4819 instruction_count(2); 4820 dst : S3(read); 4821 DECODE : S0(2); // any 2 decoders 4822 FPU : S3; 4823 %} 4824 4825 // Float reg-reg operation 4826 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ 4827 instruction_count(2); 4828 dst : S4(write); 4829 src : S3(read); 4830 DECODE : S0(2); // any 2 decoders 4831 FPU : S3; 4832 %} 4833 4834 // Float reg-reg operation 4835 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ 4836 instruction_count(3); 4837 dst : S4(write); 4838 src1 : S3(read); 4839 src2 : S3(read); 4840 DECODE : S0(3); // any 3 decoders 4841 FPU : S3(2); 4842 %} 4843 4844 // Float reg-reg operation 4845 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ 4846 instruction_count(4); 4847 dst : S4(write); 4848 src1 : S3(read); 4849 src2 : S3(read); 4850 src3 : S3(read); 4851 DECODE : S0(4); // any 4 decoders 4852 FPU : S3(2); 4853 %} 4854 4855 // Float reg-reg operation 4856 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ 4857 instruction_count(4); 4858 dst : S4(write); 4859 src1 : S3(read); 4860 src2 : S3(read); 4861 src3 : S3(read); 4862 DECODE : S1(3); // any 3 decoders 4863 D0 : S0; // Big decoder only 4864 FPU : S3(2); 4865 MEM : S3; 4866 %} 4867 4868 // Float reg-mem operation 4869 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ 4870 instruction_count(2); 4871 dst : S5(write); 4872 mem : S3(read); 4873 D0 : S0; // big decoder only 4874 DECODE : S1; // any decoder for FPU POP 4875 FPU : S4; 4876 MEM : S3; // any mem 4877 %} 4878 4879 // Float reg-mem operation 4880 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ 4881 instruction_count(3); 4882 dst : S5(write); 4883 src1 : S3(read); 4884 mem : S3(read); 4885 D0 : S0; // big decoder only 4886 DECODE : S1(2); // any decoder for FPU POP 4887 FPU : S4; 4888 MEM : S3; // any mem 4889 %} 4890 4891 // Float mem-reg operation 4892 pipe_class fpu_mem_reg(memory mem, regDPR src) %{ 4893 
  instruction_count(2);
  src    : S5(read);
  mem    : S3(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S1;        // big decoder only
  FPU    : S4;
  MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  mem    : S3(read);
  DECODE : S0(2);     // any decoder for FPU PUSH
  D0     : S1;        // big decoder only
  FPU    : S4;
  MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  mem    : S4(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S0(2);     // big decoder only
  FPU    : S4;
  MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem(memory dst, memory src1) %{
  instruction_count(2);
  src1   : S3(read);
  dst    : S4(read);
  D0     : S0(2);     // big decoder only
  MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
  instruction_count(3);
  src1   : S3(read);
  src2   : S3(read);
  dst    : S4(read);
  D0     : S0(3);     // big decoder only
  FPU    : S4;
  MEM    : S3(3);     // any mem
%}

pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
  instruction_count(3);
  src1   : S4(read);
  mem    : S4(read);
  DECODE : S0;        // any decoder for FPU PUSH
  D0     : S0(2);     // big decoder only
  FPU    : S4;
  MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regDPR dst) %{
  instruction_count(2);
  dst    : S5(write);
  D0     : S0;        // big decoder only for the load
  DECODE : S1;        // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
  instruction_count(3);
  dst    : S5(write);
  src    : S3(read);
  D0     : S0;        // big decoder only for the load
  DECODE : S1(2);     // any decoder for FPU POP
  FPU    : S4;
  MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
  single_instruction;
  BR   : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
  single_instruction;
  cr   : S1(read);
  BR   : S3;
%}

// Allocation idiom (serialized, fixed 6-cycle latency)
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
  instruction_count(1); force_serialization;
  fixed_latency(6);
  heap_ptr : S3(read);
  DECODE   : S0(3);
  D0       : S2;
  MEM      : S3;
  ALU      : S3(2);
  dst      : S5(write);
  BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow( ) %{
  instruction_count(10); multiple_bundles; force_serialization;
  fixed_latency(100);
  D0  : S0(2);
  MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
  instruction_count(0);
%}

// Define the class for the Nop node
define %{
  MachNop = empty;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------
//
// match      -- States which machine-independent subtree may be replaced
//               by this instruction.
// ins_cost   -- The estimated cost of this instruction is used by instruction
//               selection to identify a minimum cost tree of machine
//               instructions that matches a tree of machine-independent
//               instructions.
// format     -- A string providing the disassembly for this instruction.
//               The value of an instruction's operand may be inserted
//               by referring to it with a '$' prefix.
// opcode     -- Three instruction opcodes may be provided.  These are referred
//               to within an encode class as $primary, $secondary, and $tertiary
//               respectively.  The primary opcode is commonly used to
//               indicate the type of machine instruction, while secondary
//               and tertiary are often used for prefix options or addressing
//               modes.
// ins_encode -- A list of encode classes with parameters. The encode class
//               name must have been defined in an 'enc_class' specification
//               in the encode section of the architecture description.

//----------BSWAP-Instruction--------------------------------------------------
instruct bytes_reverse_int(rRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP $dst.lo\n\t"
            "BSWAP $dst.hi\n\t"
            "XCHG $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}

instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesUS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SHR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    // Swapped 16-bit value is now in the high half; logical shift brings it down zero-extended.
    __ shrl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}

instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
  match(Set dst (ReverseBytesS dst));
  effect(KILL cr);

  format %{ "BSWAP $dst\n\t"
            "SAR $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    // Arithmetic shift brings the swapped value down sign-extended.
    __ sarl($dst$$Register, 16);
  %}
  ins_pipe( ialu_reg );
%}


//---------- Zeros Count Instructions ------------------------------------------

instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when LZCNT is unavailable: BSR gives the index of the highest set
// bit; 31 - index is computed below as NEG + ADD 31. A zero source leaves BSR's
// destination undefined, so it is forced to -1 (yielding 32 after NEG/ADD).
instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ skip\n\t"
            "MOV $dst, -1\n"
            "skip:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);      // src was zero: force result 32 after NEG/ADD below
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC done\n\t"
            "LZCNT $dst, $src.lo\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    // LZCNT sets CF when its source is zero, i.e. the high word contributed 32;
    // only then count the low word and add 32.
    __ jccb(Assembler::carryClear, done);
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// BSR-based fallback for the 64-bit count; same NEG + ADD 63 trick as the
// int variant, applied to whichever word holds the highest set bit.
instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ msw_is_zero\n\t"
            "ADD $dst, 32\n\t"
            "JMP not_zero\n"
            "msw_is_zero:\n\t"
            "BSR $dst, $src.lo\n\t"
            "JNZ not_zero\n\t"
            "MOV $dst, -1\n"
            "not_zero:\n\t"
            "NEG $dst\n\t"
            "ADD $dst, 63\n" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);      // whole long was zero: force result 64 after NEG/ADD
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %}
  ins_encode %{
    __ tzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Fallback when TZCNT is unavailable: BSF gives the index of the lowest set
// bit directly; a zero source (BSF leaves dst undefined) is patched to 32.
instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ done\n\t"
            "MOV $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t"
            "JNC done\n\t"
            "TZCNT $dst, $src.hi\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __
tzcntl(Rdst, Rsrc);
    // TZCNT sets CF when its source is zero; only then count the high word.
    __ jccb(Assembler::carryClear, done);
    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

// BSF-based fallback for the 64-bit trailing-zero count.
instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountTrailingZerosInstruction);
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ done\n\t"
            "BSF $dst, $src.hi\n\t"
            "JNZ msw_not_zero\n\t"
            "MOV $dst, 32\n"
            "msw_not_zero:\n\t"
            "ADD $dst, 32\n"
            "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);   // both words zero: 32 + 32 below = 64
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    // 64-bit popcount as the sum of the two 32-bit halves.
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    // Count each 32-bit half of the in-memory long (low word at disp, high at disp+4).
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}


//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Byte (8bit signed) into Long Register
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL
cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // After movsbl the top 25 bits of lo are all copies of the sign bit,
    // so an arithmetic shift by 7 already fills hi with the sign.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8bit UNsigned)
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) into Long Register
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));  // zero the high word
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed)
instruct loadS(rRegI dst, memory mem) %{
  match(Set
dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# short -> byte" %}
  ins_encode %{
    // (x << 24) >> 24 on a loaded short is just a sign-extending byte load.
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // movswl leaves the top 17 bits equal to the sign bit, so shifting by 15 suffices.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    // (x << 24) >> 24 keeps only the low byte, sign-extended.
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    // Masking with 0xFF folds the load into a single zero-extending byte load.
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register
instruct
loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV $dst.lo,$mem\t# int -> long\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "SAR $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);  // hi = sign of lo
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 31-bit mask into Long Register
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR $dst.hi,$dst.hi\n\t"
            "AND $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t"
            "XOR $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV $dst.lo,$mem\t# long\n\t"
            "MOV $dst.hi,$mem+4" %}

  ins_encode %{
    // Two 32-bit loads: low word at disp, high word at disp+4.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD $mem\t# Atomic volatile long load\n\t"
            "FISTp $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load via one 64-bit XMM load, spilled to the stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD $dst,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Atomic volatile long load via XMM, split into the two GP halves with MOVD/PSRLQ.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD $dst.lo,$tmp\n\t"
            "PSRLQ $tmp,32\n\t"
            "MOVD $dst.hi,$tmp" %}
  ins_encode %{
    __ movdbl($tmp$$XMMRegister, $mem$$Address);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ psrlq($tmp$$XMMRegister, 32);   // bring the high word down for the second MOVD
    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Load Range
instruct loadRange(rRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe(
ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Double (x87 path, SSE disabled or SSE1-only)
instruct loadDPR(regDPR dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Variant used when the upper half of the XMM register must be preserved.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    __ movdbl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS $dst,$mem" %}
  ins_encode %{
    __ movflt ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// Load Float (x87 path, SSE disabled)
instruct loadFPR(regFPR dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S ST,$mem\n\t"
            "FSTP $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst) );
ins_pipe( fpu_reg_mem );
%}

// Load Effective Address
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// Load Constant
instruct loadConI(rRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero (XOR is shorter than MOV imm and is the cheaper idiom)
instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV $dst,$src" %}
  opcode(0xB8);  /* + rd */
ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "MOV $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR $dst.lo,$dst.lo\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1 ST\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF().
5992 instruct loadConF(regF dst, immF con) %{ 5993 match(Set dst con); 5994 ins_cost(125); 5995 format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} 5996 ins_encode %{ 5997 __ movflt($dst$$XMMRegister, $constantaddress($con)); 5998 %} 5999 ins_pipe(pipe_slow); 6000 %} 6001 6002 // The instruction usage is guarded by predicate in operand immF0(). 6003 instruct loadConF0(regF dst, immF0 src) %{ 6004 match(Set dst src); 6005 ins_cost(100); 6006 format %{ "XORPS $dst,$dst\t# float 0.0" %} 6007 ins_encode %{ 6008 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 6009 %} 6010 ins_pipe(pipe_slow); 6011 %} 6012 6013 // The instruction usage is guarded by predicate in operand immDPR(). 6014 instruct loadConDPR(regDPR dst, immDPR con) %{ 6015 match(Set dst con); 6016 ins_cost(125); 6017 6018 format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" 6019 "FSTP $dst" %} 6020 ins_encode %{ 6021 __ fld_d($constantaddress($con)); 6022 __ fstp_d($dst$$reg); 6023 %} 6024 ins_pipe(fpu_reg_con); 6025 %} 6026 6027 // The instruction usage is guarded by predicate in operand immDPR0(). 6028 instruct loadConDPR0(regDPR dst, immDPR0 con) %{ 6029 match(Set dst con); 6030 ins_cost(125); 6031 6032 format %{ "FLDZ ST\n\t" 6033 "FSTP $dst" %} 6034 ins_encode %{ 6035 __ fldz(); 6036 __ fstp_d($dst$$reg); 6037 %} 6038 ins_pipe(fpu_reg_con); 6039 %} 6040 6041 // The instruction usage is guarded by predicate in operand immDPR1(). 6042 instruct loadConDPR1(regDPR dst, immDPR1 con) %{ 6043 match(Set dst con); 6044 ins_cost(125); 6045 6046 format %{ "FLD1 ST\n\t" 6047 "FSTP $dst" %} 6048 ins_encode %{ 6049 __ fld1(); 6050 __ fstp_d($dst$$reg); 6051 %} 6052 ins_pipe(fpu_reg_con); 6053 %} 6054 6055 // The instruction usage is guarded by predicate in operand immD(). 
6056 instruct loadConD(regD dst, immD con) %{ 6057 match(Set dst con); 6058 ins_cost(125); 6059 format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} 6060 ins_encode %{ 6061 __ movdbl($dst$$XMMRegister, $constantaddress($con)); 6062 %} 6063 ins_pipe(pipe_slow); 6064 %} 6065 6066 // The instruction usage is guarded by predicate in operand immD0(). 6067 instruct loadConD0(regD dst, immD0 src) %{ 6068 match(Set dst src); 6069 ins_cost(100); 6070 format %{ "XORPD $dst,$dst\t# double 0.0" %} 6071 ins_encode %{ 6072 __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); 6073 %} 6074 ins_pipe( pipe_slow ); 6075 %} 6076 6077 // Load Stack Slot 6078 instruct loadSSI(rRegI dst, stackSlotI src) %{ 6079 match(Set dst src); 6080 ins_cost(125); 6081 6082 format %{ "MOV $dst,$src" %} 6083 opcode(0x8B); 6084 ins_encode( OpcP, RegMem(dst,src)); 6085 ins_pipe( ialu_reg_mem ); 6086 %} 6087 6088 instruct loadSSL(eRegL dst, stackSlotL src) %{ 6089 match(Set dst src); 6090 6091 ins_cost(200); 6092 format %{ "MOV $dst,$src.lo\n\t" 6093 "MOV $dst+4,$src.hi" %} 6094 opcode(0x8B, 0x8B); 6095 ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) ); 6096 ins_pipe( ialu_mem_long_reg ); 6097 %} 6098 6099 // Load Stack Slot 6100 instruct loadSSP(eRegP dst, stackSlotP src) %{ 6101 match(Set dst src); 6102 ins_cost(125); 6103 6104 format %{ "MOV $dst,$src" %} 6105 opcode(0x8B); 6106 ins_encode( OpcP, RegMem(dst,src)); 6107 ins_pipe( ialu_reg_mem ); 6108 %} 6109 6110 // Load Stack Slot 6111 instruct loadSSF(regFPR dst, stackSlotF src) %{ 6112 match(Set dst src); 6113 ins_cost(125); 6114 6115 format %{ "FLD_S $src\n\t" 6116 "FSTP $dst" %} 6117 opcode(0xD9); /* D9 /0, FLD m32real */ 6118 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6119 Pop_Reg_FPR(dst) ); 6120 ins_pipe( fpu_reg_mem ); 6121 %} 6122 6123 // Load Stack Slot 6124 instruct loadSSD(regDPR dst, stackSlotD src) %{ 6125 match(Set dst src); 6126 ins_cost(125); 6127 6128 format %{ "FLD_D $src\n\t" 6129 
"FSTP $dst" %} 6130 opcode(0xDD); /* DD /0, FLD m64real */ 6131 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 6132 Pop_Reg_DPR(dst) ); 6133 ins_pipe( fpu_reg_mem ); 6134 %} 6135 6136 // Prefetch instructions for allocation. 6137 // Must be safe to execute with invalid address (cannot fault). 6138 6139 instruct prefetchAlloc0( memory mem ) %{ 6140 predicate(UseSSE==0 && AllocatePrefetchInstr!=3); 6141 match(PrefetchAllocation mem); 6142 ins_cost(0); 6143 size(0); 6144 format %{ "Prefetch allocation (non-SSE is empty encoding)" %} 6145 ins_encode(); 6146 ins_pipe(empty); 6147 %} 6148 6149 instruct prefetchAlloc( memory mem ) %{ 6150 predicate(AllocatePrefetchInstr==3); 6151 match( PrefetchAllocation mem ); 6152 ins_cost(100); 6153 6154 format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %} 6155 ins_encode %{ 6156 __ prefetchw($mem$$Address); 6157 %} 6158 ins_pipe(ialu_mem); 6159 %} 6160 6161 instruct prefetchAllocNTA( memory mem ) %{ 6162 predicate(UseSSE>=1 && AllocatePrefetchInstr==0); 6163 match(PrefetchAllocation mem); 6164 ins_cost(100); 6165 6166 format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %} 6167 ins_encode %{ 6168 __ prefetchnta($mem$$Address); 6169 %} 6170 ins_pipe(ialu_mem); 6171 %} 6172 6173 instruct prefetchAllocT0( memory mem ) %{ 6174 predicate(UseSSE>=1 && AllocatePrefetchInstr==1); 6175 match(PrefetchAllocation mem); 6176 ins_cost(100); 6177 6178 format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %} 6179 ins_encode %{ 6180 __ prefetcht0($mem$$Address); 6181 %} 6182 ins_pipe(ialu_mem); 6183 %} 6184 6185 instruct prefetchAllocT2( memory mem ) %{ 6186 predicate(UseSSE>=1 && AllocatePrefetchInstr==2); 6187 match(PrefetchAllocation mem); 6188 ins_cost(100); 6189 6190 format %{ "PREFETCHT2 $mem\t! 
Prefetch allocation into L2 cache for write" %} 6191 ins_encode %{ 6192 __ prefetcht2($mem$$Address); 6193 %} 6194 ins_pipe(ialu_mem); 6195 %} 6196 6197 //----------Store Instructions------------------------------------------------- 6198 6199 // Store Byte 6200 instruct storeB(memory mem, xRegI src) %{ 6201 match(Set mem (StoreB mem src)); 6202 6203 ins_cost(125); 6204 format %{ "MOV8 $mem,$src" %} 6205 opcode(0x88); 6206 ins_encode( OpcP, RegMem( src, mem ) ); 6207 ins_pipe( ialu_mem_reg ); 6208 %} 6209 6210 // Store Char/Short 6211 instruct storeC(memory mem, rRegI src) %{ 6212 match(Set mem (StoreC mem src)); 6213 6214 ins_cost(125); 6215 format %{ "MOV16 $mem,$src" %} 6216 opcode(0x89, 0x66); 6217 ins_encode( OpcS, OpcP, RegMem( src, mem ) ); 6218 ins_pipe( ialu_mem_reg ); 6219 %} 6220 6221 // Store Integer 6222 instruct storeI(memory mem, rRegI src) %{ 6223 match(Set mem (StoreI mem src)); 6224 6225 ins_cost(125); 6226 format %{ "MOV $mem,$src" %} 6227 opcode(0x89); 6228 ins_encode( OpcP, RegMem( src, mem ) ); 6229 ins_pipe( ialu_mem_reg ); 6230 %} 6231 6232 // Store Long 6233 instruct storeL(long_memory mem, eRegL src) %{ 6234 predicate(!((StoreLNode*)n)->require_atomic_access()); 6235 match(Set mem (StoreL mem src)); 6236 6237 ins_cost(200); 6238 format %{ "MOV $mem,$src.lo\n\t" 6239 "MOV $mem+4,$src.hi" %} 6240 opcode(0x89, 0x89); 6241 ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) ); 6242 ins_pipe( ialu_mem_long_reg ); 6243 %} 6244 6245 // Store Long to Integer 6246 instruct storeL2I(memory mem, eRegL src) %{ 6247 match(Set mem (StoreI mem (ConvL2I src))); 6248 6249 format %{ "MOV $mem,$src.lo\t# long -> int" %} 6250 ins_encode %{ 6251 __ movl($mem$$Address, $src$$Register); 6252 %} 6253 ins_pipe(ialu_mem_reg); 6254 %} 6255 6256 // Volatile Store Long. Must be atomic, so move it into 6257 // the FP TOS and then do a 64-bit FIST. 
Has to probe the 6258 // target address before the store (for null-ptr checks) 6259 // so the memory operand is used twice in the encoding. 6260 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{ 6261 predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access()); 6262 match(Set mem (StoreL mem src)); 6263 effect( KILL cr ); 6264 ins_cost(400); 6265 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6266 "FILD $src\n\t" 6267 "FISTp $mem\t # 64-bit atomic volatile long store" %} 6268 opcode(0x3B); 6269 ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); 6270 ins_pipe( fpu_reg_mem ); 6271 %} 6272 6273 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ 6274 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6275 match(Set mem (StoreL mem src)); 6276 effect( TEMP tmp, KILL cr ); 6277 ins_cost(380); 6278 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6279 "MOVSD $tmp,$src\n\t" 6280 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6281 ins_encode %{ 6282 __ cmpl(rax, $mem$$Address); 6283 __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp)); 6284 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6285 %} 6286 ins_pipe( pipe_slow ); 6287 %} 6288 6289 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ 6290 predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); 6291 match(Set mem (StoreL mem src)); 6292 effect( TEMP tmp2 , TEMP tmp, KILL cr ); 6293 ins_cost(360); 6294 format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" 6295 "MOVD $tmp,$src.lo\n\t" 6296 "MOVD $tmp2,$src.hi\n\t" 6297 "PUNPCKLDQ $tmp,$tmp2\n\t" 6298 "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} 6299 ins_encode %{ 6300 __ cmpl(rax, $mem$$Address); 6301 __ movdl($tmp$$XMMRegister, $src$$Register); 6302 __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 6303 __ punpckldq($tmp$$XMMRegister, 
$tmp2$$XMMRegister); 6304 __ movdbl($mem$$Address, $tmp$$XMMRegister); 6305 %} 6306 ins_pipe( pipe_slow ); 6307 %} 6308 6309 // Store Pointer; for storing unknown oops and raw pointers 6310 instruct storeP(memory mem, anyRegP src) %{ 6311 match(Set mem (StoreP mem src)); 6312 6313 ins_cost(125); 6314 format %{ "MOV $mem,$src" %} 6315 opcode(0x89); 6316 ins_encode( OpcP, RegMem( src, mem ) ); 6317 ins_pipe( ialu_mem_reg ); 6318 %} 6319 6320 // Store Integer Immediate 6321 instruct storeImmI(memory mem, immI src) %{ 6322 match(Set mem (StoreI mem src)); 6323 6324 ins_cost(150); 6325 format %{ "MOV $mem,$src" %} 6326 opcode(0xC7); /* C7 /0 */ 6327 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6328 ins_pipe( ialu_mem_imm ); 6329 %} 6330 6331 // Store Short/Char Immediate 6332 instruct storeImmI16(memory mem, immI16 src) %{ 6333 predicate(UseStoreImmI16); 6334 match(Set mem (StoreC mem src)); 6335 6336 ins_cost(150); 6337 format %{ "MOV16 $mem,$src" %} 6338 opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */ 6339 ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16( src )); 6340 ins_pipe( ialu_mem_imm ); 6341 %} 6342 6343 // Store Pointer Immediate; null pointers or constant oops that do not 6344 // need card-mark barriers. 
6345 instruct storeImmP(memory mem, immP src) %{ 6346 match(Set mem (StoreP mem src)); 6347 6348 ins_cost(150); 6349 format %{ "MOV $mem,$src" %} 6350 opcode(0xC7); /* C7 /0 */ 6351 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32( src )); 6352 ins_pipe( ialu_mem_imm ); 6353 %} 6354 6355 // Store Byte Immediate 6356 instruct storeImmB(memory mem, immI8 src) %{ 6357 match(Set mem (StoreB mem src)); 6358 6359 ins_cost(150); 6360 format %{ "MOV8 $mem,$src" %} 6361 opcode(0xC6); /* C6 /0 */ 6362 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 6363 ins_pipe( ialu_mem_imm ); 6364 %} 6365 6366 // Store CMS card-mark Immediate 6367 instruct storeImmCM(memory mem, immI8 src) %{ 6368 match(Set mem (StoreCM mem src)); 6369 6370 ins_cost(150); 6371 format %{ "MOV8 $mem,$src\t! CMS card-mark imm0" %} 6372 opcode(0xC6); /* C6 /0 */ 6373 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con8or32( src )); 6374 ins_pipe( ialu_mem_imm ); 6375 %} 6376 6377 // Store Double 6378 instruct storeDPR( memory mem, regDPR1 src) %{ 6379 predicate(UseSSE<=1); 6380 match(Set mem (StoreD mem src)); 6381 6382 ins_cost(100); 6383 format %{ "FST_D $mem,$src" %} 6384 opcode(0xDD); /* DD /2 */ 6385 ins_encode( enc_FPR_store(mem,src) ); 6386 ins_pipe( fpu_mem_reg ); 6387 %} 6388 6389 // Store double does rounding on x86 6390 instruct storeDPR_rounded( memory mem, regDPR1 src) %{ 6391 predicate(UseSSE<=1); 6392 match(Set mem (StoreD mem (RoundDouble src))); 6393 6394 ins_cost(100); 6395 format %{ "FST_D $mem,$src\t# round" %} 6396 opcode(0xDD); /* DD /2 */ 6397 ins_encode( enc_FPR_store(mem,src) ); 6398 ins_pipe( fpu_mem_reg ); 6399 %} 6400 6401 // Store XMM register to memory (double-precision floating points) 6402 // MOVSD instruction 6403 instruct storeD(memory mem, regD src) %{ 6404 predicate(UseSSE>=2); 6405 match(Set mem (StoreD mem src)); 6406 ins_cost(95); 6407 format %{ "MOVSD $mem,$src" %} 6408 ins_encode %{ 6409 __ movdbl($mem$$Address, $src$$XMMRegister); 6410 %} 6411 ins_pipe( pipe_slow ); 6412 
%} 6413 6414 // Store XMM register to memory (single-precision floating point) 6415 // MOVSS instruction 6416 instruct storeF(memory mem, regF src) %{ 6417 predicate(UseSSE>=1); 6418 match(Set mem (StoreF mem src)); 6419 ins_cost(95); 6420 format %{ "MOVSS $mem,$src" %} 6421 ins_encode %{ 6422 __ movflt($mem$$Address, $src$$XMMRegister); 6423 %} 6424 ins_pipe( pipe_slow ); 6425 %} 6426 6427 // Store Float 6428 instruct storeFPR( memory mem, regFPR1 src) %{ 6429 predicate(UseSSE==0); 6430 match(Set mem (StoreF mem src)); 6431 6432 ins_cost(100); 6433 format %{ "FST_S $mem,$src" %} 6434 opcode(0xD9); /* D9 /2 */ 6435 ins_encode( enc_FPR_store(mem,src) ); 6436 ins_pipe( fpu_mem_reg ); 6437 %} 6438 6439 // Store Float does rounding on x86 6440 instruct storeFPR_rounded( memory mem, regFPR1 src) %{ 6441 predicate(UseSSE==0); 6442 match(Set mem (StoreF mem (RoundFloat src))); 6443 6444 ins_cost(100); 6445 format %{ "FST_S $mem,$src\t# round" %} 6446 opcode(0xD9); /* D9 /2 */ 6447 ins_encode( enc_FPR_store(mem,src) ); 6448 ins_pipe( fpu_mem_reg ); 6449 %} 6450 6451 // Store Float does rounding on x86 6452 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{ 6453 predicate(UseSSE<=1); 6454 match(Set mem (StoreF mem (ConvD2F src))); 6455 6456 ins_cost(100); 6457 format %{ "FST_S $mem,$src\t# D-round" %} 6458 opcode(0xD9); /* D9 /2 */ 6459 ins_encode( enc_FPR_store(mem,src) ); 6460 ins_pipe( fpu_mem_reg ); 6461 %} 6462 6463 // Store immediate Float value (it is faster than store from FPU register) 6464 // The instruction usage is guarded by predicate in operand immFPR(). 
6465 instruct storeFPR_imm( memory mem, immFPR src) %{ 6466 match(Set mem (StoreF mem src)); 6467 6468 ins_cost(50); 6469 format %{ "MOV $mem,$src\t# store float" %} 6470 opcode(0xC7); /* C7 /0 */ 6471 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits( src )); 6472 ins_pipe( ialu_mem_imm ); 6473 %} 6474 6475 // Store immediate Float value (it is faster than store from XMM register) 6476 // The instruction usage is guarded by predicate in operand immF(). 6477 instruct storeF_imm( memory mem, immF src) %{ 6478 match(Set mem (StoreF mem src)); 6479 6480 ins_cost(50); 6481 format %{ "MOV $mem,$src\t# store float" %} 6482 opcode(0xC7); /* C7 /0 */ 6483 ins_encode( OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits( src )); 6484 ins_pipe( ialu_mem_imm ); 6485 %} 6486 6487 // Store Integer to stack slot 6488 instruct storeSSI(stackSlotI dst, rRegI src) %{ 6489 match(Set dst src); 6490 6491 ins_cost(100); 6492 format %{ "MOV $dst,$src" %} 6493 opcode(0x89); 6494 ins_encode( OpcPRegSS( dst, src ) ); 6495 ins_pipe( ialu_mem_reg ); 6496 %} 6497 6498 // Store Integer to stack slot 6499 instruct storeSSP(stackSlotP dst, eRegP src) %{ 6500 match(Set dst src); 6501 6502 ins_cost(100); 6503 format %{ "MOV $dst,$src" %} 6504 opcode(0x89); 6505 ins_encode( OpcPRegSS( dst, src ) ); 6506 ins_pipe( ialu_mem_reg ); 6507 %} 6508 6509 // Store Long to stack slot 6510 instruct storeSSL(stackSlotL dst, eRegL src) %{ 6511 match(Set dst src); 6512 6513 ins_cost(200); 6514 format %{ "MOV $dst,$src.lo\n\t" 6515 "MOV $dst+4,$src.hi" %} 6516 opcode(0x89, 0x89); 6517 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 6518 ins_pipe( ialu_mem_long_reg ); 6519 %} 6520 6521 //----------MemBar Instructions----------------------------------------------- 6522 // Memory barrier flavors 6523 6524 instruct membar_acquire() %{ 6525 match(MemBarAcquire); 6526 match(LoadFence); 6527 ins_cost(400); 6528 6529 size(0); 6530 format %{ "MEMBAR-acquire ! 
(empty encoding)" %} 6531 ins_encode(); 6532 ins_pipe(empty); 6533 %} 6534 6535 instruct membar_acquire_lock() %{ 6536 match(MemBarAcquireLock); 6537 ins_cost(0); 6538 6539 size(0); 6540 format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %} 6541 ins_encode( ); 6542 ins_pipe(empty); 6543 %} 6544 6545 instruct membar_release() %{ 6546 match(MemBarRelease); 6547 match(StoreFence); 6548 ins_cost(400); 6549 6550 size(0); 6551 format %{ "MEMBAR-release ! (empty encoding)" %} 6552 ins_encode( ); 6553 ins_pipe(empty); 6554 %} 6555 6556 instruct membar_release_lock() %{ 6557 match(MemBarReleaseLock); 6558 ins_cost(0); 6559 6560 size(0); 6561 format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %} 6562 ins_encode( ); 6563 ins_pipe(empty); 6564 %} 6565 6566 instruct membar_volatile(eFlagsReg cr) %{ 6567 match(MemBarVolatile); 6568 effect(KILL cr); 6569 ins_cost(400); 6570 6571 format %{ 6572 $$template 6573 if (os::is_MP()) { 6574 $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile" 6575 } else { 6576 $$emit$$"MEMBAR-volatile ! 
(empty encoding)" 6577 } 6578 %} 6579 ins_encode %{ 6580 __ membar(Assembler::StoreLoad); 6581 %} 6582 ins_pipe(pipe_slow); 6583 %} 6584 6585 instruct unnecessary_membar_volatile() %{ 6586 match(MemBarVolatile); 6587 predicate(Matcher::post_store_load_barrier(n)); 6588 ins_cost(0); 6589 6590 size(0); 6591 format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} 6592 ins_encode( ); 6593 ins_pipe(empty); 6594 %} 6595 6596 instruct membar_storestore() %{ 6597 match(MemBarStoreStore); 6598 ins_cost(0); 6599 6600 size(0); 6601 format %{ "MEMBAR-storestore (empty encoding)" %} 6602 ins_encode( ); 6603 ins_pipe(empty); 6604 %} 6605 6606 //----------Move Instructions-------------------------------------------------- 6607 instruct castX2P(eAXRegP dst, eAXRegI src) %{ 6608 match(Set dst (CastX2P src)); 6609 format %{ "# X2P $dst, $src" %} 6610 ins_encode( /*empty encoding*/ ); 6611 ins_cost(0); 6612 ins_pipe(empty); 6613 %} 6614 6615 instruct castP2X(rRegI dst, eRegP src ) %{ 6616 match(Set dst (CastP2X src)); 6617 ins_cost(50); 6618 format %{ "MOV $dst, $src\t# CastP2X" %} 6619 ins_encode( enc_Copy( dst, src) ); 6620 ins_pipe( ialu_reg_reg ); 6621 %} 6622 6623 //----------Conditional Move--------------------------------------------------- 6624 // Conditional move 6625 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ 6626 predicate(!VM_Version::supports_cmov() ); 6627 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6628 ins_cost(200); 6629 format %{ "J$cop,us skip\t# signed cmove\n\t" 6630 "MOV $dst,$src\n" 6631 "skip:" %} 6632 ins_encode %{ 6633 Label Lskip; 6634 // Invert sense of branch from sense of CMOV 6635 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6636 __ movl($dst$$Register, $src$$Register); 6637 __ bind(Lskip); 6638 %} 6639 ins_pipe( pipe_cmov_reg ); 6640 %} 6641 6642 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ 6643 predicate(!VM_Version::supports_cmov() ); 6644 match(Set dst (CMoveI 
(Binary cop cr) (Binary dst src))); 6645 ins_cost(200); 6646 format %{ "J$cop,us skip\t# unsigned cmove\n\t" 6647 "MOV $dst,$src\n" 6648 "skip:" %} 6649 ins_encode %{ 6650 Label Lskip; 6651 // Invert sense of branch from sense of CMOV 6652 __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); 6653 __ movl($dst$$Register, $src$$Register); 6654 __ bind(Lskip); 6655 %} 6656 ins_pipe( pipe_cmov_reg ); 6657 %} 6658 6659 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ 6660 predicate(VM_Version::supports_cmov() ); 6661 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6662 ins_cost(200); 6663 format %{ "CMOV$cop $dst,$src" %} 6664 opcode(0x0F,0x40); 6665 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6666 ins_pipe( pipe_cmov_reg ); 6667 %} 6668 6669 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ 6670 predicate(VM_Version::supports_cmov() ); 6671 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6672 ins_cost(200); 6673 format %{ "CMOV$cop $dst,$src" %} 6674 opcode(0x0F,0x40); 6675 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6676 ins_pipe( pipe_cmov_reg ); 6677 %} 6678 6679 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ 6680 predicate(VM_Version::supports_cmov() ); 6681 match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); 6682 ins_cost(200); 6683 expand %{ 6684 cmovI_regU(cop, cr, dst, src); 6685 %} 6686 %} 6687 6688 // Conditional move 6689 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{ 6690 predicate(VM_Version::supports_cmov() ); 6691 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6692 ins_cost(250); 6693 format %{ "CMOV$cop $dst,$src" %} 6694 opcode(0x0F,0x40); 6695 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6696 ins_pipe( pipe_cmov_mem ); 6697 %} 6698 6699 // Conditional move 6700 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ 6701 predicate(VM_Version::supports_cmov() ); 6702 
match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6703 ins_cost(250); 6704 format %{ "CMOV$cop $dst,$src" %} 6705 opcode(0x0F,0x40); 6706 ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6707 ins_pipe( pipe_cmov_mem ); 6708 %} 6709 6710 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ 6711 predicate(VM_Version::supports_cmov() ); 6712 match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); 6713 ins_cost(250); 6714 expand %{ 6715 cmovI_memU(cop, cr, dst, src); 6716 %} 6717 %} 6718 6719 // Conditional move 6720 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6721 predicate(VM_Version::supports_cmov() ); 6722 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6723 ins_cost(200); 6724 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6725 opcode(0x0F,0x40); 6726 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6727 ins_pipe( pipe_cmov_reg ); 6728 %} 6729 6730 // Conditional move (non-P6 version) 6731 // Note: a CMoveP is generated for stubs and native wrappers 6732 // regardless of whether we are on a P6, so we 6733 // emulate a cmov here 6734 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ 6735 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6736 ins_cost(300); 6737 format %{ "Jn$cop skip\n\t" 6738 "MOV $dst,$src\t# pointer\n" 6739 "skip:" %} 6740 opcode(0x8b); 6741 ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); 6742 ins_pipe( pipe_cmov_reg ); 6743 %} 6744 6745 // Conditional move 6746 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ 6747 predicate(VM_Version::supports_cmov() ); 6748 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6749 ins_cost(200); 6750 format %{ "CMOV$cop $dst,$src\t# ptr" %} 6751 opcode(0x0F,0x40); 6752 ins_encode( enc_cmov(cop), RegReg( dst, src ) ); 6753 ins_pipe( pipe_cmov_reg ); 6754 %} 6755 6756 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ 6757 
predicate(VM_Version::supports_cmov() ); 6758 match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); 6759 ins_cost(200); 6760 expand %{ 6761 cmovP_regU(cop, cr, dst, src); 6762 %} 6763 %} 6764 6765 // DISABLED: Requires the ADLC to emit a bottom_type call that 6766 // correctly meets the two pointer arguments; one is an incoming 6767 // register but the other is a memory operand. ALSO appears to 6768 // be buggy with implicit null checks. 6769 // 6770 //// Conditional move 6771 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{ 6772 // predicate(VM_Version::supports_cmov() ); 6773 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6774 // ins_cost(250); 6775 // format %{ "CMOV$cop $dst,$src\t# ptr" %} 6776 // opcode(0x0F,0x40); 6777 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6778 // ins_pipe( pipe_cmov_mem ); 6779 //%} 6780 // 6781 //// Conditional move 6782 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{ 6783 // predicate(VM_Version::supports_cmov() ); 6784 // match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); 6785 // ins_cost(250); 6786 // format %{ "CMOV$cop $dst,$src\t# ptr" %} 6787 // opcode(0x0F,0x40); 6788 // ins_encode( enc_cmov(cop), RegMem( dst, src ) ); 6789 // ins_pipe( pipe_cmov_mem ); 6790 //%} 6791 6792 // Conditional move 6793 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ 6794 predicate(UseSSE<=1); 6795 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6796 ins_cost(200); 6797 format %{ "FCMOV$cop $dst,$src\t# double" %} 6798 opcode(0xDA); 6799 ins_encode( enc_cmov_dpr(cop,src) ); 6800 ins_pipe( pipe_cmovDPR_reg ); 6801 %} 6802 6803 // Conditional move 6804 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ 6805 predicate(UseSSE==0); 6806 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6807 ins_cost(200); 6808 format %{ "FCMOV$cop $dst,$src\t# float" %} 6809 opcode(0xDA); 
6810 ins_encode( enc_cmov_dpr(cop,src) ); 6811 ins_pipe( pipe_cmovDPR_reg ); 6812 %} 6813 6814 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6815 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ 6816 predicate(UseSSE<=1); 6817 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6818 ins_cost(200); 6819 format %{ "Jn$cop skip\n\t" 6820 "MOV $dst,$src\t# double\n" 6821 "skip:" %} 6822 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6823 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); 6824 ins_pipe( pipe_cmovDPR_reg ); 6825 %} 6826 6827 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned. 6828 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ 6829 predicate(UseSSE==0); 6830 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6831 ins_cost(200); 6832 format %{ "Jn$cop skip\n\t" 6833 "MOV $dst,$src\t# float\n" 6834 "skip:" %} 6835 opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ 6836 ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); 6837 ins_pipe( pipe_cmovDPR_reg ); 6838 %} 6839 6840 // No CMOVE with SSE/SSE2 6841 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ 6842 predicate (UseSSE>=1); 6843 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6844 ins_cost(200); 6845 format %{ "Jn$cop skip\n\t" 6846 "MOVSS $dst,$src\t# float\n" 6847 "skip:" %} 6848 ins_encode %{ 6849 Label skip; 6850 // Invert sense of branch from sense of CMOV 6851 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6852 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6853 __ bind(skip); 6854 %} 6855 ins_pipe( pipe_slow ); 6856 %} 6857 6858 // No CMOVE with SSE/SSE2 6859 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ 6860 predicate (UseSSE>=2); 6861 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6862 ins_cost(200); 6863 format %{ "Jn$cop skip\n\t" 6864 "MOVSD $dst,$src\t# 
float\n" 6865 "skip:" %} 6866 ins_encode %{ 6867 Label skip; 6868 // Invert sense of branch from sense of CMOV 6869 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6870 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6871 __ bind(skip); 6872 %} 6873 ins_pipe( pipe_slow ); 6874 %} 6875 6876 // unsigned version 6877 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ 6878 predicate (UseSSE>=1); 6879 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6880 ins_cost(200); 6881 format %{ "Jn$cop skip\n\t" 6882 "MOVSS $dst,$src\t# float\n" 6883 "skip:" %} 6884 ins_encode %{ 6885 Label skip; 6886 // Invert sense of branch from sense of CMOV 6887 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6888 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6889 __ bind(skip); 6890 %} 6891 ins_pipe( pipe_slow ); 6892 %} 6893 6894 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ 6895 predicate (UseSSE>=1); 6896 match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); 6897 ins_cost(200); 6898 expand %{ 6899 fcmovF_regU(cop, cr, dst, src); 6900 %} 6901 %} 6902 6903 // unsigned version 6904 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ 6905 predicate (UseSSE>=2); 6906 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6907 ins_cost(200); 6908 format %{ "Jn$cop skip\n\t" 6909 "MOVSD $dst,$src\t# float\n" 6910 "skip:" %} 6911 ins_encode %{ 6912 Label skip; 6913 // Invert sense of branch from sense of CMOV 6914 __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); 6915 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6916 __ bind(skip); 6917 %} 6918 ins_pipe( pipe_slow ); 6919 %} 6920 6921 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ 6922 predicate (UseSSE>=2); 6923 match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); 6924 ins_cost(200); 6925 expand %{ 6926 fcmovD_regU(cop, cr, dst, src); 6927 %} 6928 %} 6929 6930 instruct cmovL_reg(cmpOp cop, 
eFlagsReg cr, eRegL dst, eRegL src) %{ 6931 predicate(VM_Version::supports_cmov() ); 6932 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6933 ins_cost(200); 6934 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 6935 "CMOV$cop $dst.hi,$src.hi" %} 6936 opcode(0x0F,0x40); 6937 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 6938 ins_pipe( pipe_cmov_reg_long ); 6939 %} 6940 6941 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ 6942 predicate(VM_Version::supports_cmov() ); 6943 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6944 ins_cost(200); 6945 format %{ "CMOV$cop $dst.lo,$src.lo\n\t" 6946 "CMOV$cop $dst.hi,$src.hi" %} 6947 opcode(0x0F,0x40); 6948 ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); 6949 ins_pipe( pipe_cmov_reg_long ); 6950 %} 6951 6952 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ 6953 predicate(VM_Version::supports_cmov() ); 6954 match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); 6955 ins_cost(200); 6956 expand %{ 6957 cmovL_regU(cop, cr, dst, src); 6958 %} 6959 %} 6960 6961 //----------Arithmetic Instructions-------------------------------------------- 6962 //----------Addition Instructions---------------------------------------------- 6963 6964 // Integer Addition Instructions 6965 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 6966 match(Set dst (AddI dst src)); 6967 effect(KILL cr); 6968 6969 size(2); 6970 format %{ "ADD $dst,$src" %} 6971 opcode(0x03); 6972 ins_encode( OpcP, RegReg( dst, src) ); 6973 ins_pipe( ialu_reg_reg ); 6974 %} 6975 6976 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 6977 match(Set dst (AddI dst src)); 6978 effect(KILL cr); 6979 6980 format %{ "ADD $dst,$src" %} 6981 opcode(0x81, 0x00); /* /0 id */ 6982 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 6983 ins_pipe( ialu_reg ); 6984 %} 6985 6986 instruct incI_eReg(rRegI dst, immI1 src, 
eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC $dst" %}
  // Single-byte INC: 0x40 + register encoding (Opc_plus folds dst into primary).
  opcode(0x40); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Add via LEA: three-operand form, does not write EFLAGS (no KILL cr).
instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer add via LEA, same encoding as the integer form above.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Decrement: matches AddI with a -1 immediate; single-byte 0x48 + reg.
instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC $dst" %}
  opcode(0x48); /* */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Add Register to Pointer
instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add Immediate to Pointer
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add Memory to Register
instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Add Register to Memory (read-modify-write)
instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD $dst,$src" %}
  opcode(0x01); /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD $dst,$src" %}
  opcode(0x81); /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Increment Memory (matches add of +1 constant)
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC $dst" %}
  opcode(0xFF); /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Decrement Memory (matches add of -1 constant)
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC $dst" %}
  opcode(0xFF); /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}


// CheckCastPP is a no-op at the machine level: size(0), empty encoding.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}

instruct castII( rRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
7128 ins_encode( /*empty encoding*/ ); 7129 ins_cost(0); 7130 ins_pipe( empty ); 7131 %} 7132 7133 7134 // Load-locked - same as a regular pointer load when used with compare-swap 7135 instruct loadPLocked(eRegP dst, memory mem) %{ 7136 match(Set dst (LoadPLocked mem)); 7137 7138 ins_cost(125); 7139 format %{ "MOV $dst,$mem\t# Load ptr. locked" %} 7140 opcode(0x8B); 7141 ins_encode( OpcP, RegMem(dst,mem)); 7142 ins_pipe( ialu_reg_mem ); 7143 %} 7144 7145 // Conditional-store of the updated heap-top. 7146 // Used during allocation of the shared heap. 7147 // Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. 7148 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{ 7149 match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); 7150 // EAX is killed if there is contention, but then it's also unused. 7151 // In the common case of no contention, EAX holds the new oop address. 7152 format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %} 7153 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) ); 7154 ins_pipe( pipe_cmpxchg ); 7155 %} 7156 7157 // Conditional-store of an int value. 7158 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. 7159 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{ 7160 match(Set cr (StoreIConditional mem (Binary oldval newval))); 7161 effect(KILL oldval); 7162 format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %} 7163 ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) ); 7164 ins_pipe( pipe_cmpxchg ); 7165 %} 7166 7167 // Conditional-store of a long value. 7168 // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG8 on Intel. 
7169 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7170 match(Set cr (StoreLConditional mem (Binary oldval newval))); 7171 effect(KILL oldval); 7172 format %{ "XCHG EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t" 7173 "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t" 7174 "XCHG EBX,ECX" 7175 %} 7176 ins_encode %{ 7177 // Note: we need to swap rbx, and rcx before and after the 7178 // cmpxchg8 instruction because the instruction uses 7179 // rcx as the high order word of the new value to store but 7180 // our register encoding uses rbx. 7181 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7182 if( os::is_MP() ) 7183 __ lock(); 7184 __ cmpxchg8($mem$$Address); 7185 __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc)); 7186 %} 7187 ins_pipe( pipe_cmpxchg ); 7188 %} 7189 7190 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them 7191 7192 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ 7193 predicate(VM_Version::supports_cx8()); 7194 match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); 7195 effect(KILL cr, KILL oldval); 7196 format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7197 "MOV $res,0\n\t" 7198 "JNE,s fail\n\t" 7199 "MOV $res,1\n" 7200 "fail:" %} 7201 ins_encode( enc_cmpxchg8(mem_ptr), 7202 enc_flags_ne_to_boolean(res) ); 7203 ins_pipe( pipe_cmpxchg ); 7204 %} 7205 7206 instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ 7207 match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); 7208 effect(KILL cr, KILL oldval); 7209 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7210 "MOV $res,0\n\t" 7211 "JNE,s fail\n\t" 7212 "MOV $res,1\n" 7213 "fail:" %} 7214 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7215 
ins_pipe( pipe_cmpxchg ); 7216 %} 7217 7218 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ 7219 match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); 7220 effect(KILL cr, KILL oldval); 7221 format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" 7222 "MOV $res,0\n\t" 7223 "JNE,s fail\n\t" 7224 "MOV $res,1\n" 7225 "fail:" %} 7226 ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); 7227 ins_pipe( pipe_cmpxchg ); 7228 %} 7229 7230 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ 7231 predicate(n->as_LoadStore()->result_not_used()); 7232 match(Set dummy (GetAndAddI mem add)); 7233 effect(KILL cr); 7234 format %{ "ADDL [$mem],$add" %} 7235 ins_encode %{ 7236 if (os::is_MP()) { __ lock(); } 7237 __ addl($mem$$Address, $add$$constant); 7238 %} 7239 ins_pipe( pipe_cmpxchg ); 7240 %} 7241 7242 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ 7243 match(Set newval (GetAndAddI mem newval)); 7244 effect(KILL cr); 7245 format %{ "XADDL [$mem],$newval" %} 7246 ins_encode %{ 7247 if (os::is_MP()) { __ lock(); } 7248 __ xaddl($mem$$Address, $newval$$Register); 7249 %} 7250 ins_pipe( pipe_cmpxchg ); 7251 %} 7252 7253 instruct xchgI( memory mem, rRegI newval) %{ 7254 match(Set newval (GetAndSetI mem newval)); 7255 format %{ "XCHGL $newval,[$mem]" %} 7256 ins_encode %{ 7257 __ xchgl($newval$$Register, $mem$$Address); 7258 %} 7259 ins_pipe( pipe_cmpxchg ); 7260 %} 7261 7262 instruct xchgP( memory mem, pRegP newval) %{ 7263 match(Set newval (GetAndSetP mem newval)); 7264 format %{ "XCHGL $newval,[$mem]" %} 7265 ins_encode %{ 7266 __ xchgl($newval$$Register, $mem$$Address); 7267 %} 7268 ins_pipe( pipe_cmpxchg ); 7269 %} 7270 7271 //----------Subtraction Instructions------------------------------------------- 7272 7273 // Integer Subtraction Instructions 7274 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 7275 
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract Immediate from Register
instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB $dst,$src" %}
  opcode(0x81,0x05); /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract Memory from Register
instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Subtract Register from Memory (read-modify-write)
instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB $dst,$src" %}
  opcode(0x29); /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg );
%}

// Negate: matches 0 - dst.
instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG $dst" %}
  opcode(0xF7,0x03); // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low half of EDX:EAX only; helper for the
// mulI_imm_*high patterns below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
//  (special case for shift by 32)
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  // Predicate walks the matched subtree to require a long constant that fits in 32 bits.
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL EDX:EAX,$src1\n\t"
            "SAR EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL $dst,$src,$imm" %}
  opcode(0x69); /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Register Int to Long
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned 32x32->64 multiply: operands masked to their low 32 bits.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply Register Long
instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MOV EDX,$src.hi\n\t"
            "IMUL EDX,EAX\n\t"
            "ADD $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV $tmp,$src.hi\n\t"
            "IMUL $tmp,EAX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV $tmp,$src.lo\n\t"
            "IMUL $tmp,EDX\n\t"
            "MUL EDX:EAX,$src.lo\n\t"
            "ADD EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
  // Basic idea: lo(result) = lo(x_lo * y_lo)
  //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
  // Basic idea: lo(result) = lo(src * EAX)
  //             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL $tmp,EDX,$src\n\t"
            "MOV EDX,$src\n\t"
            "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Integer DIV with Register
// The CMP EAX,0x80000000 / CMP ECX,-1 preamble sidesteps the #DE overflow
// case of IDIV for min_jint / -1 (Java semantics: result is min_jint).
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::ldiv\n\t"
            "ADD ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP EAX,0x80000000\n\t"
            "JNE,s normal\n\t"
            "XOR EDX,EDX\n\t"
            "CMP ECX,-1\n\t"
            "JE,s done\n"
            "normal: CDQ\n\t"
            "IDIV $div\n\t"
            "done:" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV $div" %}
  opcode(0xF7, 0x7); /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH $src1.hi\n\t"
            "PUSH $src1.lo\n\t"
            "PUSH $src2.hi\n\t"
            "PUSH $src2.lo\n\t"
            "CALL SharedRuntime::lrem\n\t"
            "ADD ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Divide Register Long (no special case since divisor != -1)
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR $tmp2,$tmp2\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "XCHG EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "LNEG $tmp2 : EAX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "XCHG EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EDX,$tmp2\n\t"
            "NEG EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrough for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Remainder Register Long (remainder fit into 32 bits)
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP $tmp,EDX\n\t"
            "JA,s fast\n\t"
            "MOV $tmp2,EAX\n\t"
            "MOV EAX,EDX\n\t"
            "MOV EDX,0\n\t"
            "JLE,s pos\n\t"
            "LNEG EAX : $tmp2\n\t"
            "DIV $tmp # unsigned division\n\t"
            "MOV EAX,$tmp2\n\t"
            "DIV $tmp\n\t"
            "NEG EDX\n\t"
            "JMP,s done\n"
            "pos:\n\t"
            "DIV $tmp\n\t"
            "MOV EAX,$tmp2\n"
            "fast:\n\t"
            "DIV $tmp\n"
            "done:\n\t"
            "MOV EAX,EDX\n\t"
            "SAR EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}

// Integer Shift Instructions
// Shift Left by one
instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD1, 0x4); /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL $dst,$shift" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable (count pinned to CL by the eCXRegI operand class)
instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL $dst,$shift" %}
  opcode(0xD3, 0x4); /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Arithmetic shift right by one
instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD1, 0x7); /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR $dst,$shift" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR $dst,$shift" %}
  opcode(0xD3, 0x7); /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}

// Logical shift right by one
instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD1, 0x5); /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR $dst,$shift" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR $dst,$shift" %}
  opcode(0xD3, 0x5); /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}


//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register
instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate
instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND $dst,$src" %}
  opcode(0x81,0x04); /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory
instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register
instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND $dst,$src" %}
  opcode(0x21); /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND $dst,$src" %}
  opcode(0x81, 0x4); /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// BMI1 instructions
// ANDN matches (~src1) & src2, expressed in the ideal graph as XorI with -1.
instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  ins_cost(125);
  format %{ "ANDNL $dst, $src1, $src2" %}

  ins_encode %{
    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// BLSI: isolate lowest set bit, matched as (-src) & src.
instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndI (SubI imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr);

  format %{ "BLSIL $dst, $src" %}

  ins_encode %{
    __ blsil($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); 8038 predicate(UseBMI1Instructions); 8039 effect(KILL cr); 8040 8041 ins_cost(125); 8042 format %{ "BLSIL $dst, $src" %} 8043 8044 ins_encode %{ 8045 __ blsil($dst$$Register, $src$$Address); 8046 %} 8047 ins_pipe(ialu_reg_mem); 8048 %} 8049 8050 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8051 %{ 8052 match(Set dst (XorI (AddI src minus_1) src)); 8053 predicate(UseBMI1Instructions); 8054 effect(KILL cr); 8055 8056 format %{ "BLSMSKL $dst, $src" %} 8057 8058 ins_encode %{ 8059 __ blsmskl($dst$$Register, $src$$Register); 8060 %} 8061 8062 ins_pipe(ialu_reg); 8063 %} 8064 8065 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8066 %{ 8067 match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); 8068 predicate(UseBMI1Instructions); 8069 effect(KILL cr); 8070 8071 ins_cost(125); 8072 format %{ "BLSMSKL $dst, $src" %} 8073 8074 ins_encode %{ 8075 __ blsmskl($dst$$Register, $src$$Address); 8076 %} 8077 8078 ins_pipe(ialu_reg_mem); 8079 %} 8080 8081 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) 8082 %{ 8083 match(Set dst (AndI (AddI src minus_1) src) ); 8084 predicate(UseBMI1Instructions); 8085 effect(KILL cr); 8086 8087 format %{ "BLSRL $dst, $src" %} 8088 8089 ins_encode %{ 8090 __ blsrl($dst$$Register, $src$$Register); 8091 %} 8092 8093 ins_pipe(ialu_reg); 8094 %} 8095 8096 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) 8097 %{ 8098 match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); 8099 predicate(UseBMI1Instructions); 8100 effect(KILL cr); 8101 8102 ins_cost(125); 8103 format %{ "BLSRL $dst, $src" %} 8104 8105 ins_encode %{ 8106 __ blsrl($dst$$Register, $src$$Address); 8107 %} 8108 8109 ins_pipe(ialu_reg_mem); 8110 %} 8111 8112 // Or Instructions 8113 // Or Register with Register 8114 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8115 match(Set dst 
(OrI dst src)); 8116 effect(KILL cr); 8117 8118 size(2); 8119 format %{ "OR $dst,$src" %} 8120 opcode(0x0B); 8121 ins_encode( OpcP, RegReg( dst, src) ); 8122 ins_pipe( ialu_reg_reg ); 8123 %} 8124 8125 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ 8126 match(Set dst (OrI dst (CastP2X src))); 8127 effect(KILL cr); 8128 8129 size(2); 8130 format %{ "OR $dst,$src" %} 8131 opcode(0x0B); 8132 ins_encode( OpcP, RegReg( dst, src) ); 8133 ins_pipe( ialu_reg_reg ); 8134 %} 8135 8136 8137 // Or Register with Immediate 8138 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8139 match(Set dst (OrI dst src)); 8140 effect(KILL cr); 8141 8142 format %{ "OR $dst,$src" %} 8143 opcode(0x81,0x01); /* Opcode 81 /1 id */ 8144 // ins_encode( RegImm( dst, src) ); 8145 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8146 ins_pipe( ialu_reg ); 8147 %} 8148 8149 // Or Register with Memory 8150 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8151 match(Set dst (OrI dst (LoadI src))); 8152 effect(KILL cr); 8153 8154 ins_cost(125); 8155 format %{ "OR $dst,$src" %} 8156 opcode(0x0B); 8157 ins_encode( OpcP, RegMem( dst, src) ); 8158 ins_pipe( ialu_reg_mem ); 8159 %} 8160 8161 // Or Memory with Register 8162 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8163 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8164 effect(KILL cr); 8165 8166 ins_cost(150); 8167 format %{ "OR $dst,$src" %} 8168 opcode(0x09); /* Opcode 09 /r */ 8169 ins_encode( OpcP, RegMem( src, dst ) ); 8170 ins_pipe( ialu_mem_reg ); 8171 %} 8172 8173 // Or Memory with Immediate 8174 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8175 match(Set dst (StoreI dst (OrI (LoadI dst) src))); 8176 effect(KILL cr); 8177 8178 ins_cost(125); 8179 format %{ "OR $dst,$src" %} 8180 opcode(0x81,0x1); /* Opcode 81 /1 id */ 8181 // ins_encode( MemImm( dst, src) ); 8182 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8183 ins_pipe( ialu_mem_imm ); 
8184 %} 8185 8186 // ROL/ROR 8187 // ROL expand 8188 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8189 effect(USE_DEF dst, USE shift, KILL cr); 8190 8191 format %{ "ROL $dst, $shift" %} 8192 opcode(0xD1, 0x0); /* Opcode D1 /0 */ 8193 ins_encode( OpcP, RegOpc( dst )); 8194 ins_pipe( ialu_reg ); 8195 %} 8196 8197 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8198 effect(USE_DEF dst, USE shift, KILL cr); 8199 8200 format %{ "ROL $dst, $shift" %} 8201 opcode(0xC1, 0x0); /*Opcode /C1 /0 */ 8202 ins_encode( RegOpcImm(dst, shift) ); 8203 ins_pipe(ialu_reg); 8204 %} 8205 8206 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ 8207 effect(USE_DEF dst, USE shift, KILL cr); 8208 8209 format %{ "ROL $dst, $shift" %} 8210 opcode(0xD3, 0x0); /* Opcode D3 /0 */ 8211 ins_encode(OpcP, RegOpc(dst)); 8212 ins_pipe( ialu_reg_reg ); 8213 %} 8214 // end of ROL expand 8215 8216 // ROL 32bit by one once 8217 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{ 8218 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8219 8220 expand %{ 8221 rolI_eReg_imm1(dst, lshift, cr); 8222 %} 8223 %} 8224 8225 // ROL 32bit var by imm8 once 8226 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ 8227 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8228 match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); 8229 8230 expand %{ 8231 rolI_eReg_imm8(dst, lshift, cr); 8232 %} 8233 %} 8234 8235 // ROL 32bit var by var once 8236 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8237 match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); 8238 8239 expand %{ 8240 rolI_eReg_CL(dst, shift, cr); 8241 %} 8242 %} 8243 8244 // ROL 32bit var by var once 8245 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ 8246 match(Set dst ( OrI (LShiftI dst shift) (URShiftI 
dst (SubI c32 shift)))); 8247 8248 expand %{ 8249 rolI_eReg_CL(dst, shift, cr); 8250 %} 8251 %} 8252 8253 // ROR expand 8254 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{ 8255 effect(USE_DEF dst, USE shift, KILL cr); 8256 8257 format %{ "ROR $dst, $shift" %} 8258 opcode(0xD1,0x1); /* Opcode D1 /1 */ 8259 ins_encode( OpcP, RegOpc( dst ) ); 8260 ins_pipe( ialu_reg ); 8261 %} 8262 8263 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ 8264 effect (USE_DEF dst, USE shift, KILL cr); 8265 8266 format %{ "ROR $dst, $shift" %} 8267 opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ 8268 ins_encode( RegOpcImm(dst, shift) ); 8269 ins_pipe( ialu_reg ); 8270 %} 8271 8272 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ 8273 effect(USE_DEF dst, USE shift, KILL cr); 8274 8275 format %{ "ROR $dst, $shift" %} 8276 opcode(0xD3, 0x1); /* Opcode D3 /1 */ 8277 ins_encode(OpcP, RegOpc(dst)); 8278 ins_pipe( ialu_reg_reg ); 8279 %} 8280 // end of ROR expand 8281 8282 // ROR right once 8283 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{ 8284 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8285 8286 expand %{ 8287 rorI_eReg_imm1(dst, rshift, cr); 8288 %} 8289 %} 8290 8291 // ROR 32bit by immI8 once 8292 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ 8293 predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); 8294 match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); 8295 8296 expand %{ 8297 rorI_eReg_imm8(dst, rshift, cr); 8298 %} 8299 %} 8300 8301 // ROR 32bit var by var once 8302 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{ 8303 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); 8304 8305 expand %{ 8306 rorI_eReg_CL(dst, shift, cr); 8307 %} 8308 %} 8309 8310 // ROR 32bit var by var once 8311 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, 
eFlagsReg cr) %{ 8312 match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); 8313 8314 expand %{ 8315 rorI_eReg_CL(dst, shift, cr); 8316 %} 8317 %} 8318 8319 // Xor Instructions 8320 // Xor Register with Register 8321 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ 8322 match(Set dst (XorI dst src)); 8323 effect(KILL cr); 8324 8325 size(2); 8326 format %{ "XOR $dst,$src" %} 8327 opcode(0x33); 8328 ins_encode( OpcP, RegReg( dst, src) ); 8329 ins_pipe( ialu_reg_reg ); 8330 %} 8331 8332 // Xor Register with Immediate -1 8333 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ 8334 match(Set dst (XorI dst imm)); 8335 8336 size(2); 8337 format %{ "NOT $dst" %} 8338 ins_encode %{ 8339 __ notl($dst$$Register); 8340 %} 8341 ins_pipe( ialu_reg ); 8342 %} 8343 8344 // Xor Register with Immediate 8345 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ 8346 match(Set dst (XorI dst src)); 8347 effect(KILL cr); 8348 8349 format %{ "XOR $dst,$src" %} 8350 opcode(0x81,0x06); /* Opcode 81 /6 id */ 8351 // ins_encode( RegImm( dst, src) ); 8352 ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); 8353 ins_pipe( ialu_reg ); 8354 %} 8355 8356 // Xor Register with Memory 8357 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ 8358 match(Set dst (XorI dst (LoadI src))); 8359 effect(KILL cr); 8360 8361 ins_cost(125); 8362 format %{ "XOR $dst,$src" %} 8363 opcode(0x33); 8364 ins_encode( OpcP, RegMem(dst, src) ); 8365 ins_pipe( ialu_reg_mem ); 8366 %} 8367 8368 // Xor Memory with Register 8369 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ 8370 match(Set dst (StoreI dst (XorI (LoadI dst) src))); 8371 effect(KILL cr); 8372 8373 ins_cost(150); 8374 format %{ "XOR $dst,$src" %} 8375 opcode(0x31); /* Opcode 31 /r */ 8376 ins_encode( OpcP, RegMem( src, dst ) ); 8377 ins_pipe( ialu_mem_reg ); 8378 %} 8379 8380 // Xor Memory with Immediate 8381 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ 8382 match(Set dst (StoreI 
dst (XorI (LoadI dst) src))); 8383 effect(KILL cr); 8384 8385 ins_cost(125); 8386 format %{ "XOR $dst,$src" %} 8387 opcode(0x81,0x6); /* Opcode 81 /6 id */ 8388 ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) ); 8389 ins_pipe( ialu_mem_imm ); 8390 %} 8391 8392 //----------Convert Int to Boolean--------------------------------------------- 8393 8394 instruct movI_nocopy(rRegI dst, rRegI src) %{ 8395 effect( DEF dst, USE src ); 8396 format %{ "MOV $dst,$src" %} 8397 ins_encode( enc_Copy( dst, src) ); 8398 ins_pipe( ialu_reg_reg ); 8399 %} 8400 8401 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8402 effect( USE_DEF dst, USE src, KILL cr ); 8403 8404 size(4); 8405 format %{ "NEG $dst\n\t" 8406 "ADC $dst,$src" %} 8407 ins_encode( neg_reg(dst), 8408 OpcRegReg(0x13,dst,src) ); 8409 ins_pipe( ialu_reg_reg_long ); 8410 %} 8411 8412 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ 8413 match(Set dst (Conv2B src)); 8414 8415 expand %{ 8416 movI_nocopy(dst,src); 8417 ci2b(dst,src,cr); 8418 %} 8419 %} 8420 8421 instruct movP_nocopy(rRegI dst, eRegP src) %{ 8422 effect( DEF dst, USE src ); 8423 format %{ "MOV $dst,$src" %} 8424 ins_encode( enc_Copy( dst, src) ); 8425 ins_pipe( ialu_reg_reg ); 8426 %} 8427 8428 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8429 effect( USE_DEF dst, USE src, KILL cr ); 8430 format %{ "NEG $dst\n\t" 8431 "ADC $dst,$src" %} 8432 ins_encode( neg_reg(dst), 8433 OpcRegReg(0x13,dst,src) ); 8434 ins_pipe( ialu_reg_reg_long ); 8435 %} 8436 8437 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ 8438 match(Set dst (Conv2B src)); 8439 8440 expand %{ 8441 movP_nocopy(dst,src); 8442 cp2b(dst,src,cr); 8443 %} 8444 %} 8445 8446 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ 8447 match(Set dst (CmpLTMask p q)); 8448 effect(KILL cr); 8449 ins_cost(400); 8450 8451 // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination 8452 format %{ "XOR $dst,$dst\n\t" 8453 "CMP 
$p,$q\n\t" 8454 "SETlt $dst\n\t" 8455 "NEG $dst" %} 8456 ins_encode %{ 8457 Register Rp = $p$$Register; 8458 Register Rq = $q$$Register; 8459 Register Rd = $dst$$Register; 8460 Label done; 8461 __ xorl(Rd, Rd); 8462 __ cmpl(Rp, Rq); 8463 __ setb(Assembler::less, Rd); 8464 __ negl(Rd); 8465 %} 8466 8467 ins_pipe(pipe_slow); 8468 %} 8469 8470 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{ 8471 match(Set dst (CmpLTMask dst zero)); 8472 effect(DEF dst, KILL cr); 8473 ins_cost(100); 8474 8475 format %{ "SAR $dst,31\t# cmpLTMask0" %} 8476 ins_encode %{ 8477 __ sarl($dst$$Register, 31); 8478 %} 8479 ins_pipe(ialu_reg); 8480 %} 8481 8482 /* better to save a register than avoid a branch */ 8483 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8484 match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); 8485 effect(KILL cr); 8486 ins_cost(400); 8487 format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" 8488 "JGE done\n\t" 8489 "ADD $p,$y\n" 8490 "done: " %} 8491 ins_encode %{ 8492 Register Rp = $p$$Register; 8493 Register Rq = $q$$Register; 8494 Register Ry = $y$$Register; 8495 Label done; 8496 __ subl(Rp, Rq); 8497 __ jccb(Assembler::greaterEqual, done); 8498 __ addl(Rp, Ry); 8499 __ bind(done); 8500 %} 8501 8502 ins_pipe(pipe_cmplt); 8503 %} 8504 8505 /* better to save a register than avoid a branch */ 8506 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ 8507 match(Set y (AndI (CmpLTMask p q) y)); 8508 effect(KILL cr); 8509 8510 ins_cost(300); 8511 8512 format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" 8513 "JLT done\n\t" 8514 "XORL $y, $y\n" 8515 "done: " %} 8516 ins_encode %{ 8517 Register Rp = $p$$Register; 8518 Register Rq = $q$$Register; 8519 Register Ry = $y$$Register; 8520 Label done; 8521 __ cmpl(Rp, Rq); 8522 __ jccb(Assembler::less, done); 8523 __ xorl(Ry, Ry); 8524 __ bind(done); 8525 %} 8526 8527 ins_pipe(pipe_cmplt); 8528 %} 8529 8530 /* If I enable this, I encourage spilling in the inner loop of compress. 
8531 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ 8532 match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); 8533 */ 8534 //----------Overflow Math Instructions----------------------------------------- 8535 8536 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8537 %{ 8538 match(Set cr (OverflowAddI op1 op2)); 8539 effect(DEF cr, USE_KILL op1, USE op2); 8540 8541 format %{ "ADD $op1, $op2\t# overflow check int" %} 8542 8543 ins_encode %{ 8544 __ addl($op1$$Register, $op2$$Register); 8545 %} 8546 ins_pipe(ialu_reg_reg); 8547 %} 8548 8549 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) 8550 %{ 8551 match(Set cr (OverflowAddI op1 op2)); 8552 effect(DEF cr, USE_KILL op1, USE op2); 8553 8554 format %{ "ADD $op1, $op2\t# overflow check int" %} 8555 8556 ins_encode %{ 8557 __ addl($op1$$Register, $op2$$constant); 8558 %} 8559 ins_pipe(ialu_reg_reg); 8560 %} 8561 8562 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) 8563 %{ 8564 match(Set cr (OverflowSubI op1 op2)); 8565 8566 format %{ "CMP $op1, $op2\t# overflow check int" %} 8567 ins_encode %{ 8568 __ cmpl($op1$$Register, $op2$$Register); 8569 %} 8570 ins_pipe(ialu_reg_reg); 8571 %} 8572 8573 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) 8574 %{ 8575 match(Set cr (OverflowSubI op1 op2)); 8576 8577 format %{ "CMP $op1, $op2\t# overflow check int" %} 8578 ins_encode %{ 8579 __ cmpl($op1$$Register, $op2$$constant); 8580 %} 8581 ins_pipe(ialu_reg_reg); 8582 %} 8583 8584 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2) 8585 %{ 8586 match(Set cr (OverflowSubI zero op2)); 8587 effect(DEF cr, USE_KILL op2); 8588 8589 format %{ "NEG $op2\t# overflow check int" %} 8590 ins_encode %{ 8591 __ negl($op2$$Register); 8592 %} 8593 ins_pipe(ialu_reg_reg); 8594 %} 8595 8596 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) 8597 %{ 8598 match(Set cr (OverflowMulI op1 op2)); 8599 
effect(DEF cr, USE_KILL op1, USE op2); 8600 8601 format %{ "IMUL $op1, $op2\t# overflow check int" %} 8602 ins_encode %{ 8603 __ imull($op1$$Register, $op2$$Register); 8604 %} 8605 ins_pipe(ialu_reg_reg_alu0); 8606 %} 8607 8608 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) 8609 %{ 8610 match(Set cr (OverflowMulI op1 op2)); 8611 effect(DEF cr, TEMP tmp, USE op1, USE op2); 8612 8613 format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} 8614 ins_encode %{ 8615 __ imull($tmp$$Register, $op1$$Register, $op2$$constant); 8616 %} 8617 ins_pipe(ialu_reg_reg_alu0); 8618 %} 8619 8620 //----------Long Instructions------------------------------------------------ 8621 // Add Long Register with Register 8622 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ 8623 match(Set dst (AddL dst src)); 8624 effect(KILL cr); 8625 ins_cost(200); 8626 format %{ "ADD $dst.lo,$src.lo\n\t" 8627 "ADC $dst.hi,$src.hi" %} 8628 opcode(0x03, 0x13); 8629 ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); 8630 ins_pipe( ialu_reg_reg_long ); 8631 %} 8632 8633 // Add Long Register with Immediate 8634 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ 8635 match(Set dst (AddL dst src)); 8636 effect(KILL cr); 8637 format %{ "ADD $dst.lo,$src.lo\n\t" 8638 "ADC $dst.hi,$src.hi" %} 8639 opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */ 8640 ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); 8641 ins_pipe( ialu_reg_long ); 8642 %} 8643 8644 // Add Long Register with Memory 8645 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ 8646 match(Set dst (AddL dst (LoadL mem))); 8647 effect(KILL cr); 8648 ins_cost(125); 8649 format %{ "ADD $dst.lo,$mem\n\t" 8650 "ADC $dst.hi,$mem+4" %} 8651 opcode(0x03, 0x13); 8652 ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) ); 8653 ins_pipe( ialu_reg_long_mem ); 8654 %} 8655 8656 // Subtract Long Register with Register. 
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  // 64-bit subtract on 32-bit x86: SUB low halves, then SBB propagates borrow.
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB $dst.lo,$src.lo\n\t"
            "SBB $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB $dst.lo,$mem\n\t"
            "SBB $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate long: matched from (0 - dst).
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND $dst.lo,$src.lo\n\t"
            "AND $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND $dst.lo,$mem\n\t"
            "AND $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// BMI1 instructions
// 64-bit ANDN is done as two independent 32-bit ANDNs on the lo/hi halves.
// TEMP dst presumably keeps dst from aliasing the sources mid-sequence --
// NOTE(review): confirm against the int forms, which have no TEMP.
instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) src2));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2.hi"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    Register Rsrc2 = $src2$$Register;
    __ andnl(Rdst, Rsrc1, Rsrc2);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
  %}
  ins_pipe(ialu_reg_reg_long);
%}

instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t"
            "ANDNL $dst.hi, $src1.hi, $src2+4"
  %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc1 = $src1$$Register;
    // Address of the high 32 bits: same base/index/scale, displacement + 4.
    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);

    __ andnl(Rdst, Rsrc1, $src2$$Address);
    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
  %}
  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSI (isolate lowest set bit): high result half is zero unless the
// low half is all zero; JNZ skips the high-half BLSI in that common case.
instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero src) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src.lo\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}

instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSIL $dst.lo, $src\n\t"
            "JNZ done\n\t"
            "BLSIL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsil(Rdst, $src$$Address);
    __ jccb(Assembler::notZero, done);
    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}
  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSMSK (mask up to lowest set bit): JNC skips the high-half op
// when the low-half BLSMSK left CF clear.
instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL src minus_1) src));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, 0\n\t"
            "BLSMSKL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSMSKL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);

    __ movl(HIGH_FROM_LOW(Rdst), 0);
    __ blsmskl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// 64-bit BLSR (reset lowest set bit): high half is pre-copied from src, and
// JNC skips the high-half BLSR when the low-half op left CF clear.
instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL src minus_1) src) );
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  format %{ "MOVL $dst.hi, $src.hi\n\t"
            "BLSRL $dst.lo, $src.lo\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src.hi\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ blsrl(Rdst, Rsrc);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
    __ bind(done);
  %}

  ins_pipe(ialu_reg);
%}

instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
%{
  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
  predicate(UseBMI1Instructions);
  effect(KILL cr, TEMP dst);

  ins_cost(125);
  format %{ "MOVL $dst.hi, $src+4\n\t"
            "BLSRL $dst.lo, $src\n\t"
            "JNC done\n\t"
            "BLSRL $dst.hi, $src+4\n"
            "done:"
  %}

  ins_encode %{
    Label done;
    Register Rdst = $dst$$Register;
    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
    __ blsrl(Rdst, $src$$Address);
    __ jccb(Assembler::carryClear, done);
    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
    __ bind(done);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR $dst.lo,$src.lo\n\t"
            "OR $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR $dst.lo,$mem\n\t"
            "OR $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1: x ^ -1 == ~x, NOT both halves.
// NOT does not affect EFLAGS, hence no KILL cr.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT $dst.lo\n\t"
            "NOT $dst.hi" %}
  ins_encode %{
    __ notl($dst$$Register);
    __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR $dst.lo,$mem\n\t"
            "XOR $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Shift Left Long by 1: implemented as ADD/ADC (self-add doubles the value,
// carry chains the low half into the high half).
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2: two ADD/ADC doubling steps.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3: three ADD/ADC doubling steps.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi\n\t"
            "ADD $dst.lo,$dst.lo\n\t"
            "ADC $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 1-31
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t"
            "SHL $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.hi,$dst.lo\n"
            "\tSHL $dst.hi,$cnt-32\n"
            "\tXOR $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4); /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.hi,$dst.lo\n\t"
            "XOR $dst.lo,$dst.lo\n"
            "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t"
            "SHL $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SHR $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSHR $dst.lo,$cnt-32\n"
            "\tXOR $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5); /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "XOR $dst.hi,$dst.hi\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SHR $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}

// Shift Right Long by 1-31
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t"
            "SAR $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV $dst.lo,$dst.hi\n"
            "\tSAR $dst.lo,$cnt-32\n"
            "\tSAR $dst.hi,31" %}
  opcode(0xC1, 0x7); /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST $shift,32\n\t"
            "JEQ,s small\n\t"
            "MOV $dst.lo,$dst.hi\n\t"
            "SAR $dst.hi,31\n"
            "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t"
            "SAR $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}


//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction\n\t"
            "JNP exit\n\t"
            "MOV ah,1 // saw a NaN, set CF\n\t"
            "SAHF\n"
            "exit:\tNOP // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same as above for unsigned-compare-with-CF flag register; no NaN fixup.
instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD $src1\n\t"
            "FUCOMIP ST,$src2 // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD $src1\n\t"
            "FCOMp $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST AX,0x400\n\t"
            "JZ,s flags\n\t"
            "MOV AH,1\t# unordered treat as LT\n"
            "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_DPR(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_DPR(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode(
Push_Reg_DPR(src1), 9278 OpcP, RegOpc(src2), 9279 CmpF_Result(dst)); 9280 ins_pipe( pipe_slow ); 9281 %} 9282 9283 // float compare and set condition codes in EFLAGS by XMM regs 9284 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ 9285 predicate(UseSSE>=2); 9286 match(Set cr (CmpD src1 src2)); 9287 ins_cost(145); 9288 format %{ "UCOMISD $src1,$src2\n\t" 9289 "JNP,s exit\n\t" 9290 "PUSHF\t# saw NaN, set CF\n\t" 9291 "AND [rsp], #0xffffff2b\n\t" 9292 "POPF\n" 9293 "exit:" %} 9294 ins_encode %{ 9295 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9296 emit_cmpfp_fixup(_masm); 9297 %} 9298 ins_pipe( pipe_slow ); 9299 %} 9300 9301 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ 9302 predicate(UseSSE>=2); 9303 match(Set cr (CmpD src1 src2)); 9304 ins_cost(100); 9305 format %{ "UCOMISD $src1,$src2" %} 9306 ins_encode %{ 9307 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9308 %} 9309 ins_pipe( pipe_slow ); 9310 %} 9311 9312 // float compare and set condition codes in EFLAGS by XMM regs 9313 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ 9314 predicate(UseSSE>=2); 9315 match(Set cr (CmpD src1 (LoadD src2))); 9316 ins_cost(145); 9317 format %{ "UCOMISD $src1,$src2\n\t" 9318 "JNP,s exit\n\t" 9319 "PUSHF\t# saw NaN, set CF\n\t" 9320 "AND [rsp], #0xffffff2b\n\t" 9321 "POPF\n" 9322 "exit:" %} 9323 ins_encode %{ 9324 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9325 emit_cmpfp_fixup(_masm); 9326 %} 9327 ins_pipe( pipe_slow ); 9328 %} 9329 9330 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ 9331 predicate(UseSSE>=2); 9332 match(Set cr (CmpD src1 (LoadD src2))); 9333 ins_cost(100); 9334 format %{ "UCOMISD $src1,$src2" %} 9335 ins_encode %{ 9336 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9337 %} 9338 ins_pipe( pipe_slow ); 9339 %} 9340 9341 // Compare into -1,0,1 in XMM 9342 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{ 9343 predicate(UseSSE>=2); 9344 match(Set dst (CmpD3 src1 src2)); 
9345 effect(KILL cr); 9346 ins_cost(255); 9347 format %{ "UCOMISD $src1, $src2\n\t" 9348 "MOV $dst, #-1\n\t" 9349 "JP,s done\n\t" 9350 "JB,s done\n\t" 9351 "SETNE $dst\n\t" 9352 "MOVZB $dst, $dst\n" 9353 "done:" %} 9354 ins_encode %{ 9355 __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); 9356 emit_cmpfp3(_masm, $dst$$Register); 9357 %} 9358 ins_pipe( pipe_slow ); 9359 %} 9360 9361 // Compare into -1,0,1 in XMM and memory 9362 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ 9363 predicate(UseSSE>=2); 9364 match(Set dst (CmpD3 src1 (LoadD src2))); 9365 effect(KILL cr); 9366 ins_cost(275); 9367 format %{ "UCOMISD $src1, $src2\n\t" 9368 "MOV $dst, #-1\n\t" 9369 "JP,s done\n\t" 9370 "JB,s done\n\t" 9371 "SETNE $dst\n\t" 9372 "MOVZB $dst, $dst\n" 9373 "done:" %} 9374 ins_encode %{ 9375 __ ucomisd($src1$$XMMRegister, $src2$$Address); 9376 emit_cmpfp3(_masm, $dst$$Register); 9377 %} 9378 ins_pipe( pipe_slow ); 9379 %} 9380 9381 9382 instruct subDPR_reg(regDPR dst, regDPR src) %{ 9383 predicate (UseSSE <=1); 9384 match(Set dst (SubD dst src)); 9385 9386 format %{ "FLD $src\n\t" 9387 "DSUBp $dst,ST" %} 9388 opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ 9389 ins_cost(150); 9390 ins_encode( Push_Reg_DPR(src), 9391 OpcP, RegOpc(dst) ); 9392 ins_pipe( fpu_reg_reg ); 9393 %} 9394 9395 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9396 predicate (UseSSE <=1); 9397 match(Set dst (RoundDouble (SubD src1 src2))); 9398 ins_cost(250); 9399 9400 format %{ "FLD $src2\n\t" 9401 "DSUB ST,$src1\n\t" 9402 "FSTP_D $dst\t# D-round" %} 9403 opcode(0xD8, 0x5); 9404 ins_encode( Push_Reg_DPR(src2), 9405 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9406 ins_pipe( fpu_mem_reg_reg ); 9407 %} 9408 9409 9410 instruct subDPR_reg_mem(regDPR dst, memory src) %{ 9411 predicate (UseSSE <=1); 9412 match(Set dst (SubD dst (LoadD src))); 9413 ins_cost(150); 9414 9415 format %{ "FLD $src\n\t" 9416 "DSUBp $dst,ST" %} 9417 opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD 
DD /0 */ 9418 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9419 OpcP, RegOpc(dst) ); 9420 ins_pipe( fpu_reg_mem ); 9421 %} 9422 9423 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ 9424 predicate (UseSSE<=1); 9425 match(Set dst (AbsD src)); 9426 ins_cost(100); 9427 format %{ "FABS" %} 9428 opcode(0xE1, 0xD9); 9429 ins_encode( OpcS, OpcP ); 9430 ins_pipe( fpu_reg_reg ); 9431 %} 9432 9433 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ 9434 predicate(UseSSE<=1); 9435 match(Set dst (NegD src)); 9436 ins_cost(100); 9437 format %{ "FCHS" %} 9438 opcode(0xE0, 0xD9); 9439 ins_encode( OpcS, OpcP ); 9440 ins_pipe( fpu_reg_reg ); 9441 %} 9442 9443 instruct addDPR_reg(regDPR dst, regDPR src) %{ 9444 predicate(UseSSE<=1); 9445 match(Set dst (AddD dst src)); 9446 format %{ "FLD $src\n\t" 9447 "DADD $dst,ST" %} 9448 size(4); 9449 ins_cost(150); 9450 opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ 9451 ins_encode( Push_Reg_DPR(src), 9452 OpcP, RegOpc(dst) ); 9453 ins_pipe( fpu_reg_reg ); 9454 %} 9455 9456 9457 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9458 predicate(UseSSE<=1); 9459 match(Set dst (RoundDouble (AddD src1 src2))); 9460 ins_cost(250); 9461 9462 format %{ "FLD $src2\n\t" 9463 "DADD ST,$src1\n\t" 9464 "FSTP_D $dst\t# D-round" %} 9465 opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ 9466 ins_encode( Push_Reg_DPR(src2), 9467 OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); 9468 ins_pipe( fpu_mem_reg_reg ); 9469 %} 9470 9471 9472 instruct addDPR_reg_mem(regDPR dst, memory src) %{ 9473 predicate(UseSSE<=1); 9474 match(Set dst (AddD dst (LoadD src))); 9475 ins_cost(150); 9476 9477 format %{ "FLD $src\n\t" 9478 "DADDp $dst,ST" %} 9479 opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ 9480 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9481 OpcP, RegOpc(dst) ); 9482 ins_pipe( fpu_reg_mem ); 9483 %} 9484 9485 // add-to-memory 9486 instruct addDPR_mem_reg(memory dst, regDPR src) %{ 9487 predicate(UseSSE<=1); 9488 match(Set dst (StoreD dst (RoundDouble 
(AddD (LoadD dst) src)))); 9489 ins_cost(150); 9490 9491 format %{ "FLD_D $dst\n\t" 9492 "DADD ST,$src\n\t" 9493 "FST_D $dst" %} 9494 opcode(0xDD, 0x0); 9495 ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst), 9496 Opcode(0xD8), RegOpc(src), 9497 set_instruction_start, 9498 Opcode(0xDD), RMopc_Mem(0x03,dst) ); 9499 ins_pipe( fpu_reg_mem ); 9500 %} 9501 9502 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ 9503 predicate(UseSSE<=1); 9504 match(Set dst (AddD dst con)); 9505 ins_cost(125); 9506 format %{ "FLD1\n\t" 9507 "DADDp $dst,ST" %} 9508 ins_encode %{ 9509 __ fld1(); 9510 __ faddp($dst$$reg); 9511 %} 9512 ins_pipe(fpu_reg); 9513 %} 9514 9515 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ 9516 predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9517 match(Set dst (AddD dst con)); 9518 ins_cost(200); 9519 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9520 "DADDp $dst,ST" %} 9521 ins_encode %{ 9522 __ fld_d($constantaddress($con)); 9523 __ faddp($dst$$reg); 9524 %} 9525 ins_pipe(fpu_reg_mem); 9526 %} 9527 9528 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ 9529 predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); 9530 match(Set dst (RoundDouble (AddD src con))); 9531 ins_cost(200); 9532 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9533 "DADD ST,$src\n\t" 9534 "FSTP_D $dst\t# D-round" %} 9535 ins_encode %{ 9536 __ fld_d($constantaddress($con)); 9537 __ fadd($src$$reg); 9538 __ fstp_d(Address(rsp, $dst$$disp)); 9539 %} 9540 ins_pipe(fpu_mem_reg_con); 9541 %} 9542 9543 instruct mulDPR_reg(regDPR dst, regDPR src) %{ 9544 predicate(UseSSE<=1); 9545 match(Set dst (MulD dst src)); 9546 format %{ "FLD $src\n\t" 9547 "DMULp $dst,ST" %} 9548 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9549 ins_cost(150); 9550 ins_encode( Push_Reg_DPR(src), 9551 OpcP, RegOpc(dst) ); 9552 ins_pipe( 
fpu_reg_reg ); 9553 %} 9554 9555 // Strict FP instruction biases argument before multiply then 9556 // biases result to avoid double rounding of subnormals. 9557 // 9558 // scale arg1 by multiplying arg1 by 2^(-15360) 9559 // load arg2 9560 // multiply scaled arg1 by arg2 9561 // rescale product by 2^(15360) 9562 // 9563 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9564 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9565 match(Set dst (MulD dst src)); 9566 ins_cost(1); // Select this instruction for all strict FP double multiplies 9567 9568 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9569 "DMULp $dst,ST\n\t" 9570 "FLD $src\n\t" 9571 "DMULp $dst,ST\n\t" 9572 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9573 "DMULp $dst,ST\n\t" %} 9574 opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ 9575 ins_encode( strictfp_bias1(dst), 9576 Push_Reg_DPR(src), 9577 OpcP, RegOpc(dst), 9578 strictfp_bias2(dst) ); 9579 ins_pipe( fpu_reg_reg ); 9580 %} 9581 9582 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ 9583 predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); 9584 match(Set dst (MulD dst con)); 9585 ins_cost(200); 9586 format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" 9587 "DMULp $dst,ST" %} 9588 ins_encode %{ 9589 __ fld_d($constantaddress($con)); 9590 __ fmulp($dst$$reg); 9591 %} 9592 ins_pipe(fpu_reg_mem); 9593 %} 9594 9595 9596 instruct mulDPR_reg_mem(regDPR dst, memory src) %{ 9597 predicate( UseSSE<=1 ); 9598 match(Set dst (MulD dst (LoadD src))); 9599 ins_cost(200); 9600 format %{ "FLD_D $src\n\t" 9601 "DMULp $dst,ST" %} 9602 opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ 9603 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src), 9604 OpcP, RegOpc(dst) ); 9605 ins_pipe( fpu_reg_mem ); 9606 %} 9607 9608 // 9609 // Cisc-alternate to reg-reg multiply 9610 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, 
memory mem) %{ 9611 predicate( UseSSE<=1 ); 9612 match(Set dst (MulD src (LoadD mem))); 9613 ins_cost(250); 9614 format %{ "FLD_D $mem\n\t" 9615 "DMUL ST,$src\n\t" 9616 "FSTP_D $dst" %} 9617 opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ 9618 ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem), 9619 OpcReg_FPR(src), 9620 Pop_Reg_DPR(dst) ); 9621 ins_pipe( fpu_reg_reg_mem ); 9622 %} 9623 9624 9625 // MACRO3 -- addDPR a mulDPR 9626 // This instruction is a '2-address' instruction in that the result goes 9627 // back to src2. This eliminates a move from the macro; possibly the 9628 // register allocator will have to add it back (and maybe not). 9629 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9630 predicate( UseSSE<=1 ); 9631 match(Set src2 (AddD (MulD src0 src1) src2)); 9632 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9633 "DMUL ST,$src1\n\t" 9634 "DADDp $src2,ST" %} 9635 ins_cost(250); 9636 opcode(0xDD); /* LoadD DD /0 */ 9637 ins_encode( Push_Reg_FPR(src0), 9638 FMul_ST_reg(src1), 9639 FAddP_reg_ST(src2) ); 9640 ins_pipe( fpu_reg_reg_reg ); 9641 %} 9642 9643 9644 // MACRO3 -- subDPR a mulDPR 9645 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ 9646 predicate( UseSSE<=1 ); 9647 match(Set src2 (SubD (MulD src0 src1) src2)); 9648 format %{ "FLD $src0\t# ===MACRO3d===\n\t" 9649 "DMUL ST,$src1\n\t" 9650 "DSUBRp $src2,ST" %} 9651 ins_cost(250); 9652 ins_encode( Push_Reg_FPR(src0), 9653 FMul_ST_reg(src1), 9654 Opcode(0xDE), Opc_plus(0xE0,src2)); 9655 ins_pipe( fpu_reg_reg_reg ); 9656 %} 9657 9658 9659 instruct divDPR_reg(regDPR dst, regDPR src) %{ 9660 predicate( UseSSE<=1 ); 9661 match(Set dst (DivD dst src)); 9662 9663 format %{ "FLD $src\n\t" 9664 "FDIVp $dst,ST" %} 9665 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9666 ins_cost(150); 9667 ins_encode( Push_Reg_DPR(src), 9668 OpcP, RegOpc(dst) ); 9669 ins_pipe( fpu_reg_reg ); 9670 %} 9671 9672 // Strict FP instruction biases argument before division then 9673 // biases 
result, to avoid double rounding of subnormals. 9674 // 9675 // scale dividend by multiplying dividend by 2^(-15360) 9676 // load divisor 9677 // divide scaled dividend by divisor 9678 // rescale quotient by 2^(15360) 9679 // 9680 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ 9681 predicate (UseSSE<=1); 9682 match(Set dst (DivD dst src)); 9683 predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() ); 9684 ins_cost(01); 9685 9686 format %{ "FLD StubRoutines::_fpu_subnormal_bias1\n\t" 9687 "DMULp $dst,ST\n\t" 9688 "FLD $src\n\t" 9689 "FDIVp $dst,ST\n\t" 9690 "FLD StubRoutines::_fpu_subnormal_bias2\n\t" 9691 "DMULp $dst,ST\n\t" %} 9692 opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ 9693 ins_encode( strictfp_bias1(dst), 9694 Push_Reg_DPR(src), 9695 OpcP, RegOpc(dst), 9696 strictfp_bias2(dst) ); 9697 ins_pipe( fpu_reg_reg ); 9698 %} 9699 9700 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ 9701 predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) ); 9702 match(Set dst (RoundDouble (DivD src1 src2))); 9703 9704 format %{ "FLD $src1\n\t" 9705 "FDIV ST,$src2\n\t" 9706 "FSTP_D $dst\t# D-round" %} 9707 opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */ 9708 ins_encode( Push_Reg_DPR(src1), 9709 OpcP, RegOpc(src2), Pop_Mem_DPR(dst) ); 9710 ins_pipe( fpu_mem_reg_reg ); 9711 %} 9712 9713 9714 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ 9715 predicate(UseSSE<=1); 9716 match(Set dst (ModD dst src)); 9717 effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS 9718 9719 format %{ "DMOD $dst,$src" %} 9720 ins_cost(250); 9721 ins_encode(Push_Reg_Mod_DPR(dst, src), 9722 emitModDPR(), 9723 Push_Result_Mod_DPR(src), 9724 Pop_Reg_DPR(dst)); 9725 ins_pipe( pipe_slow ); 9726 %} 9727 9728 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ 9729 predicate(UseSSE>=2); 9730 match(Set dst (ModD src0 src1)); 
9731 effect(KILL rax, KILL cr); 9732 9733 format %{ "SUB ESP,8\t # DMOD\n" 9734 "\tMOVSD [ESP+0],$src1\n" 9735 "\tFLD_D [ESP+0]\n" 9736 "\tMOVSD [ESP+0],$src0\n" 9737 "\tFLD_D [ESP+0]\n" 9738 "loop:\tFPREM\n" 9739 "\tFWAIT\n" 9740 "\tFNSTSW AX\n" 9741 "\tSAHF\n" 9742 "\tJP loop\n" 9743 "\tFSTP_D [ESP+0]\n" 9744 "\tMOVSD $dst,[ESP+0]\n" 9745 "\tADD ESP,8\n" 9746 "\tFSTP ST0\t # Restore FPU Stack" 9747 %} 9748 ins_cost(250); 9749 ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); 9750 ins_pipe( pipe_slow ); 9751 %} 9752 9753 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{ 9754 predicate (UseSSE<=1); 9755 match(Set dst (SinD src)); 9756 ins_cost(1800); 9757 format %{ "DSIN $dst" %} 9758 opcode(0xD9, 0xFE); 9759 ins_encode( OpcP, OpcS ); 9760 ins_pipe( pipe_slow ); 9761 %} 9762 9763 instruct sinD_reg(regD dst, eFlagsReg cr) %{ 9764 predicate (UseSSE>=2); 9765 match(Set dst (SinD dst)); 9766 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9767 ins_cost(1800); 9768 format %{ "DSIN $dst" %} 9769 opcode(0xD9, 0xFE); 9770 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9771 ins_pipe( pipe_slow ); 9772 %} 9773 9774 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{ 9775 predicate (UseSSE<=1); 9776 match(Set dst (CosD src)); 9777 ins_cost(1800); 9778 format %{ "DCOS $dst" %} 9779 opcode(0xD9, 0xFF); 9780 ins_encode( OpcP, OpcS ); 9781 ins_pipe( pipe_slow ); 9782 %} 9783 9784 instruct cosD_reg(regD dst, eFlagsReg cr) %{ 9785 predicate (UseSSE>=2); 9786 match(Set dst (CosD dst)); 9787 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9788 ins_cost(1800); 9789 format %{ "DCOS $dst" %} 9790 opcode(0xD9, 0xFF); 9791 ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) ); 9792 ins_pipe( pipe_slow ); 9793 %} 9794 9795 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{ 9796 predicate (UseSSE<=1); 9797 match(Set dst(TanD src)); 9798 format %{ "DTAN $dst" %} 9799 ins_encode( Opcode(0xD9), Opcode(0xF2), // 
fptan 9800 Opcode(0xDD), Opcode(0xD8)); // fstp st 9801 ins_pipe( pipe_slow ); 9802 %} 9803 9804 instruct tanD_reg(regD dst, eFlagsReg cr) %{ 9805 predicate (UseSSE>=2); 9806 match(Set dst(TanD dst)); 9807 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9808 format %{ "DTAN $dst" %} 9809 ins_encode( Push_SrcD(dst), 9810 Opcode(0xD9), Opcode(0xF2), // fptan 9811 Opcode(0xDD), Opcode(0xD8), // fstp st 9812 Push_ResultD(dst) ); 9813 ins_pipe( pipe_slow ); 9814 %} 9815 9816 instruct atanDPR_reg(regDPR dst, regDPR src) %{ 9817 predicate (UseSSE<=1); 9818 match(Set dst(AtanD dst src)); 9819 format %{ "DATA $dst,$src" %} 9820 opcode(0xD9, 0xF3); 9821 ins_encode( Push_Reg_DPR(src), 9822 OpcP, OpcS, RegOpc(dst) ); 9823 ins_pipe( pipe_slow ); 9824 %} 9825 9826 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ 9827 predicate (UseSSE>=2); 9828 match(Set dst(AtanD dst src)); 9829 effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" 9830 format %{ "DATA $dst,$src" %} 9831 opcode(0xD9, 0xF3); 9832 ins_encode( Push_SrcD(src), 9833 OpcP, OpcS, Push_ResultD(dst) ); 9834 ins_pipe( pipe_slow ); 9835 %} 9836 9837 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ 9838 predicate (UseSSE<=1); 9839 match(Set dst (SqrtD src)); 9840 format %{ "DSQRT $dst,$src" %} 9841 opcode(0xFA, 0xD9); 9842 ins_encode( Push_Reg_DPR(src), 9843 OpcS, OpcP, Pop_Reg_DPR(dst) ); 9844 ins_pipe( pipe_slow ); 9845 %} 9846 9847 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9848 predicate (UseSSE<=1); 9849 match(Set Y (PowD X Y)); // Raise X to the Yth power 9850 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9851 format %{ "fast_pow $X $Y -> $Y // KILL $rax, $rcx, $rdx" %} 9852 ins_encode %{ 9853 __ subptr(rsp, 8); 9854 __ fld_s($X$$reg - 1); 9855 __ fast_pow(); 9856 __ addptr(rsp, 8); 9857 %} 9858 ins_pipe( pipe_slow ); 9859 %} 9860 9861 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, 
eFlagsReg cr) %{ 9862 predicate (UseSSE>=2); 9863 match(Set dst (PowD src0 src1)); // Raise src0 to the src1'th power 9864 effect(KILL rax, KILL rdx, KILL rcx, KILL cr); 9865 format %{ "fast_pow $src0 $src1 -> $dst // KILL $rax, $rcx, $rdx" %} 9866 ins_encode %{ 9867 __ subptr(rsp, 8); 9868 __ movdbl(Address(rsp, 0), $src1$$XMMRegister); 9869 __ fld_d(Address(rsp, 0)); 9870 __ movdbl(Address(rsp, 0), $src0$$XMMRegister); 9871 __ fld_d(Address(rsp, 0)); 9872 __ fast_pow(); 9873 __ fstp_d(Address(rsp, 0)); 9874 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 9875 __ addptr(rsp, 8); 9876 %} 9877 ins_pipe( pipe_slow ); 9878 %} 9879 9880 9881 instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9882 predicate (UseSSE<=1); 9883 match(Set dpr1 (ExpD dpr1)); 9884 effect(KILL rax, KILL rcx, KILL rdx, KILL cr); 9885 format %{ "fast_exp $dpr1 -> $dpr1 // KILL $rax, $rcx, $rdx" %} 9886 ins_encode %{ 9887 __ fast_exp(); 9888 %} 9889 ins_pipe( pipe_slow ); 9890 %} 9891 9892 instruct expD_reg(regD dst, regD src, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{ 9893 predicate (UseSSE>=2); 9894 match(Set dst (ExpD src)); 9895 effect(KILL rax, KILL rcx, KILL rdx, KILL cr); 9896 format %{ "fast_exp $dst -> $src // KILL $rax, $rcx, $rdx" %} 9897 ins_encode %{ 9898 __ subptr(rsp, 8); 9899 __ movdbl(Address(rsp, 0), $src$$XMMRegister); 9900 __ fld_d(Address(rsp, 0)); 9901 __ fast_exp(); 9902 __ fstp_d(Address(rsp, 0)); 9903 __ movdbl($dst$$XMMRegister, Address(rsp, 0)); 9904 __ addptr(rsp, 8); 9905 %} 9906 ins_pipe( pipe_slow ); 9907 %} 9908 9909 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{ 9910 predicate (UseSSE<=1); 9911 // The source Double operand on FPU stack 9912 match(Set dst (Log10D src)); 9913 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9914 // fxch ; swap ST(0) with ST(1) 9915 // fyl2x ; compute log_10(2) * log_2(x) 9916 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9917 "FXCH \n\t" 9918 "FYL2X \t\t\t# 
Q=Log10*Log_2(x)" 9919 %} 9920 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9921 Opcode(0xD9), Opcode(0xC9), // fxch 9922 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9923 9924 ins_pipe( pipe_slow ); 9925 %} 9926 9927 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{ 9928 predicate (UseSSE>=2); 9929 effect(KILL cr); 9930 match(Set dst (Log10D src)); 9931 // fldlg2 ; push log_10(2) on the FPU stack; full 80-bit number 9932 // fyl2x ; compute log_10(2) * log_2(x) 9933 format %{ "FLDLG2 \t\t\t#Log10\n\t" 9934 "FYL2X \t\t\t# Q=Log10*Log_2(x)" 9935 %} 9936 ins_encode( Opcode(0xD9), Opcode(0xEC), // fldlg2 9937 Push_SrcD(src), 9938 Opcode(0xD9), Opcode(0xF1), // fyl2x 9939 Push_ResultD(dst)); 9940 9941 ins_pipe( pipe_slow ); 9942 %} 9943 9944 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{ 9945 predicate (UseSSE<=1); 9946 // The source Double operand on FPU stack 9947 match(Set dst (LogD src)); 9948 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 9949 // fxch ; swap ST(0) with ST(1) 9950 // fyl2x ; compute log_e(2) * log_2(x) 9951 format %{ "FLDLN2 \t\t\t#Log_e\n\t" 9952 "FXCH \n\t" 9953 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 9954 %} 9955 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 9956 Opcode(0xD9), Opcode(0xC9), // fxch 9957 Opcode(0xD9), Opcode(0xF1)); // fyl2x 9958 9959 ins_pipe( pipe_slow ); 9960 %} 9961 9962 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{ 9963 predicate (UseSSE>=2); 9964 effect(KILL cr); 9965 // The source and result Double operands in XMM registers 9966 match(Set dst (LogD src)); 9967 // fldln2 ; push log_e(2) on the FPU stack; full 80-bit number 9968 // fyl2x ; compute log_e(2) * log_2(x) 9969 format %{ "FLDLN2 \t\t\t#Log_e\n\t" 9970 "FYL2X \t\t\t# Q=Log_e*Log_2(x)" 9971 %} 9972 ins_encode( Opcode(0xD9), Opcode(0xED), // fldln2 9973 Push_SrcD(src), 9974 Opcode(0xD9), Opcode(0xF1), // fyl2x 9975 Push_ResultD(dst)); 9976 ins_pipe( pipe_slow ); 9977 %} 9978 9979 //-------------Float 
Instructions------------------------------- 9980 // Float Math 9981 9982 // Code for float compare: 9983 // fcompp(); 9984 // fwait(); fnstsw_ax(); 9985 // sahf(); 9986 // movl(dst, unordered_result); 9987 // jcc(Assembler::parity, exit); 9988 // movl(dst, less_result); 9989 // jcc(Assembler::below, exit); 9990 // movl(dst, equal_result); 9991 // jcc(Assembler::equal, exit); 9992 // movl(dst, greater_result); 9993 // exit: 9994 9995 // P6 version of float compare, sets condition codes in EFLAGS 9996 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 9997 predicate(VM_Version::supports_cmov() && UseSSE == 0); 9998 match(Set cr (CmpF src1 src2)); 9999 effect(KILL rax); 10000 ins_cost(150); 10001 format %{ "FLD $src1\n\t" 10002 "FUCOMIP ST,$src2 // P6 instruction\n\t" 10003 "JNP exit\n\t" 10004 "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" 10005 "SAHF\n" 10006 "exit:\tNOP // avoid branch to branch" %} 10007 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10008 ins_encode( Push_Reg_DPR(src1), 10009 OpcP, RegOpc(src2), 10010 cmpF_P6_fixup ); 10011 ins_pipe( pipe_slow ); 10012 %} 10013 10014 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ 10015 predicate(VM_Version::supports_cmov() && UseSSE == 0); 10016 match(Set cr (CmpF src1 src2)); 10017 ins_cost(100); 10018 format %{ "FLD $src1\n\t" 10019 "FUCOMIP ST,$src2 // P6 instruction" %} 10020 opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ 10021 ins_encode( Push_Reg_DPR(src1), 10022 OpcP, RegOpc(src2)); 10023 ins_pipe( pipe_slow ); 10024 %} 10025 10026 10027 // Compare & branch 10028 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ 10029 predicate(UseSSE == 0); 10030 match(Set cr (CmpF src1 src2)); 10031 effect(KILL rax); 10032 ins_cost(200); 10033 format %{ "FLD $src1\n\t" 10034 "FCOMp $src2\n\t" 10035 "FNSTSW AX\n\t" 10036 "TEST AX,0x400\n\t" 10037 "JZ,s flags\n\t" 10038 "MOV AH,1\t# unordered treat as LT\n" 10039 "flags:\tSAHF" %} 10040 opcode(0xD8, 
0x3); /* D8 D8+i or D8 /3 */ 10041 ins_encode( Push_Reg_DPR(src1), 10042 OpcP, RegOpc(src2), 10043 fpu_flags); 10044 ins_pipe( pipe_slow ); 10045 %} 10046 10047 // Compare vs zero into -1,0,1 10048 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ 10049 predicate(UseSSE == 0); 10050 match(Set dst (CmpF3 src1 zero)); 10051 effect(KILL cr, KILL rax); 10052 ins_cost(280); 10053 format %{ "FTSTF $dst,$src1" %} 10054 opcode(0xE4, 0xD9); 10055 ins_encode( Push_Reg_DPR(src1), 10056 OpcS, OpcP, PopFPU, 10057 CmpF_Result(dst)); 10058 ins_pipe( pipe_slow ); 10059 %} 10060 10061 // Compare into -1,0,1 10062 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ 10063 predicate(UseSSE == 0); 10064 match(Set dst (CmpF3 src1 src2)); 10065 effect(KILL cr, KILL rax); 10066 ins_cost(300); 10067 format %{ "FCMPF $dst,$src1,$src2" %} 10068 opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ 10069 ins_encode( Push_Reg_DPR(src1), 10070 OpcP, RegOpc(src2), 10071 CmpF_Result(dst)); 10072 ins_pipe( pipe_slow ); 10073 %} 10074 10075 // float compare and set condition codes in EFLAGS by XMM regs 10076 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ 10077 predicate(UseSSE>=1); 10078 match(Set cr (CmpF src1 src2)); 10079 ins_cost(145); 10080 format %{ "UCOMISS $src1,$src2\n\t" 10081 "JNP,s exit\n\t" 10082 "PUSHF\t# saw NaN, set CF\n\t" 10083 "AND [rsp], #0xffffff2b\n\t" 10084 "POPF\n" 10085 "exit:" %} 10086 ins_encode %{ 10087 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10088 emit_cmpfp_fixup(_masm); 10089 %} 10090 ins_pipe( pipe_slow ); 10091 %} 10092 10093 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ 10094 predicate(UseSSE>=1); 10095 match(Set cr (CmpF src1 src2)); 10096 ins_cost(100); 10097 format %{ "UCOMISS $src1,$src2" %} 10098 ins_encode %{ 10099 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10100 %} 10101 ins_pipe( pipe_slow ); 10102 %} 10103 10104 // float compare and set condition codes 
in EFLAGS by XMM regs 10105 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ 10106 predicate(UseSSE>=1); 10107 match(Set cr (CmpF src1 (LoadF src2))); 10108 ins_cost(165); 10109 format %{ "UCOMISS $src1,$src2\n\t" 10110 "JNP,s exit\n\t" 10111 "PUSHF\t# saw NaN, set CF\n\t" 10112 "AND [rsp], #0xffffff2b\n\t" 10113 "POPF\n" 10114 "exit:" %} 10115 ins_encode %{ 10116 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10117 emit_cmpfp_fixup(_masm); 10118 %} 10119 ins_pipe( pipe_slow ); 10120 %} 10121 10122 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ 10123 predicate(UseSSE>=1); 10124 match(Set cr (CmpF src1 (LoadF src2))); 10125 ins_cost(100); 10126 format %{ "UCOMISS $src1,$src2" %} 10127 ins_encode %{ 10128 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10129 %} 10130 ins_pipe( pipe_slow ); 10131 %} 10132 10133 // Compare into -1,0,1 in XMM 10134 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ 10135 predicate(UseSSE>=1); 10136 match(Set dst (CmpF3 src1 src2)); 10137 effect(KILL cr); 10138 ins_cost(255); 10139 format %{ "UCOMISS $src1, $src2\n\t" 10140 "MOV $dst, #-1\n\t" 10141 "JP,s done\n\t" 10142 "JB,s done\n\t" 10143 "SETNE $dst\n\t" 10144 "MOVZB $dst, $dst\n" 10145 "done:" %} 10146 ins_encode %{ 10147 __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); 10148 emit_cmpfp3(_masm, $dst$$Register); 10149 %} 10150 ins_pipe( pipe_slow ); 10151 %} 10152 10153 // Compare into -1,0,1 in XMM and memory 10154 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ 10155 predicate(UseSSE>=1); 10156 match(Set dst (CmpF3 src1 (LoadF src2))); 10157 effect(KILL cr); 10158 ins_cost(275); 10159 format %{ "UCOMISS $src1, $src2\n\t" 10160 "MOV $dst, #-1\n\t" 10161 "JP,s done\n\t" 10162 "JB,s done\n\t" 10163 "SETNE $dst\n\t" 10164 "MOVZB $dst, $dst\n" 10165 "done:" %} 10166 ins_encode %{ 10167 __ ucomiss($src1$$XMMRegister, $src2$$Address); 10168 emit_cmpfp3(_masm, $dst$$Register); 10169 %} 10170 ins_pipe( pipe_slow 
);
%}

// Spill to obtain 24-bit precision
instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i or DE /5 */
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD $src\n\t"
            "FADDp $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Cisc-alternate to addFPR_reg
// Spill to obtain 24-bit precision
instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD $src2\n\t"
            "FADD ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addFPR_reg
// This instruction does not round to 24-bits
instruct addFPR_reg_mem(regFPR dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// // Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}


// Spill to obtain 24-bit precision
instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  // NOTE(review): result is popped to a register (Pop_Reg_FPR), not stored
  // single-precision, so the debug format says FSTP, not FSTP_S.
  format %{ "FLD $src1\n\t"
            "FMUL $src2\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_FPR(src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}


// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S $src2\n\t"
            "FMUL $src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_FPR(src1),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP $dst" %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}


//
// MACRO1 -- subsume unshared load into mulFPR
// This instruction does not round to 24-bits
instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF (LoadF mem1) src));

  format %{ "FLD $mem1 ===MACRO1===\n\t"
            "FMUL ST,$src\n\t"
            "FSTP $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
              OpcReg_FPR(src),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
//
// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
// This instruction does not round to 24-bits
instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
  ins_cost(95);

  format %{ "FLD $mem1 ===MACRO2===\n\t"
            "FMUL ST,$src1 subsume mulFPR left load\n\t"
            "FADD ST,$src2\n\t"
            "FSTP $dst" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
              FMul_ST_reg(src1),
              FAdd_ST_reg(src2),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_mem_reg_reg );
%}

// MACRO3 -- addFPR a mulFPR
// This instruction does not round to 24-bits. It is a '2-address'
// instruction in that the result goes back to src2. This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD $src0 ===MACRO3===\n\t"
            "FMUL ST,$src1\n\t"
            "FADDP $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_FPR(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divFPR subFPR
// This instruction does not round to 24-bits
instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD $src2 ===MACRO4===\n\t"
            "FSUB ST,$src1\n\t"
            "FDIV ST,$src3\n\t"
            "FSTP $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src2),
              subFPR_divFPR_encode(src1,src3),
              Pop_Reg_FPR(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_FPR(src1),
              OpcReg_FPR(src2),
              Pop_Mem_FPR(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divFPR_reg(regFPR dst, regFPR src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_FPR(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_DPR(src1, src2),
              emitModDPR(),
              Push_Result_Mod_DPR(src2),
              Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS

  format %{ "FMOD $dst,$src" %}
  ins_encode(Push_Reg_Mod_DPR(dst, src),
             emitModDPR(),
             Push_Result_Mod_DPR(src),
             Pop_Reg_FPR(dst));
  ins_pipe( pipe_slow );
%}

instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB ESP,4\t # FMOD\n"
            "\tMOVSS [ESP+0],$src1\n"
            "\tFLD_S [ESP+0]\n"
            "\tMOVSS [ESP+0],$src0\n"
            "\tFLD_S [ESP+0]\n"
            "loop:\tFPREM\n"
            "\tFWAIT\n"
            "\tFNSTSW AX\n"
            "\tSAHF\n"
            "\tJP loop\n"
            "\tFSTP_S [ESP+0]\n"
            "\tMOVSS $dst,[ESP+0]\n"
            "\tADD ESP,4\n"
            "\tFSTP ST0\t # Restore FPU Stack"
  %}
  ins_cost(250);
  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted. Please keep it that way!
instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "FST_S [ESP],$src\t# F-round\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode %{
    __ subptr(rsp, 4);
    // FPR1 can be stored directly; any other stack slot must be loaded to
    // the top of the x87 stack before the single-precision store.
    if ($src$$reg != FPR1L_enc) {
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      __ fst_s(Address(rsp, 0));
    }
    __ movflt($dst$$XMMRegister, Address(rsp, 0));
    __ addptr(rsp, 4);
  %}
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
instruct convD2F_reg(regF dst, regD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  ins_encode %{
    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_DPR(dst, src));
  ins_pipe( fpu_reg_reg );
%}

instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB ESP,4\n\t"
            "MOVSS [ESP] $src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "FSTP $dst\t# D-round" %}
  ins_encode %{
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ fstp_d($dst$$reg);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2D_reg(regD dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  ins_encode %{
    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert double to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD_D $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. If the double is a NAN, stuff a zero in instead.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 8\n\t"
            "MOVSD [ESP], $src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP, 8\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // CVTTSD2SI returns 0x80000000 both for MIN_INT and for any
    // unrepresentable input (NaN, overflow); re-check via the wrapper then.
    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert double to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert double to long\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,8\n\t"
            "MOVSD [ESP],$src\n\t"
            "FLD_D [ESP]\n\t"
            "ADD ESP,8\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 8);
    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
    __ fld_d(Address(rsp, 0));
    __ addptr(rsp, 8);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

// Convert a double to an int. Java semantics require we do complex
// manglations in the corner cases. So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'. The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// and go the slow path if needed.
instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD $src\t# Convert float to int \n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,4\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "CMP EAX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  // DPR2I_encoding works for FPR2I
  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a float in xmm to an int reg.
instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP $dst,0x80000000\n\t"
            "JNE,s fast\n\t"
            "SUB ESP, 4\n\t"
            "MOVSS [ESP], $src\n\t"
            "FLD [ESP]\n\t"
            "ADD ESP, 4\n\t"
            "CALL d2i_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    // 0x80000000 is the hardware's "unrepresentable" marker; take the
    // wrapper slow path to get the Java-specified result in that case.
    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    __ cmpl($dst$$Register, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD $src\t# Convert float to long\n\t"
            "FLDCW trunc mode\n\t"
            "SUB ESP,8\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "FLD $src\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  // DPR2L_encoding works for FPR2L
  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB ESP,8\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "FLDCW trunc mode\n\t"
            "FISTp [ESP + #0]\n\t"
            "FLDCW std/24-bit mode\n\t"
            "POP EAX\n\t"
            "POP EDX\n\t"
            "CMP EDX,0x80000000\n\t"
            "JNE,s fast\n\t"
            "TEST EAX,EAX\n\t"
            "JNE,s fast\n\t"
            "SUB ESP,4\t# Convert float to long\n\t"
            "MOVSS [ESP],$src\n\t"
            "FLD_S [ESP]\n\t"
            "ADD ESP,4\n\t"
            "CALL d2l_wrapper\n"
            "fast:" %}
  ins_encode %{
    Label fast;
    __ subptr(rsp, 8);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
    __ fistp_d(Address(rsp, 0));
    // Restore the rounding mode, mask the exception
    if (Compile::current()->in_24_bit_fp_mode()) {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    } else {
      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    // Load the converted long, adjust CPU stack
    __ pop(rax);
    __ pop(rdx);
    __ cmpl(rdx, 0x80000000);
    __ jccb(Assembler::notEqual, fast);
    __ testl(rax, rax);
    __ jccb(Assembler::notEqual, fast);
    __ subptr(rsp, 4);
    __ movflt(Address(rsp, 0), $src$$XMMRegister);
    __ fld_s(Address(rsp, 0));
    __ addptr(rsp, 4);
    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
    __ bind(fast);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src),
             Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

instruct convI2D_reg(regD dst, rRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  ins_encode %{
    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct convXI2D_reg(regD dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2DPR_mem(regDPR dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_DPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}

  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_FPR(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD $src\n\t"
            "FSTP $dst" %}
  opcode(0xDB, 0x0); /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2FPR_mem(regFPR dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD $mem\n\t"
            "FSTP $dst" %}
  opcode(0xDB); /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_FPR(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}
  ins_encode %{
    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convXI2F_reg(regF dst, rRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV $dst.lo,$src\n\t"
            "MOV $dst.hi,$src\n\t"
            "SAR $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src\n\t"
            "XOR $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV $dst.lo,$src.lo\n\t"
            "XOR $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to double\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultD(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS $dst,[ESP]\n\t"
            "ADD ESP,8" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
  ins_pipe( pipe_slow );
%}

instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH $src.hi\t# Convert long to single float\n\t"
            "PUSH $src.lo\n\t"
            "FILD ST,[ESP + #0]\n\t"
            "ADD ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5); /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
  ins_pipe( pipe_slow );
%}

instruct convL2I_reg( rRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}

instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %}
  ins_encode %{
    __ movl($dst$$Register, Address(rsp, $src$$disp));
  %}
  ins_pipe( ialu_reg_mem );
%}

instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode %{
    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode %{
    __ movdl($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV $dst,$src\t# 
MoveI2F_reg_stack" %} 11267 ins_encode %{ 11268 __ movl(Address(rsp, $dst$$disp), $src$$Register); 11269 %} 11270 ins_pipe( ialu_mem_reg ); 11271 %} 11272 11273 11274 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ 11275 predicate(UseSSE==0); 11276 match(Set dst (MoveI2F src)); 11277 effect(DEF dst, USE src); 11278 11279 ins_cost(125); 11280 format %{ "FLD_S $src\n\t" 11281 "FSTP $dst\t# MoveI2F_stack_reg" %} 11282 opcode(0xD9); /* D9 /0, FLD m32real */ 11283 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11284 Pop_Reg_FPR(dst) ); 11285 ins_pipe( fpu_reg_mem ); 11286 %} 11287 11288 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ 11289 predicate(UseSSE>=1); 11290 match(Set dst (MoveI2F src)); 11291 effect( DEF dst, USE src ); 11292 11293 ins_cost(95); 11294 format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} 11295 ins_encode %{ 11296 __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); 11297 %} 11298 ins_pipe( pipe_slow ); 11299 %} 11300 11301 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ 11302 predicate(UseSSE>=2); 11303 match(Set dst (MoveI2F src)); 11304 effect( DEF dst, USE src ); 11305 11306 ins_cost(85); 11307 format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} 11308 ins_encode %{ 11309 __ movdl($dst$$XMMRegister, $src$$Register); 11310 %} 11311 ins_pipe( pipe_slow ); 11312 %} 11313 11314 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ 11315 match(Set dst (MoveD2L src)); 11316 effect(DEF dst, USE src); 11317 11318 ins_cost(250); 11319 format %{ "MOV $dst.lo,$src\n\t" 11320 "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} 11321 opcode(0x8B, 0x8B); 11322 ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src)); 11323 ins_pipe( ialu_mem_long_reg ); 11324 %} 11325 11326 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ 11327 predicate(UseSSE<=1); 11328 match(Set dst (MoveD2L src)); 11329 effect(DEF dst, USE src); 11330 11331 ins_cost(125); 11332 format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} 11333 
ins_encode( Pop_Mem_Reg_DPR(dst, src) ); 11334 ins_pipe( fpu_mem_reg ); 11335 %} 11336 11337 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ 11338 predicate(UseSSE>=2); 11339 match(Set dst (MoveD2L src)); 11340 effect(DEF dst, USE src); 11341 ins_cost(95); 11342 format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} 11343 ins_encode %{ 11344 __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); 11345 %} 11346 ins_pipe( pipe_slow ); 11347 %} 11348 11349 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ 11350 predicate(UseSSE>=2); 11351 match(Set dst (MoveD2L src)); 11352 effect(DEF dst, USE src, TEMP tmp); 11353 ins_cost(85); 11354 format %{ "MOVD $dst.lo,$src\n\t" 11355 "PSHUFLW $tmp,$src,0x4E\n\t" 11356 "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} 11357 ins_encode %{ 11358 __ movdl($dst$$Register, $src$$XMMRegister); 11359 __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); 11360 __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); 11361 %} 11362 ins_pipe( pipe_slow ); 11363 %} 11364 11365 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ 11366 match(Set dst (MoveL2D src)); 11367 effect(DEF dst, USE src); 11368 11369 ins_cost(200); 11370 format %{ "MOV $dst,$src.lo\n\t" 11371 "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} 11372 opcode(0x89, 0x89); 11373 ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) ); 11374 ins_pipe( ialu_mem_long_reg ); 11375 %} 11376 11377 11378 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ 11379 predicate(UseSSE<=1); 11380 match(Set dst (MoveL2D src)); 11381 effect(DEF dst, USE src); 11382 ins_cost(125); 11383 11384 format %{ "FLD_D $src\n\t" 11385 "FSTP $dst\t# MoveL2D_stack_reg" %} 11386 opcode(0xDD); /* DD /0, FLD m64real */ 11387 ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src), 11388 Pop_Reg_DPR(dst) ); 11389 ins_pipe( fpu_reg_mem ); 11390 %} 11391 11392 11393 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ 11394 predicate(UseSSE>=2 && 
UseXmmLoadAndClearUpper); 11395 match(Set dst (MoveL2D src)); 11396 effect(DEF dst, USE src); 11397 11398 ins_cost(95); 11399 format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11400 ins_encode %{ 11401 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11402 %} 11403 ins_pipe( pipe_slow ); 11404 %} 11405 11406 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ 11407 predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); 11408 match(Set dst (MoveL2D src)); 11409 effect(DEF dst, USE src); 11410 11411 ins_cost(95); 11412 format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} 11413 ins_encode %{ 11414 __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); 11415 %} 11416 ins_pipe( pipe_slow ); 11417 %} 11418 11419 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ 11420 predicate(UseSSE>=2); 11421 match(Set dst (MoveL2D src)); 11422 effect(TEMP dst, USE src, TEMP tmp); 11423 ins_cost(85); 11424 format %{ "MOVD $dst,$src.lo\n\t" 11425 "MOVD $tmp,$src.hi\n\t" 11426 "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} 11427 ins_encode %{ 11428 __ movdl($dst$$XMMRegister, $src$$Register); 11429 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 11430 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 11431 %} 11432 ins_pipe( pipe_slow ); 11433 %} 11434 11435 11436 // ======================================================================= 11437 // fast clearing of an array 11438 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11439 predicate(!UseFastStosb); 11440 match(Set dummy (ClearArray cnt base)); 11441 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11442 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11443 "SHL ECX,1\t# Convert doublewords to words\n\t" 11444 "REP STOS\t# store EAX into [EDI++] while ECX--" %} 11445 ins_encode %{ 11446 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11447 %} 11448 ins_pipe( pipe_slow ); 11449 %} 11450 11451 instruct 
rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ 11452 predicate(UseFastStosb); 11453 match(Set dummy (ClearArray cnt base)); 11454 effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); 11455 format %{ "XOR EAX,EAX\t# ClearArray:\n\t" 11456 "SHL ECX,3\t# Convert doublewords to bytes\n\t" 11457 "REP STOSB\t# store EAX into [EDI++] while ECX--" %} 11458 ins_encode %{ 11459 __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register); 11460 %} 11461 ins_pipe( pipe_slow ); 11462 %} 11463 11464 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, 11465 eAXRegI result, regD tmp1, eFlagsReg cr) %{ 11466 match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); 11467 effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); 11468 11469 format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} 11470 ins_encode %{ 11471 __ string_compare($str1$$Register, $str2$$Register, 11472 $cnt1$$Register, $cnt2$$Register, $result$$Register, 11473 $tmp1$$XMMRegister); 11474 %} 11475 ins_pipe( pipe_slow ); 11476 %} 11477 11478 // fast string equals 11479 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, 11480 regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ 11481 match(Set result (StrEquals (Binary str1 str2) cnt)); 11482 effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); 11483 11484 format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} 11485 ins_encode %{ 11486 __ char_arrays_equals(false, $str1$$Register, $str2$$Register, 11487 $cnt$$Register, $result$$Register, $tmp3$$Register, 11488 $tmp1$$XMMRegister, $tmp2$$XMMRegister); 11489 %} 11490 ins_pipe( pipe_slow ); 11491 %} 11492 11493 // fast search of substring with known size. 
// IndexOf when the substring length is a compile-time constant
// (int_cnt2); picks the no-stack path for length >= 8.
instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                            eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// General IndexOf (substring length in a register; passes -1 as the
// constant-length argument).
instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                        eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                      regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
                          $tmp3$$Register, $result$$Register, $tmp4$$Register,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// encode char[] to byte[] in ISO_8859_1
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "Encode array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}


//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST reg,reg (shorter than CMP reg,0).
instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.
instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Unsigned pointer compare Instructions
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x81,0x07); /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39); /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu $op1,$op2" %}
  opcode(0x3B); /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
11749 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{ 11750 match(Set cr (CmpP (LoadP op) zero)); 11751 11752 format %{ "TEST $op,0xFFFFFFFF" %} 11753 ins_cost(500); 11754 opcode(0xF7); /* Opcode F7 /0 */ 11755 ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) ); 11756 ins_pipe( ialu_cr_reg_imm ); 11757 %} 11758 11759 // Yanked all unsigned pointer compare operations. 11760 // Pointer compares are done with CmpP which is already unsigned. 11761 11762 //----------Max and Min-------------------------------------------------------- 11763 // Min Instructions 11764 //// 11765 // *** Min and Max using the conditional move are slower than the 11766 // *** branch version on a Pentium III. 11767 // // Conditional move for min 11768 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ 11769 // effect( USE_DEF op2, USE op1, USE cr ); 11770 // format %{ "CMOVlt $op2,$op1\t! min" %} 11771 // opcode(0x4C,0x0F); 11772 // ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); 11773 // ins_pipe( pipe_cmov_reg ); 11774 //%} 11775 // 11776 //// Min Register with Register (P6 version) 11777 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ 11778 // predicate(VM_Version::supports_cmov() ); 11779 // match(Set op2 (MinI op1 op2)); 11780 // ins_cost(200); 11781 // expand %{ 11782 // eFlagsReg cr; 11783 // compI_eReg(cr,op1,op2); 11784 // cmovI_reg_lt(op2,op1,cr); 11785 // %} 11786 //%} 11787 11788 // Min Register with Register (generic version) 11789 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ 11790 match(Set dst (MinI dst src)); 11791 effect(KILL flags); 11792 ins_cost(300); 11793 11794 format %{ "MIN $dst,$src" %} 11795 opcode(0xCC); 11796 ins_encode( min_enc(dst,src) ); 11797 ins_pipe( pipe_slow ); 11798 %} 11799 11800 // Max Register with Register 11801 // *** Min and Max using the conditional move are slower than the 11802 // *** branch version on a Pentium III. 
// // Conditional move for max
//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into integer range since
// counted loops have limit check on overflow.
instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
  match(Set limit (LoopLimit (Binary init limit) stride));
  effect(TEMP limit_hi, TEMP tmp, KILL flags);
  ins_cost(300);

  format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
  ins_encode %{
    int strd = (int)$stride$$constant;
    assert(strd != 1 && strd != -1, "sanity");
    int m1 = (strd > 0) ? 1 : -1;
    // Convert limit to long (EAX:EDX)
    __ cdql();
    // Convert init to long (init:tmp)
    __ movl($tmp$$Register, $init$$Register);
    __ sarl($tmp$$Register, 31);
    // $limit - $init
    __ subl($limit$$Register, $init$$Register);
    __ sbbl($limit_hi$$Register, $tmp$$Register);
    // + ($stride - 1)
    if (strd > 0) {
      __ addl($limit$$Register, (strd - 1));
      __ adcl($limit_hi$$Register, 0);
      __ movl($tmp$$Register, strd);
    } else {
      __ addl($limit$$Register, (strd + 1));
      __ adcl($limit_hi$$Register, -1);
      __ lneg($limit_hi$$Register, $limit$$Register);
      __ movl($tmp$$Register, -strd);
    }
    // signed division: (EAX:EDX) / pos_stride
    __ idivl($tmp$$Register);
    if (strd < 0) {
      // restore sign
      __ negl($tmp$$Register);
    }
    // (EAX) * stride
    __ mull($tmp$$Register);
    // + init (ignore upper bits)
    __ addl($limit$$Register, $init$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// ============================================================================
// Branch Instructions
// Jump Table
instruct jumpXtnd(rRegI switch_val) %{
  match(Jump switch_val);
  ins_cost(350);
  format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Jump to Address(table_base + switch_reg)
    Address index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, index));
  %}
  ins_pipe(pipe_jmp);
%}

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP $labl" %}
  size(5);
  ins_encode %{
    Label* L = $labl$$label;
    __ jmp(*L, false); // Always long jump
  %}
  ins_pipe( pipe_jmp );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl\t# Loop end" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe( pipe_jcc );
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u $labl" %}
  size(6);
  ins_encode %{
    Label* L = $labl$$label;
    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
  %}
  ins_pipe(pipe_jcc);
%}

// Two-jump variant: also tests the parity flag (PF), emitting an
// extra JP before/around the main conditional jump.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u $labl\n\t"
      $$emit$$"J$cop,u $labl"
    } else {
      $$emit$$"JP,u done\n\t"
      $$emit$$"J$cop,u $labl\n\t"
      $$emit$$"done:"
    }
  %}
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jcc(Assembler::parity, *l, false);
      __ jcc(Assembler::notEqual, *l, false);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jcc(Assembler::equal, *l, false);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
%}

// ============================================================================
// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass
// array for an instance of the superklass. Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()). Return
// NZ for a miss or zero for a hit. The encoding ALSO sets flags.
12038 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{ 12039 match(Set result (PartialSubtypeCheck sub super)); 12040 effect( KILL rcx, KILL cr ); 12041 12042 ins_cost(1100); // slightly larger than the next version 12043 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12044 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12045 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12046 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12047 "JNE,s miss\t\t# Missed: EDI not-zero\n\t" 12048 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" 12049 "XOR $result,$result\t\t Hit: EDI zero\n\t" 12050 "miss:\t" %} 12051 12052 opcode(0x1); // Force a XOR of EDI 12053 ins_encode( enc_PartialSubtypeCheck() ); 12054 ins_pipe( pipe_slow ); 12055 %} 12056 12057 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{ 12058 match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); 12059 effect( KILL rcx, KILL result ); 12060 12061 ins_cost(1000); 12062 format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" 12063 "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" 12064 "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" 12065 "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" 12066 "JNE,s miss\t\t# Missed: flags NZ\n\t" 12067 "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" 12068 "miss:\t" %} 12069 12070 opcode(0x0); // No need to XOR EDI 12071 ins_encode( enc_PartialSubtypeCheck() ); 12072 ins_pipe( pipe_slow ); 12073 %} 12074 12075 // ============================================================================ 12076 // Branch Instructions -- short offset versions 12077 // 12078 // These instructions are used to replace jumps of a long offset (the default 12079 
// match) with jumps of a shorter offset. These instructions are all tagged
// with the ins_short_branch attribute, which causes the ADLC to suppress the
// match rules in general matching. Instead, the ADLC generates a conversion
// method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant. The compiler will determine if a
// branch can be taken by the is_short_branch_offset() predicate in the machine
// specific code section of the file.

// Jump Direct - Label defines a relative address from JMP+1
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s $labl" %}
  size(2); // 1-byte opcode + 1-byte (rel8) displacement
  ins_encode %{
    Label* L = $labl$$label;
    __ jmpb(*L);
  %}
  ins_pipe( pipe_jmp );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Unsigned-compare flavor (cmpOpU / eFlagsRegU operands).
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// UCF-flags flavor -- same encoding; presumably flags from an unordered
// float compare where a single Jcc suffices (contrast jmpConUCF2_short,
// which must also test parity) -- TODO confirm against the operand defs.
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  ins_encode %{
    Label* L = $labl$$label;
    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
  %}
  ins_pipe( pipe_jcc );
  ins_short_branch(1);
%}

// Two-branch variant: for notEqual, a parity hit (unordered compare) also
// takes the branch; for equal, a parity hit skips it.  Any other condition
// code is a bug (ShouldNotReachHere).
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s $labl\n\t"
      $$emit$$"J$cop,u,s $labl"
    } else {
      $$emit$$"JP,u,s done\n\t"
      $$emit$$"J$cop,u,s $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4); // two 2-byte short branches
  ins_encode %{
    Label* l = $labl$$label;
    if ($cop$$cmpcode == Assembler::notEqual) {
      __ jccb(Assembler::parity, *l);
      __ jccb(Assembler::notEqual, *l);
    } else if ($cop$$cmpcode == Assembler::equal) {
      Label done;
      __ jccb(Assembler::parity, done);
      __ jccb(Assembler::equal, *l);
      __ bind(done);
    } else {
      ShouldNotReachHere();
    }
  %}
  ins_pipe(pipe_jcc);
  ins_short_branch(1);
%}

// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers. Comparing such values efficiently
// is tricky. The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ. For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test. The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test. The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the
// difference between 'Y' and '0L'. The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code. The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf. One fix is to not match both
// reg-reg and reg-zero forms of long-compare. This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Manifest a CmpL result in an integer register. Very painful.
// This is the test to avoid.
// Materializes -1/0/+1 in $dst from a full 64-bit compare: signed compare of
// the high words decides first; only on hi-equality are the low words
// compared (unsigned, since they are the low halves of a signed 64-bit value).
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR $dst,$dst\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "JLT,s m_one\n\t"
            "JGT,s p_one\n\t"
            "CMP $src1.lo,$src2.lo\n\t"
            "JB,s m_one\n\t"
            "JEQ,s done\n"
            "p_one:\tINC $dst\n\t"
            "JMP,s done\n"
            "m_one:\tDEC $dst\n"
            "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // Signed compare of the high halves.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less, m_one);
    __ jccb(Assembler::greater, p_one);
    // High halves equal: unsigned compare of the low halves decides.
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below, m_one);
    __ jccb(Assembler::equal, done);
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Against zero only the sign bit of the high word matters, so a single
// TEST of $src.hi suffices (opcode 0x85 = TEST r/m32,r32).
instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
  match( Set flags (CmpL src zero ));
  ins_cost(100);
  format %{ "TEST $src.hi,$src.hi" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Manifest a CmpL result in the normal flags. Only good for LT or GE
// compares. Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// Full reg-reg long compare via CMP(lo)/SBB(hi): borrow propagation leaves
// flags valid for signed LT/GE tests; $tmp is clobbered by the SBB.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV $tmp,$src1.hi\n\t"
            "SBB $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl); // JLT or JGE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40); // CMOVcc r32,r/m32
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): '&&' binds tighter than '||', so this predicate parses as
// (UseSSE<=1 && test==lt) || test==ge -- i.e. the UseSSE guard does not
// cover the 'ge' arm.  The integer/long cmov variants above parenthesize
// the OR; confirm whether the missing parentheses here are intentional.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): same operator-precedence concern as cmovDDPR_reg_LTGE above.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// NOTE(review): same operator-precedence concern as cmovDDPR_reg_LTGE above.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// NOTE(review): same operator-precedence concern as cmovDDPR_reg_LTGE above.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// OR of both halves into $tmp: ZF is set iff the whole long is zero.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);
  ins_cost(200);
  format %{ "MOV $tmp,$src.lo\n\t"
            "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares.
// Compares low halves; only if they are equal are the high halves compared.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s skip\n\t"
            "CMP $src1.hi,$src2.hi\n\t"
            "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl); // JEQ or JNE...
  %}
%}

// Compare 2 longs and CMOVE longs.
// Conditional move of a long (two CMOVs, lo then hi) keyed on EQ/NE flags.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40); // CMOVcc r32,r/m32
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant of the long conditional move.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Conditional move of an int keyed on EQ/NE long-compare flags.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40); // CMOVcc r32,r/m32
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source variant.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Conditional move of a pointer keyed on EQ/NE long-compare flags.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40); // CMOVcc r32,r/m32
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): '&&' binds tighter than '||', so this predicate parses as
// (UseSSE<=1 && test==eq) || test==ne -- the UseSSE guard does not cover
// the 'ne' arm.  The integer/long cmov variants above parenthesize the OR;
// confirm whether the missing parentheses here are intentional.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): same operator-precedence concern as cmovDDPR_reg_EQNE above.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// NOTE(review): same operator-precedence concern as cmovDDPR_reg_EQNE above.
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}

// NOTE(review): same operator-precedence concern as cmovDDPR_reg_EQNE above.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

//======
// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LEGT except must negate src
// Computes 0 - src via CMP/SBB so the flags reflect the commuted test.
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP $tmp,$src.lo\n\t"
            "SBB $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}

// Manifest a CmpL result in the normal flags. Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV $tmp,$src2.hi\n\t"
            "SBB $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test
// (cmpOp_commute emits the commuted condition to pair with the swapped
// operands of the LEGT flag producers above).
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl); // JGT or JLE...
  %}
%}

// Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40); // CMOVcc r32,r/m32
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant.
// NOTE(review): the format text "$src.hi+4" differs from the "$src.hi" used
// by the LTGE/EQNE variants; format strings are display-only, but confirm
// which spelling is intended for consistency.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
// Conditional move of an int keyed on LE/GT long-compare flags.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40); // CMOVcc r32,r/m32
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source variant.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Compare 2 longs and CMOVE ptrs.
// Conditional move of a pointer keyed on LE/GT long-compare flags.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40); // CMOVcc r32,r/m32
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): '&&' binds tighter than '||', so this predicate parses as
// (UseSSE<=1 && test==le) || test==gt -- the UseSSE guard does not cover
// the 'gt' arm.  The integer/long cmov variants above parenthesize the OR;
// confirm whether the missing parentheses here are intentional.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): same operator-precedence concern as cmovDDPR_reg_LEGT above.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// NOTE(review): same operator-precedence concern as cmovDDPR_reg_LEGT above.
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}


// NOTE(review): same operator-precedence concern as cmovDDPR_reg_LEGT above.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}


// ============================================================================
// Procedure Call/Return Instructions
// Call Java Static Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Static Instruction (method handle version)
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
  match(CallStaticJava);
  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
  effect(USE meth);
  // EBP is saved by all callees (for interpreter stack correction).
  // We use it here for a similar purpose, in {preserve,restore}_SP.

  ins_cost(300);
  format %{ "CALL,static/MethodHandle " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              preserve_SP,
              Java_Static_Call( meth ),
              restore_SP,
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Java Dynamic Instruction
// Note: If this code changes, the corresponding ret_addr_offset() and
// compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_alignment(4);
%}

// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
%}

// Leaf call that touches no FP state: no FPU bookkeeping needed.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
%}


// Return Instruction
// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP EDX\t# pop return address into dummy\n\t"
            "JMP $jump_target " %}
  opcode(0xFF, 0x4); /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler. No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}

// inlined locking and unlocking

// Fast lock with Restricted Transactional Memory support; only selected
// when the compilation uses RTM.  Box register is consumed (USE_KILL).
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  predicate(Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, $cx1$$Register, $cx2$$Register,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}

// Non-RTM fast lock; RTM-specific arguments are passed as noreg/NULL/false.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  predicate(!Compile::current()->use_rtm());
  match(Set cr (FastLock object box));
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  ins_cost(300);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}

// Fast unlock; a single rule serves both modes, with the RTM flag passed
// through to MacroAssembler::fast_unlock.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match(Set cr (FastUnlock object box));
  effect(TEMP tmp, USE_KILL box);
  ins_cost(300);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}



// ============================================================================
// Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or. By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$. Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic. It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}


// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
  match(Set dst (ThreadLocal));
  effect(DEF dst, KILL cr);

  format %{ "MOV $dst, Thread::current()" %}
  ins_encode %{
    Register dstReg = as_Register($dst$$reg);
    __ get_thread(dstReg);
  %}
  ins_pipe( ialu_reg_fat );
%}



//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(rRegI dst, rRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// Implementation no longer uses movX instructions since
// machine-independent system no longer uses CopyX nodes.
//
// peephole %{
//   peepmatch ( incI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( decI_eReg movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addI_eReg_imm movI );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
// %}
//
// peephole %{
//   peepmatch ( addP_eReg_imm movP );
//   peepconstraint ( 0.dst == 1.dst );
//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
// %}

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, rRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(rRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// Active rule: a store (instr 1) immediately followed by a load (instr 0,
// the root) of the same memory location into the same register that was
// stored collapses to just the store -- the register already holds the value.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instruction definitions.