Old src/cpu/x86/vm/x86

   1 //
   2 // Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
  64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
  65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
  66
     // Each reg_def below lists, in order: register save type, C-convention save
     // type, ideal register type, hardware encoding, and a VMReg expression for
     // the register (e.g. rbx->as_VMReg()).
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76
  77 // Special Registers
     // EFLAGS is declared with ideal type 0 and VMRegImpl::Bad() as its VMReg.
  78 reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
  79 
  80 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
  81 // allocator, and only shows up in the encodings.
     // Every FPRn is declared as a Low/High pair of Op_RegF halves that share
     // one encoding; the H half maps to the VMReg following the L half (->next()).
     // FPR0 has no VMReg at all (VMRegImpl::Bad()).
  82 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  83 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  84 // Ok so here's the trick FPR1 is really st(0) except in the midst
  85 // of emission of assembly for a machnode. During the emission the fpu stack
  86 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
  87 // the stack will not have this element so FPR1 == st(0) from the
  88 // oopMap viewpoint. This same weirdness with numbering causes
  89 // instruction encoding to have to play games with the register
  90 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
  91 // where it does flt->flt moves to see an example
  92 //
     // Consequently FPRn (encoding n) is bound to as_FloatRegister(n-1).
  93 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  94 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  95 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  96 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  97 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  98 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  99 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
 100 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
 101 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
 102 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 103 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 104 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 105 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 106 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 107 
 108 // XMM registers.  128-bit registers or 4 words each, labeled a-d.
 109 // Word a in each register holds a Float, words ab hold a Double.
 110 // We currently do not use the SIMD capabilities, so registers cd
 111 // are unused at the moment.
     // Only the a/b words are declared; XMMn's encoding is n and word b maps to
     // the VMReg following word a (->next()).
 112 reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
 113 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
 114 reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
 115 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
 116 reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
 117 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
 118 reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
 119 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
 120 reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
 121 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
 122 reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
 123 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
 124 reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
 125 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
 126 reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
 127 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
 128 
 129 // Specify priority of register selection within phases of register
 130 // allocation.  Highest priority is first.  A useful heuristic is to
 131 // give registers a low priority when they are required by machine
 132 // instructions, like EAX and EDX.  Registers which are used as
 133 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 134 // For the Intel integer registers, the equivalent Long pairs are
 135 // EDX:EAX, EBX:ECX, and EDI:EBP.
     // chunk0: the eight integer registers followed by the x87 FPR pairs.  The
     // eight integer names come first, so FPR0L starts on an even position.
 136 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 137                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 138                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 139                     FPR6L, FPR6H, FPR7L, FPR7H );
 140
     // chunk1: the XMM a/b word pairs, with EFLAGS listed last.
 141 alloc_class chunk1( XMM0a, XMM0b,
 142                     XMM1a, XMM1b,
 143                     XMM2a, XMM2b,
 144                     XMM3a, XMM3b,
 145                     XMM4a, XMM4b,
 146                     XMM5a, XMM5b,
 147                     XMM6a, XMM6b,
 148                     XMM7a, XMM7b, EFLAGS);
 149 
 150 
 151 //----------Architecture Description Register Classes--------------------------
 152 // Several register classes are automatically defined based upon information in
 153 // this architecture description.
 154 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 155 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 156 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 157 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 158 //
 159 // Class for all registers
 160 reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 161 // Class for general registers
 162 reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 163 // Class for general registers which may be used for implicit null checks on win95
 164 // Also safe for use by tailjump: EBP is deliberately left out of this class.
 165 reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
 166 // Class of "X" registers
 167 reg_class x_reg(EBX, ECX, EDX, EAX);
 168 // Class of registers that can appear in an address with no offset.
 169 // EBP and ESP require an extra instruction byte for zero offset.
 170 // Used in fast-unlock
 171 reg_class p_reg(EDX, EDI, ESI, EBX);
 172 // Class for general registers not including ECX
 173 reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
 174 // Class for general registers not including EAX
     // (note: EBP is not a member of this class either)
 175 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 176 // Class for general registers not including EAX or EBX.
 177 reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
 178 // Class of EAX (for multiply and divide operations)
 179 reg_class eax_reg(EAX);
 180 // Class of EBX (for atomic add)
 181 reg_class ebx_reg(EBX);
 182 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 183 reg_class ecx_reg(ECX);
 184 // Class of EDX (for multiply and divide operations)
 185 reg_class edx_reg(EDX);
 186 // Class of EDI (for synchronization)
 187 reg_class edi_reg(EDI);
 188 // Class of ESI (for synchronization)
 189 reg_class esi_reg(ESI);
 190 // Singleton class for interpreter's stack pointer
 191 reg_class ebp_reg(EBP);
 192 // Singleton class for stack pointer
 193 reg_class sp_reg(ESP);
 194 // Singleton class for instruction pointer
 195 // reg_class ip_reg(EIP);
 196 // Singleton class for condition codes
 197 reg_class int_flags(EFLAGS);
 198 // Class of integer register pairs
     // Listed as (low, high) pairs; in every pair the high register's encoding is
     // the low register's encoding + 2 (EAX=0/EDX=2, ECX=1/EBX=3, EBP=5/EDI=7).
 199 reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
 200 // Class of integer register pairs that aligns with calling convention
 201 reg_class eadx_reg( EAX,EDX );
 202 reg_class ebcx_reg( ECX,EBX );
 203 // Not AX or DX, used in divides
 204 reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
 205 
 206 // Floating point registers.  Notice FPR0 is not a choice.
 207 // FPR0 is not ever allocated; we use clever encodings to fake
 208 // 2-address instructions out of Intel's FP stack.
 209 reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
     // flt_reg above uses only the low half of each FPR; dbl_reg further down
     // uses the L/H pairs.
 210
 211 // make a register class for SSE registers
     // (single-word class: only the 'a' word of each XMM register)
 212 reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);
 213
 214 // make a double register class for SSE2 registers
     // (two-word class: the a/b words of each XMM register)
 215 reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
 216                   XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );
 217
 218 reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 219                    FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 220                    FPR7L,FPR7H );
 221
     // Fixed-register classes: FPR1 only (float / double), FPR2 only, and every
     // double pair except FPR1.
 222 reg_class flt_reg0( FPR1L );
 223 reg_class dbl_reg0( FPR1L,FPR1H );
 224 reg_class dbl_reg1( FPR2L,FPR2H );
 225 reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 226                        FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 227
 228 // XMM6 and XMM7 could be used as temporary registers for long, float and
 229 // double values for SSE2.
 230 reg_class xdb_reg6( XMM6a,XMM6b );
 231 reg_class xdb_reg7( XMM7a,XMM7b );
 232 %}
 233 
 234 
 235 //----------SOURCE BLOCK-------------------------------------------------------
 236 // This is a block of C++ code which provides values, functions, and
 237 // definitions necessary in the rest of the architecture description
 238 source_hpp %{
 239 // Must be visible to the DFA in dfa_x86_32.cpp
     // Declaration only; the definition is not part of this header block.
 240 extern bool is_operand_hi32_zero(Node* n);
 241 %}
 242 
 243 source %{
 244 #define   RELOC_IMM32    Assembler::imm_operand
 245 #define   RELOC_DISP32   Assembler::disp32_operand
     // Short names for the Assembler operand-format constants above.
 246
     // Assembler shorthand: "__ foo(...)" expands to "_masm.foo(...)", so a local
     // MacroAssembler named _masm must be in scope wherever it is used.
 247 #define __ _masm.
 248
 249 // How to find the high register of a Long pair, given the low register
     // Works on register encodings: EAX(0)->EDX(2), ECX(1)->EBX(3), EBP(5)->EDI(7).
 250 #define   HIGH_FROM_LOW(x) ((x)+2)
 251 
 252 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 253 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 254 // fast versions of NegF/NegD and AbsF/AbsD.
 255 
 256 // Note: 'double' and 'long long' have 32-bits alignment on x86.
 257 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 258   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
 259   // of 128-bits operands for SSE instructions.
 260   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 261   // Store the value to a 128-bits operand.
 262   operand[0] = lo;
 263   operand[1] = hi;
 264   return operand;
 265 }
 266 
 267 // Buffer for 128-bits masks used by SSE instructions.
     // One extra 128-bit slot is reserved because double_quadword() rounds every
     // address passed to it DOWN to a 16-byte boundary before storing.
 268 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 269
 270 // Static initialization during VM startup.
     // Each pointer below is the 16-byte aligned address returned by
     // double_quadword().  "signmask" constants have every bit set except the
     // sign bit of each 32-bit (float) or 64-bit (double) lane; "signflip"
     // constants have only those sign bits set.
 271 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 272 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 273 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 274 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
 275 
 276 // Offset hacking within calls.
 277 static int pre_call_FPU_size() {
 278   if (Compile::current()->in_24_bit_fp_mode())
 279     return 6; // fldcw
 280   return 0;
 281 }
 282 
 283 static int preserve_SP_size() {
 284   return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
 285 }
 286 
 287 // !!!!! Special hack to get all type of calls to specify the byte offset
 288 //       from the start of the call to the point where the return address
 289 //       will point.
 290 int MachCallStaticJavaNode::ret_addr_offset() {
 291   int offset = 5 + pre_call_FPU_size();  // 5 bytes from start of call to where return address points
 292   if (_method_handle_invoke)
 293     offset += preserve_SP_size();
 294   return offset;
 295 }
 296 
 297 int MachCallDynamicJavaNode::ret_addr_offset() {
 298   return 10 + pre_call_FPU_size();  // 10 bytes from start of call to where return address points
 299 }
 300 
 301 static int sizeof_FFree_Float_Stack_All = -1;
 302 
 303 int MachCallRuntimeNode::ret_addr_offset() {
 304   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 305   return sizeof_FFree_Float_Stack_All + 5 + pre_call_FPU_size();
 306 }
 307 
 308 // Indicate if the safepoint node needs the polling page as an input.
 309 // Since x86 does have absolute addressing, it doesn't.
 310 bool SafePointNode::needs_polling_address_input() {
 311   return false;
 312 }
 313 
 314 //
 315 // Compute padding required for nodes which need alignment
 316 //
 317 
 318 // The address of the call instruction needs to be 4-byte aligned to
 319 // ensure that it does not span a cache line so that it can be patched.
 320 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 321   current_offset += pre_call_FPU_size();  // skip fldcw, if any
 322   current_offset += 1;      // skip call opcode byte
 323   return round_to(current_offset, alignment_required()) - current_offset;
 324 }
 325 
 326 // The address of the call instruction needs to be 4-byte aligned to
 327 // ensure that it does not span a cache line so that it can be patched.
 328 int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
 329   current_offset += pre_call_FPU_size();  // skip fldcw, if any
 330   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 331   current_offset += 1;      // skip call opcode byte
 332   return round_to(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_FPU_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return round_to(current_offset, alignment_required()) - current_offset;
 342 }
 343 
 344 #ifndef PRODUCT
 345 void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const {
 346   st->print("INT3");
 347 }
 348 #endif
 349 
 350 // EMIT_RM()
 351 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 352   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_CC()
 357 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 358   unsigned char c = (unsigned char)( f1 | f2 );
 359   cbuf.insts()->emit_int8(c);
 360 }
 361 
 362 // EMIT_OPCODE()
 363 void emit_opcode(CodeBuffer &cbuf, int code) {
 364   cbuf.insts()->emit_int8((unsigned char) code);
 365 }
 366 
 367 // EMIT_OPCODE() w/ relocation information
 368 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 369   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 370   emit_opcode(cbuf, code);
 371 }
 372 
 373 // EMIT_D8()
 374 void emit_d8(CodeBuffer &cbuf, int d8) {
 375   cbuf.insts()->emit_int8((unsigned char) d8);
 376 }
 377 
 378 // EMIT_D16()
 379 void emit_d16(CodeBuffer &cbuf, int d16) {
 380   cbuf.insts()->emit_int16(d16);
 381 }
 382 
 383 // EMIT_D32()
 384 void emit_d32(CodeBuffer &cbuf, int d32) {
 385   cbuf.insts()->emit_int32(d32);
 386 }
 387 
 388 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 389 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 390         int format) {
 391   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 392   cbuf.insts()->emit_int32(d32);
 393 }
 394 
 395 // emit 32 bit value and construct relocation entry from RelocationHolder
 396 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 397         int format) {
 398 #ifdef ASSERT
 399   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 400     assert(oop(d32)->is_oop() && (ScavengeRootsInCode || !oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 401   }
 402 #endif
 403   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 404   cbuf.insts()->emit_int32(d32);
 405 }
 406 
 407 // Access stack slot for load or store
 408 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 409   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 410   if( -128 <= disp && disp <= 127 ) {
 411     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 412     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 413     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 414   } else {
 415     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 416     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 417     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 418   }
 419 }
 420 
 421    // eRegI ereg, memory mem) %{    // emit_reg_mem
 422 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
 423   // There is no index & no scale, use form without SIB byte
 424   if ((index == 0x4) &&
 425       (scale == 0) && (base != ESP_enc)) {
 426     // If no displacement, mode is 0x0; unless base is [EBP]
 427     if ( (displace == 0) && (base != EBP_enc) ) {
 428       emit_rm(cbuf, 0x0, reg_encoding, base);
 429     }
 430     else {                    // If 8-bit displacement, mode 0x1
 431       if ((displace >= -128) && (displace <= 127)
 432           && !(displace_is_oop) ) {
 433         emit_rm(cbuf, 0x1, reg_encoding, base);
 434         emit_d8(cbuf, displace);
 435       }
 436       else {                  // If 32-bit displacement
 437         if (base == -1) { // Special flag for absolute address
 438           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 439           // (manual lies; no SIB needed here)
 440           if ( displace_is_oop ) {
 441             emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
 442           } else {
 443             emit_d32      (cbuf, displace);
 444           }
 445         }
 446         else {                // Normal base + offset
 447           emit_rm(cbuf, 0x2, reg_encoding, base);
 448           if ( displace_is_oop ) {
 449             emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
 450           } else {
 451             emit_d32      (cbuf, displace);
 452           }
 453         }
 454       }
 455     }
 456   }
 457   else {                      // Else, encode with the SIB byte
 458     // If no displacement, mode is 0x0; unless base is [EBP]
 459     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 460       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 461       emit_rm(cbuf, scale, index, base);
 462     }
 463     else {                    // If 8-bit displacement, mode 0x1
 464       if ((displace >= -128) && (displace <= 127)
 465           && !(displace_is_oop) ) {
 466         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 467         emit_rm(cbuf, scale, index, base);
 468         emit_d8(cbuf, displace);
 469       }
 470       else {                  // If 32-bit displacement
 471         if (base == 0x04 ) {
 472           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 473           emit_rm(cbuf, scale, index, 0x04);
 474         } else {
 475           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 476           emit_rm(cbuf, scale, index, base);
 477         }
 478         if ( displace_is_oop ) {
 479           emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
 480         } else {
 481           emit_d32      (cbuf, displace);
 482         }
 483       }
 484     }
 485   }
 486 }
 487 
 488 
 489 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 490   if( dst_encoding == src_encoding ) {
 491     // reg-reg copy, use an empty encoding
 492   } else {
 493     emit_opcode( cbuf, 0x8B );
 494     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 495   }
 496 }
 497 
 498 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 499   if( dst_encoding == src_encoding ) {
 500     // reg-reg copy, use an empty encoding
 501   } else {
 502     MacroAssembler _masm(&cbuf);
 503 
 504     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 505   }
 506 }
 507 
 508 
 509 //=============================================================================
 510 #ifndef PRODUCT
 511 void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 512   Compile* C = ra_->C;
 513   if( C->in_24_bit_fp_mode() ) {
 514     st->print("FLDCW  24 bit fpu control word");
 515     st->print_cr(""); st->print("\t");
 516   }
 517 
 518   int framesize = C->frame_slots() << LogBytesPerInt;
 519   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 520   // Remove two words for return addr and rbp,
 521   framesize -= 2*wordSize;
 522 
 523   // Calls to C2R adapters often do not accept exceptional returns.
 524   // We require that their callers must bang for them.  But be careful, because
 525   // some VM calls (such as call site linkage) can use several kilobytes of
 526   // stack.  But the stack safety zone should account for that.
 527   // See bugs 4446381, 4468289, 4497237.
 528   if (C->need_stack_bang(framesize)) {
 529     st->print_cr("# stack bang"); st->print("\t");
 530   }
 531   st->print_cr("PUSHL  EBP"); st->print("\t");
 532 
 533   if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
 534     st->print("PUSH   0xBADB100D\t# Majik cookie for stack depth check");
 535     st->print_cr(""); st->print("\t");
 536     framesize -= wordSize;
 537   }
 538 
 539   if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
 540     if (framesize) {
 541       st->print("SUB    ESP,%d\t# Create frame",framesize);
 542     }
 543   } else {
 544     st->print("SUB    ESP,%d\t# Create frame",framesize);
 545   }
 546 }
 547 #endif
 548 
 549 
 550 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 551   Compile* C = ra_->C;
 552 
 553   if (UseSSE >= 2 && VerifyFPU) {
 554     MacroAssembler masm(&cbuf);
 555     masm.verify_FPU(0, "FPU stack must be clean on entry");
 556   }
 557 
 558   // WARNING: Initial instruction MUST be 5 bytes or longer so that
 559   // NativeJump::patch_verified_entry will be able to patch out the entry
 560   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
 561   // depth is ok at 5 bytes, the frame allocation can be either 3 or
 562   // 6 bytes. So if we don't do the fldcw or the push then we must
 563   // use the 6 byte frame allocation even if we have no frame. :-(
 564   // If method sets FPU control word do it now
 565   if( C->in_24_bit_fp_mode() ) {
 566     MacroAssembler masm(&cbuf);
 567     masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
 568   }
 569 
 570   int framesize = C->frame_slots() << LogBytesPerInt;
 571   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 572   // Remove two words for return addr and rbp,
 573   framesize -= 2*wordSize;
 574 
 575   // Calls to C2R adapters often do not accept exceptional returns.
 576   // We require that their callers must bang for them.  But be careful, because
 577   // some VM calls (such as call site linkage) can use several kilobytes of
 578   // stack.  But the stack safety zone should account for that.
 579   // See bugs 4446381, 4468289, 4497237.
 580   if (C->need_stack_bang(framesize)) {
 581     MacroAssembler masm(&cbuf);
 582     masm.generate_stack_overflow_check(framesize);
 583   }
 584 
 585   // We always push rbp, so that on return to interpreter rbp, will be
 586   // restored correctly and we can correct the stack.
 587   emit_opcode(cbuf, 0x50 | EBP_enc);
 588 
 589   if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
 590     emit_opcode(cbuf, 0x68); // push 0xbadb100d
 591     emit_d32(cbuf, 0xbadb100d);
 592     framesize -= wordSize;
 593   }
 594 
 595   if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
 596     if (framesize) {
 597       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
 598       emit_rm(cbuf, 0x3, 0x05, ESP_enc);
 599       emit_d8(cbuf, framesize);
 600     }
 601   } else {
 602     emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
 603     emit_rm(cbuf, 0x3, 0x05, ESP_enc);
 604     emit_d32(cbuf, framesize);
 605   }
 606   C->set_frame_complete(cbuf.insts_size());
 607 
 608 #ifdef ASSERT
 609   if (VerifyStackAtCalls) {
 610     Label L;
 611     MacroAssembler masm(&cbuf);
 612     masm.push(rax);
 613     masm.mov(rax, rsp);
 614     masm.andptr(rax, StackAlignmentInBytes-1);
 615     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
 616     masm.pop(rax);
 617     masm.jcc(Assembler::equal, L);
 618     masm.stop("Stack is not properly aligned!");
 619     masm.bind(L);
 620   }
 621 #endif
 622 
 623 }
 624 
 625 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 626   return MachNode::size(ra_); // too many variables; just compute it the hard way
 627 }
 628 
 629 int MachPrologNode::reloc() const {
 630   return 0; // a large enough number
 631 }
 632 
 633 //=============================================================================
 634 #ifndef PRODUCT
 635 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 636   Compile *C = ra_->C;
 637   int framesize = C->frame_slots() << LogBytesPerInt;
 638   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 639   // Remove two words for return addr and rbp,
 640   framesize -= 2*wordSize;
 641 
 642   if( C->in_24_bit_fp_mode() ) {
 643     st->print("FLDCW  standard control word");
 644     st->cr(); st->print("\t");
 645   }
 646   if( framesize ) {
 647     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 648     st->cr(); st->print("\t");
 649   }
 650   st->print_cr("POPL   EBP"); st->print("\t");
 651   if( do_polling() && C->is_method_compilation() ) {
 652     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 653     st->cr(); st->print("\t");
 654   }
 655 }
 656 #endif
 657 
 658 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 659   Compile *C = ra_->C;
 660 
 661   // If method set FPU control word, restore to standard control word
 662   if( C->in_24_bit_fp_mode() ) {
 663     MacroAssembler masm(&cbuf);
 664     masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 665   }
 666 
 667   int framesize = C->frame_slots() << LogBytesPerInt;
 668   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 669   // Remove two words for return addr and rbp,
 670   framesize -= 2*wordSize;
 671 
 672   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 673 
 674   if( framesize >= 128 ) {
 675     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 676     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 677     emit_d32(cbuf, framesize);
 678   }
 679   else if( framesize ) {
 680     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 681     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 682     emit_d8(cbuf, framesize);
 683   }
 684 
 685   emit_opcode(cbuf, 0x58 | EBP_enc);
 686 
 687   if( do_polling() && C->is_method_compilation() ) {
 688     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 689     emit_opcode(cbuf,0x85);
 690     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 691     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 692   }
 693 }
 694 
 695 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 696   Compile *C = ra_->C;
 697   // If method set FPU control word, restore to standard control word
 698   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 699   if( do_polling() && C->is_method_compilation() ) size += 6;
 700 
 701   int framesize = C->frame_slots() << LogBytesPerInt;
 702   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 703   // Remove two words for return addr and rbp,
 704   framesize -= 2*wordSize;
 705 
 706   size++; // popl rbp,
 707 
 708   if( framesize >= 128 ) {
 709     size += 6;
 710   } else {
 711     size += framesize ? 3 : 0;
 712   }
 713   return size;
 714 }
 715 
 716 int MachEpilogNode::reloc() const {
 717   return 0; // a large enough number
 718 }
 719 
 720 const Pipeline * MachEpilogNode::pipeline() const {
 721   return MachNode::pipeline_class();
 722 }
 723 
 724 int MachEpilogNode::safepoint_offset() const { return 0; }
 725 
 726 //=============================================================================
 727 
 728 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 729 static enum RC rc_class( OptoReg::Name reg ) {
 730 
 731   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 732   if (OptoReg::is_stack(reg)) return rc_stack;
 733 
 734   VMReg r = OptoReg::as_VMReg(reg);
 735   if (r->is_Register()) return rc_int;
 736   if (r->is_FloatRegister()) {
 737     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 738     return rc_float;
 739   }
 740   assert(r->is_XMMRegister(), "must be");
 741   return rc_xmm;
 742 }
 743 
 744 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 745                         int opcode, const char *op_str, int size, outputStream* st ) {
 746   if( cbuf ) {
 747     emit_opcode  (*cbuf, opcode );
 748     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
 749 #ifndef PRODUCT
 750   } else if( !do_size ) {
 751     if( size != 0 ) st->print("\n\t");
 752     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 753       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 754       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 755     } else { // FLD, FST, PUSH, POP
 756       st->print("%s [ESP + #%d]",op_str,offset);
 757     }
 758 #endif
 759   }
 760   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 761   return size+3+offset_size;
 762 }
 763 
 764 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 765 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 766                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 767   if( cbuf ) {
 768     if( reg_lo+1 == reg_hi ) { // double move?
 769       if( is_load && !UseXmmLoadAndClearUpper )
 770         emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
 771       else
 772         emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
 773     } else {
 774       emit_opcode(*cbuf, 0xF3 );
 775     }
 776     emit_opcode(*cbuf, 0x0F );
 777     if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
 778       emit_opcode(*cbuf, 0x12 );   // use 'movlpd' for load
 779     else
 780       emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
 781     encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
 782 #ifndef PRODUCT
 783   } else if( !do_size ) {
 784     if( size != 0 ) st->print("\n\t");
 785     if( reg_lo+1 == reg_hi ) { // double move?
 786       if( is_load ) st->print("%s %s,[ESP + #%d]",
 787                                UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 788                                Matcher::regName[reg_lo], offset);
 789       else          st->print("MOVSD  [ESP + #%d],%s",
 790                                offset, Matcher::regName[reg_lo]);
 791     } else {
 792       if( is_load ) st->print("MOVSS  %s,[ESP + #%d]",
 793                                Matcher::regName[reg_lo], offset);
 794       else          st->print("MOVSS  [ESP + #%d],%s",
 795                                offset, Matcher::regName[reg_lo]);
 796     }
 797 #endif
 798   }
 799   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 800   return size+5+offset_size;
 801 }
 802 
 803 
 804 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 805                             int src_hi, int dst_hi, int size, outputStream* st ) {
 806   if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
 807     if( cbuf ) {
 808       if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
 809         emit_opcode(*cbuf, 0x66 );
 810       }
 811       emit_opcode(*cbuf, 0x0F );
 812       emit_opcode(*cbuf, 0x28 );
 813       emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
 814 #ifndef PRODUCT
 815     } else if( !do_size ) {
 816       if( size != 0 ) st->print("\n\t");
 817       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 818         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 819       } else {
 820         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 821       }
 822 #endif
 823     }
 824     return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
 825   } else {
 826     if( cbuf ) {
 827       emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
 828       emit_opcode(*cbuf, 0x0F );
 829       emit_opcode(*cbuf, 0x10 );
 830       emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
 831 #ifndef PRODUCT
 832     } else if( !do_size ) {
 833       if( size != 0 ) st->print("\n\t");
 834       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 835         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 836       } else {
 837         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 838       }
 839 #endif
 840     }
 841     return size+4;
 842   }
 843 }
 844 
 845 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 846                             int src_hi, int dst_hi, int size, outputStream* st ) {
 847   // 32-bit
 848   if (cbuf) {
 849     emit_opcode(*cbuf, 0x66);
 850     emit_opcode(*cbuf, 0x0F);
 851     emit_opcode(*cbuf, 0x6E);
 852     emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
 853 #ifndef PRODUCT
 854   } else if (!do_size) {
 855     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 856 #endif
 857   }
 858   return 4;
 859 }
 860 
 861 
 862 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 863                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 864   // 32-bit
 865   if (cbuf) {
 866     emit_opcode(*cbuf, 0x66);
 867     emit_opcode(*cbuf, 0x0F);
 868     emit_opcode(*cbuf, 0x7E);
 869     emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
 870 #ifndef PRODUCT
 871   } else if (!do_size) {
 872     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 873 #endif
 874   }
 875   return 4;
 876 }
 877 
 878 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 879   if( cbuf ) {
 880     emit_opcode(*cbuf, 0x8B );
 881     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 882 #ifndef PRODUCT
 883   } else if( !do_size ) {
 884     if( size != 0 ) st->print("\n\t");
 885     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 886 #endif
 887   }
 888   return size+2;
 889 }
 890 
 891 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 892                                  int offset, int size, outputStream* st ) {
 893   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 894     if( cbuf ) {
 895       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 896       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 897 #ifndef PRODUCT
 898     } else if( !do_size ) {
 899       if( size != 0 ) st->print("\n\t");
 900       st->print("FLD    %s",Matcher::regName[src_lo]);
 901 #endif
 902     }
 903     size += 2;
 904   }
 905 
 906   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 907   const char *op_str;
 908   int op;
 909   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 910     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 911     op = 0xDD;
 912   } else {                   // 32-bit store
 913     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 914     op = 0xD9;
 915     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 916   }
 917 
 918   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 919 }
 920 
 921 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
 922   // Get registers to move
 923   OptoReg::Name src_second = ra_->get_reg_second(in(1));
 924   OptoReg::Name src_first = ra_->get_reg_first(in(1));
 925   OptoReg::Name dst_second = ra_->get_reg_second(this );
 926   OptoReg::Name dst_first = ra_->get_reg_first(this );
 927 
 928   enum RC src_second_rc = rc_class(src_second);
 929   enum RC src_first_rc = rc_class(src_first);
 930   enum RC dst_second_rc = rc_class(dst_second);
 931   enum RC dst_first_rc = rc_class(dst_first);
 932 
 933   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
 934 
 935   // Generate spill code!
 936   int size = 0;
 937 
 938   if( src_first == dst_first && src_second == dst_second )
 939     return size;            // Self copy, no move
 940 
 941   // --------------------------------------
 942   // Check for mem-mem move.  push/pop to move.
 943   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
 944     if( src_second == dst_first ) { // overlapping stack copy ranges
 945       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
 946       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 947       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 948       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
 949     }
 950     // move low bits
 951     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
 952     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
 953     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
 954       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
 955       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
 956     }
 957     return size;
 958   }
 959 
 960   // --------------------------------------
 961   // Check for integer reg-reg copy
 962   if( src_first_rc == rc_int && dst_first_rc == rc_int )
 963     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
 964 
 965   // Check for integer store
 966   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
 967     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
 968 
 969   // Check for integer load
 970   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
 971     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
 972 
 973   // Check for integer reg-xmm reg copy
 974   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
 975     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
 976             "no 64 bit integer-float reg moves" );
 977     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
 978   }
 979   // --------------------------------------
 980   // Check for float reg-reg copy
 981   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
 982     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
 983             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
 984     if( cbuf ) {
 985 
 986       // Note the mucking with the register encode to compensate for the 0/1
 987       // indexing issue mentioned in a comment in the reg_def sections
 988       // for FPR registers many lines above here.
 989 
 990       if( src_first != FPR1L_num ) {
 991         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
 992         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
 993         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
 994         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
 995      } else {
 996         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
 997         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
 998      }
 999 #ifndef PRODUCT
1000     } else if( !do_size ) {
1001       if( size != 0 ) st->print("\n\t");
1002       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1003       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1004 #endif
1005     }
1006     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1007   }
1008 
1009   // Check for float store
1010   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1011     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1012   }
1013 
1014   // Check for float load
1015   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1016     int offset = ra_->reg2offset(src_first);
1017     const char *op_str;
1018     int op;
1019     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1020       op_str = "FLD_D";
1021       op = 0xDD;
1022     } else {                   // 32-bit load
1023       op_str = "FLD_S";
1024       op = 0xD9;
1025       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1026     }
1027     if( cbuf ) {
1028       emit_opcode  (*cbuf, op );
1029       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false);
1030       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1031       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1032 #ifndef PRODUCT
1033     } else if( !do_size ) {
1034       if( size != 0 ) st->print("\n\t");
1035       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1036 #endif
1037     }
1038     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1039     return size + 3+offset_size+2;
1040   }
1041 
1042   // Check for xmm reg-reg copy
1043   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1044     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1045             (src_first+1 == src_second && dst_first+1 == dst_second),
1046             "no non-adjacent float-moves" );
1047     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1048   }
1049 
1050   // Check for xmm reg-integer reg copy
1051   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1052     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1053             "no 64 bit float-integer reg moves" );
1054     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1055   }
1056 
1057   // Check for xmm store
1058   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1059     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1060   }
1061 
1062   // Check for float xmm load
1063   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1064     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1065   }
1066 
1067   // Copy from float reg to xmm reg
1068   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1069     // copy to the top of stack from floating point reg
1070     // and use LEA to preserve flags
1071     if( cbuf ) {
1072       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1073       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1074       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1075       emit_d8(*cbuf,0xF8);
1076 #ifndef PRODUCT
1077     } else if( !do_size ) {
1078       if( size != 0 ) st->print("\n\t");
1079       st->print("LEA    ESP,[ESP-8]");
1080 #endif
1081     }
1082     size += 4;
1083 
1084     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1085 
1086     // Copy from the temp memory to the xmm reg.
1087     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1088 
1089     if( cbuf ) {
1090       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1091       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1092       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1093       emit_d8(*cbuf,0x08);
1094 #ifndef PRODUCT
1095     } else if( !do_size ) {
1096       if( size != 0 ) st->print("\n\t");
1097       st->print("LEA    ESP,[ESP+8]");
1098 #endif
1099     }
1100     size += 4;
1101     return size;
1102   }
1103 
1104   assert( size > 0, "missed a case" );
1105 
1106   // --------------------------------------------------------------------
1107   // Check for second bits still needing moving.
1108   if( src_second == dst_second )
1109     return size;               // Self copy; no move
1110   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1111 
1112   // Check for second word int-int move
1113   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1114     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1115 
1116   // Check for second word integer store
1117   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1118     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1119 
1120   // Check for second word integer load
1121   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1122     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1123 
1124 
1125   Unimplemented();
1126 }
1127 
1128 #ifndef PRODUCT
1129 void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1130   implementation( NULL, ra_, false, st );
1131 }
1132 #endif
1133 
1134 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1135   implementation( &cbuf, ra_, false, NULL );
1136 }
1137 
1138 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1139   return implementation( NULL, ra_, true, NULL );
1140 }
1141 
1142 //=============================================================================
1143 #ifndef PRODUCT
1144 void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const {
1145   st->print("NOP \t# %d bytes pad for loops and calls", _count);
1146 }
1147 #endif
1148 
1149 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
1150   MacroAssembler _masm(&cbuf);
1151   __ nop(_count);
1152 }
1153 
1154 uint MachNopNode::size(PhaseRegAlloc *) const {
1155   return _count;
1156 }
1157 
1158 
1159 //=============================================================================
1160 #ifndef PRODUCT
1161 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1162   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1163   int reg = ra_->get_reg_first(this);
1164   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1165 }
1166 #endif
1167 
1168 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1169   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1170   int reg = ra_->get_encode(this);
1171   if( offset >= 128 ) {
1172     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1173     emit_rm(cbuf, 0x2, reg, 0x04);
1174     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1175     emit_d32(cbuf, offset);
1176   }
1177   else {
1178     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1179     emit_rm(cbuf, 0x1, reg, 0x04);
1180     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1181     emit_d8(cbuf, offset);
1182   }
1183 }
1184 
1185 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1186   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1187   if( offset >= 128 ) {
1188     return 7;
1189   }
1190   else {
1191     return 4;
1192   }
1193 }
1194 
1195 //=============================================================================
1196 
1197 // emit call stub, compiled java to interpreter
1198 void emit_java_to_interp(CodeBuffer &cbuf ) {
1199   // Stub is fixed up when the corresponding call is converted from calling
1200   // compiled code to calling interpreted code.
1201   // mov rbx,0
1202   // jmp -1
1203 
1204   address mark = cbuf.insts_mark();  // get mark within main instrs section
1205 
1206   // Note that the code buffer's insts_mark is always relative to insts.
1207   // That's why we must use the macroassembler to generate a stub.
1208   MacroAssembler _masm(&cbuf);
1209 
1210   address base =
1211   __ start_a_stub(Compile::MAX_stubs_size);
1212   if (base == NULL)  return;  // CodeBuffer::expand failed
1213   // static stub relocation stores the instruction address of the call
1214   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
1215   // static stub relocation also tags the methodOop in the code-stream.
1216   __ movoop(rbx, (jobject)NULL);  // method is zapped till fixup time
1217   // This is recognized as unresolved by relocs/nativeInst/ic code
1218   __ jump(RuntimeAddress(__ pc()));
1219 
1220   __ end_a_stub();
1221   // Update current stubs pointer and restore insts_end.
1222 }
1223 // size of call stub, compiled java to interpretor
1224 uint size_java_to_interp() {
1225   return 10;  // movl; jmp
1226 }
1227 // relocation entries for call stub, compiled java to interpretor
1228 uint reloc_java_to_interp() {
1229   return 4;  // 3 in emit_java_to_interp + 1 in Java_Static_Call
1230 }
1231 
1232 //=============================================================================
1233 #ifndef PRODUCT
1234 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1235   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1236   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1237   st->print_cr("\tNOP");
1238   st->print_cr("\tNOP");
1239   if( !OptoBreakpoint )
1240     st->print_cr("\tNOP");
1241 }
1242 #endif
1243 
1244 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1245   MacroAssembler masm(&cbuf);
1246 #ifdef ASSERT
1247   uint insts_size = cbuf.insts_size();
1248 #endif
1249   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1250   masm.jump_cc(Assembler::notEqual,
1251                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1252   /* WARNING these NOPs are critical so that verified entry point is properly
1253      aligned for patching by NativeJump::patch_verified_entry() */
1254   int nops_cnt = 2;
1255   if( !OptoBreakpoint ) // Leave space for int3
1256      nops_cnt += 1;
1257   masm.nop(nops_cnt);
1258 
1259   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1260 }
1261 
1262 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1263   return OptoBreakpoint ? 11 : 12;
1264 }
1265 
1266 
1267 //=============================================================================
1268 uint size_exception_handler() {
1269   // NativeCall instruction size is the same as NativeJump.
1270   // exception handler starts out as jump and can be patched to
1271   // a call be deoptimization.  (4932387)
1272   // Note that this value is also credited (in output.cpp) to
1273   // the size of the code section.
1274   return NativeJump::instruction_size;
1275 }
1276 
1277 // Emit exception handler code.  Stuff framesize into a register
1278 // and call a VM stub routine.
1279 int emit_exception_handler(CodeBuffer& cbuf) {
1280 
1281   // Note that the code buffer's insts_mark is always relative to insts.
1282   // That's why we must use the macroassembler to generate a handler.
1283   MacroAssembler _masm(&cbuf);
1284   address base =
1285   __ start_a_stub(size_exception_handler());
1286   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1287   int offset = __ offset();
1288   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1289   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1290   __ end_a_stub();
1291   return offset;
1292 }
1293 
1294 uint size_deopt_handler() {
1295   // NativeCall instruction size is the same as NativeJump.
1296   // exception handler starts out as jump and can be patched to
1297   // a call be deoptimization.  (4932387)
1298   // Note that this value is also credited (in output.cpp) to
1299   // the size of the code section.
1300   return 5 + NativeJump::instruction_size; // pushl(); jmp;
1301 }
1302 
1303 // Emit deopt handler code.
1304 int emit_deopt_handler(CodeBuffer& cbuf) {
1305 
1306   // Note that the code buffer's insts_mark is always relative to insts.
1307   // That's why we must use the macroassembler to generate a handler.
1308   MacroAssembler _masm(&cbuf);
1309   address base =
1310   __ start_a_stub(size_exception_handler());
1311   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1312   int offset = __ offset();
1313   InternalAddress here(__ pc());
1314   __ pushptr(here.addr());
1315 
1316   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1317   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1318   __ end_a_stub();
1319   return offset;
1320 }
1321 
1322 
1323 static void emit_double_constant(CodeBuffer& cbuf, double x) {
1324   int mark = cbuf.insts()->mark_off();
1325   MacroAssembler _masm(&cbuf);
1326   address double_address = __ double_constant(x);
1327   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1328   emit_d32_reloc(cbuf,
1329                  (int)double_address,
1330                  internal_word_Relocation::spec(double_address),
1331                  RELOC_DISP32);
1332 }
1333 
1334 static void emit_float_constant(CodeBuffer& cbuf, float x) {
1335   int mark = cbuf.insts()->mark_off();
1336   MacroAssembler _masm(&cbuf);
1337   address float_address = __ float_constant(x);
1338   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
1339   emit_d32_reloc(cbuf,
1340                  (int)float_address,
1341                  internal_word_Relocation::spec(float_address),
1342                  RELOC_DISP32);
1343 }
1344 
1345 
1346 const bool Matcher::match_rule_supported(int opcode) {
1347   if (!has_match_rule(opcode))
1348     return false;
1349 
1350   return true;  // Per default match rules are supported.
1351 }
1352 
1353 int Matcher::regnum_to_fpu_offset(int regnum) {
1354   return regnum - 32; // The FP registers are in the second chunk
1355 }
1356 
1357 bool is_positive_zero_float(jfloat f) {
1358   return jint_cast(f) == jint_cast(0.0F);
1359 }
1360 
1361 bool is_positive_one_float(jfloat f) {
1362   return jint_cast(f) == jint_cast(1.0F);
1363 }
1364 
1365 bool is_positive_zero_double(jdouble d) {
1366   return jlong_cast(d) == jlong_cast(0.0);
1367 }
1368 
1369 bool is_positive_one_double(jdouble d) {
1370   return jlong_cast(d) == jlong_cast(1.0);
1371 }
1372 
1373 // This is UltraSparc specific, true just means we have fast l2f conversion
1374 const bool Matcher::convL2FSupported(void) {
1375   return true;
1376 }
1377 
1378 // Vector width in bytes
1379 const uint Matcher::vector_width_in_bytes(void) {
1380   return UseSSE >= 2 ? 8 : 0;
1381 }
1382 
1383 // Vector ideal reg
1384 const uint Matcher::vector_ideal_reg(void) {
1385   return Op_RegD;
1386 }
1387 
1388 // Is this branch offset short enough that a short branch can be used?
1389 //
1390 // NOTE: If the platform does not provide any short branch variants, then
1391 //       this method should return false for offset 0.
1392 bool Matcher::is_short_branch_offset(int rule, int offset) {
1393   // the short version of jmpConUCF2 contains multiple branches,
1394   // making the reach slightly less
1395   if (rule == jmpConUCF2_rule)
1396     return (-126 <= offset && offset <= 125);
1397   return (-128 <= offset && offset <= 127);
1398 }
1399 
1400 const bool Matcher::isSimpleConstant64(jlong value) {
1401   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1402   return false;
1403 }
1404 
 1405 // The ECX count operand of REP STOS for the ClearArray node is in dwords, not bytes.
 1406 const bool Matcher::init_array_count_is_in_bytes = false;
 1407 
 1408 // Threshold size for ClearArray (8 * BytesPerLong).
 1409 const int Matcher::init_array_short_size = 8 * BytesPerLong;
 1410 
 1411 // Should the Matcher clone shifts on addressing modes, expecting them to
 1412 // be subsumed into complex addressing expressions, or compute them into
 1413 // registers?  True for Intel but false for most RISCs.
 1414 const bool Matcher::clone_shift_expressions = true;
1415 
1416 bool Matcher::narrow_oop_use_complex_address() {
1417   ShouldNotCallThis();
1418   return true;
1419 }
1420 
1421 
 1422 // Is it better to copy float constants, or to load them directly from memory?
 1423 // Intel can load a float constant from a direct address, requiring no
 1424 // extra registers.  Most RISCs will have to materialize an address into a
 1425 // register first, so they would do better to copy the constant from stack.
 1426 const bool Matcher::rematerialize_float_constants = true;
 1427 
 1428 // If the CPU can load and store misaligned doubles directly then no fixup is
 1429 // needed.  Else we split the double into 2 integer pieces and move it
 1430 // piece-by-piece.  Only happens when passing doubles into C code as the
 1431 // Java calling convention forces doubles to be aligned.
 1432 const bool Matcher::misaligned_doubles_ok = true;
1433 
1434 
1435 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1436   // Get the memory operand from the node
1437   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1438   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1439   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1440   uint opcnt     = 1;                 // First operand
1441   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1442   while( idx >= skipped+num_edges ) {
1443     skipped += num_edges;
1444     opcnt++;                          // Bump operand count
1445     assert( opcnt < numopnds, "Accessing non-existent operand" );
1446     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1447   }
1448 
1449   MachOper *memory = node->_opnds[opcnt];
1450   MachOper *new_memory = NULL;
1451   switch (memory->opcode()) {
1452   case DIRECT:
1453   case INDOFFSET32X:
1454     // No transformation necessary.
1455     return;
1456   case INDIRECT:
1457     new_memory = new (C) indirect_win95_safeOper( );
1458     break;
1459   case INDOFFSET8:
1460     new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1461     break;
1462   case INDOFFSET32:
1463     new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1464     break;
1465   case INDINDEXOFFSET:
1466     new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1467     break;
1468   case INDINDEXSCALE:
1469     new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
1470     break;
1471   case INDINDEXSCALEOFFSET:
1472     new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1473     break;
1474   case LOAD_LONG_INDIRECT:
1475   case LOAD_LONG_INDOFFSET32:
1476     // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1477     return;
1478   default:
1479     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1480     return;
1481   }
1482   node->_opnds[opcnt] = new_memory;
1483 }
1484 
 1485 // Advertise here whether the CPU requires explicit rounding operations
 1486 // to implement the UseStrictFP mode.
 1487 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1488 
1489 // Are floats converted to double when stored to stack during deoptimization?
1490 // On 32-bit x86 it is stored with conversion only when FPU is used for floats.
1491 bool Matcher::float_in_double() { return (UseSSE == 0); }
1492 
1493 // Do ints take an entire long register or just half?
1494 const bool Matcher::int_in_long = false;
1495 
1496 // Tell whether a register can ever carry an incoming Java argument.  The
1497 // answer is used at startup to build the trampoline stubs in generateOptoStub.
1498 // Registers not mentioned will be killed by the VM call in the trampoline, and
1499 // arguments in those registers will not be available to the callee.
1500 bool Matcher::can_be_java_arg( int reg ) {
1501   const bool in_gpr    = (reg == ECX_num || reg == EDX_num);
1502   const bool in_xmm_lo = (UseSSE >= 1) && (reg == XMM0a_num || reg == XMM1a_num);
1503   const bool in_xmm_hi = (UseSSE >= 2) && (reg == XMM0b_num || reg == XMM1b_num);
1504   return in_gpr || in_xmm_lo || in_xmm_hi;
1505 }
1506 
1507 bool Matcher::is_spillable_arg( int reg ) {
1508   return can_be_java_arg(reg);
1509 }
1510 
1511 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1512   // Use the hardware integer DIV instruction when it is
1513   // faster than the code sequence which uses multiply.
1514   // Only when the constant divisor fits into 32 bits
1515   // (min_jint is excluded to get only correct
1516   // positive 32 bit values from negative ones).
1517   return VM_Version::has_fast_idiv() &&
1518          (divisor == (int)divisor && divisor != min_jint);
1519 }
1520 
1521 // Register for DIVI projection of divmodI
1522 RegMask Matcher::divI_proj_mask() {
1523   return EAX_REG_mask;
1524 }
1525 
1526 // Register for MODI projection of divmodI
1527 RegMask Matcher::modI_proj_mask() {
1528   return EDX_REG_mask;
1529 }
1530 
1531 // Register for DIVL projection of divmodL
1532 RegMask Matcher::divL_proj_mask() {
1533   ShouldNotReachHere();
1534   return RegMask();
1535 }
1536 
1537 // Register for MODL projection of divmodL
1538 RegMask Matcher::modL_proj_mask() {
1539   ShouldNotReachHere();
1540   return RegMask();
1541 }
1542 
1543 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1544   return EBP_REG_mask;
1545 }
1546 
1547 // Returns true when the upper 32 bits of n's long value are known to be zero.
1548 bool is_operand_hi32_zero(Node* n) {
1549   switch (n->Opcode()) {
1550   case Op_LoadUI2L:
1551     return true;
1552   case Op_AndL: {
1553     // AND with a constant mask whose upper 32 bits are all clear.
1554     Node* mask = n->in(2);
1555     return mask->is_Con() && (mask->get_long() & 0xFFFFFFFF00000000LL) == 0LL;
1556   }
1557   case Op_ConL:
1558     // Long constant whose upper 32 bits are all clear.
1559     return (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL;
1560   default:
1561     return false;
1562   }
1563 }
1564 
1565 %}
1566 
1567 //----------ENCODING BLOCK-----------------------------------------------------
1568 // This block specifies the encoding classes used by the compiler to output
1569 // byte streams.  Encoding classes generate functions which are called by
1570 // Machine Instruction Nodes in order to generate the bit encoding of the
1571 // instruction.  Operands specify their base encoding interface with the
1572 // interface keyword.  Four interfaces are currently supported:
1573 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1574 // operand to generate a function which returns its register number when
1575 // queried.   CONST_INTER causes an operand to generate a function which
1576 // returns the value of the constant when queried.  MEMORY_INTER causes an
1577 // operand to generate four functions which return the Base Register, the
1578 // Index Register, the Scale Value, and the Offset Value of the operand when
1579 // queried.  COND_INTER causes an operand to generate six functions which
1580 // return the encoding code (ie - encoding bits for the instruction)
1581 // associated with each basic boolean condition for a conditional instruction.
1582 // Instructions specify two basic values for encoding.  They use the
1583 // ins_encode keyword to specify their encoding class (which must be one of
1584 // the class names specified in the encoding block), and they use the
1585 // opcode keyword to specify, in order, their primary, secondary, and
1586 // tertiary opcode.  Only the opcode sections which a particular instruction
1587 // needs for encoding need to be specified.
1588 encode %{
1589   // Build emit functions for each basic byte or larger field in the intel
1590   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1591   // code in the enc_class source block.  Emit functions will live in the
1592   // main source block for now.  In future, we can generalize this by
1593   // adding a syntax that specifies the sizes of fields in an order,
1594   // so that the adlc can build the emit functions automagically
1595 
1596   // Emit primary opcode
1597   enc_class OpcP %{
1598     emit_opcode(cbuf, $primary);
1599   %}
1600 
1601   // Emit secondary opcode
1602   enc_class OpcS %{
1603     emit_opcode(cbuf, $secondary);
1604   %}
1605 
1606   // Emit opcode directly
1607   enc_class Opcode(immI d8) %{
1608     emit_opcode(cbuf, $d8$$constant);
1609   %}
1610 
1611   enc_class SizePrefix %{
1612     emit_opcode(cbuf,0x66);
1613   %}
1614 
1615   enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
1616     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1617   %}
1618 
1619   enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{    // OpcRegReg(Many)
1620     emit_opcode(cbuf,$opcode$$constant);
1621     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1622   %}
1623 
1624   enc_class mov_r32_imm0( eRegI dst ) %{
1625     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1626     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1627   %}
1628 
1629   enc_class cdq_enc %{
1630     // Full implementation of Java idiv and irem; checks for the
1631     // special case as described in the JVM spec., p.243 & p.271.
1632     //
1633     //         normal case                           special case
1634     //
1635     // input : eax: dividend                         min_int
1636     //         ecx: divisor (hard-coded below)       -1
1637     //
1638     // output: eax: quotient  (= eax idiv ecx)       min_int
1639     //         edx: remainder (= eax irem ecx)       0
1640     //
1641     //  Code sequence:
1642     //
1643     //  81 F8 00 00 00 80    cmp         eax,80000000h
1644     //  0F 85 0B 00 00 00    jne         normal_case
1645     //  33 D2                xor         edx,edx
1646     //  83 F9 FF             cmp         ecx,-1       ; imm8 0FFh, sign-extended
1647     //  0F 84 03 00 00 00    je          done
1648     //                  normal_case:
1649     //  99                   cdq
1650     //  F7 F9                idiv        ecx
1651     //                  done:
1652     //
1653     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1654     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1655     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp eax,80000000h
1656     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1657     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1658     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case (+0x0B = xor + cmp + je)
1659     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
1660     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,-1
1661     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1662     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1663     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done (+3 = cdq + the 2-byte idiv)
1664     // normal_case:
1665     emit_opcode(cbuf,0x99);                                         // cdq
1666     // idiv (note: must be emitted by the user of this rule, as exactly 2 bytes)
1667     // done:
1668   %}
1669 
1670   // Dense encoding for older common ops
1671   enc_class Opc_plus(immI opcode, eRegI reg) %{
1672     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1673   %}
1674 
1675 
1676   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1677   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1678     // Check for 8-bit immediate, and set sign extend bit in opcode
1679     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1680       emit_opcode(cbuf, $primary | 0x02);
1681     }
1682     else {                          // If 32-bit immediate
1683       emit_opcode(cbuf, $primary);
1684     }
1685   %}
1686 
1687   enc_class OpcSErm (eRegI dst, immI imm) %{    // OpcSEr/m
1688     // Emit primary opcode and set sign-extend bit
1689     // Check for 8-bit immediate, and set sign extend bit in opcode
1690     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1691       emit_opcode(cbuf, $primary | 0x02);    }
1692     else {                          // If 32-bit immediate
1693       emit_opcode(cbuf, $primary);
1694     }
1695     // Emit r/m byte with secondary opcode, after primary opcode.
1696     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1697   %}
1698 
1699   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1700     // Check for 8-bit immediate, and set sign extend bit in opcode
1701     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1702       $$$emit8$imm$$constant;
1703     }
1704     else {                          // If 32-bit immediate
1705       // Output immediate
1706       $$$emit32$imm$$constant;
1707     }
1708   %}
1709 
1710   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1711     // Emit primary opcode and set sign-extend bit
1712     // Check for 8-bit immediate, and set sign extend bit in opcode
1713     int con = (int)$imm$$constant; // Throw away top bits
1714     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1715     // Emit r/m byte with secondary opcode, after primary opcode.
1716     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1717     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1718     else                               emit_d32(cbuf,con);
1719   %}
1720 
1721   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1722     // Emit primary opcode and set sign-extend bit
1723     // Check for 8-bit immediate, and set sign extend bit in opcode
1724     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1725     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1726     // Emit r/m byte with tertiary opcode, after primary opcode.
1727     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1728     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1729     else                               emit_d32(cbuf,con);
1730   %}
1731 
1732   enc_class Lbl (label labl) %{ // JMP, CALL
1733     Label *l = $labl$$label;
1734     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0);
1735   %}
1736 
1737   enc_class LblShort (label labl) %{ // JMP, CALL
1738     Label *l = $labl$$label;
1739     int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0;
1740     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
1741     emit_d8(cbuf, disp);
1742   %}
1743 
1744   enc_class OpcSReg (eRegI dst) %{    // BSWAP
1745     emit_cc(cbuf, $secondary, $dst$$reg );
1746   %}
1747 
1748   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1749     int destlo = $dst$$reg;
1750     int desthi = HIGH_FROM_LOW(destlo);
1751     // bswap lo
1752     emit_opcode(cbuf, 0x0F);
1753     emit_cc(cbuf, 0xC8, destlo);
1754     // bswap hi
1755     emit_opcode(cbuf, 0x0F);
1756     emit_cc(cbuf, 0xC8, desthi);
1757     // xchg lo and hi
1758     emit_opcode(cbuf, 0x87);
1759     emit_rm(cbuf, 0x3, destlo, desthi);
1760   %}
1761 
1762   enc_class RegOpc (eRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1763     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1764   %}
1765 
1766   enc_class Jcc (cmpOp cop, label labl) %{    // JCC
1767     Label *l = $labl$$label;
1768     $$$emit8$primary;
1769     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1770     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0);
1771   %}
1772 
1773   enc_class JccShort (cmpOp cop, label labl) %{    // JCC
1774     Label *l = $labl$$label;
1775     emit_cc(cbuf, $primary, $cop$$cmpcode);
1776     int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0;
1777     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
1778     emit_d8(cbuf, disp);
1779   %}
1780 
1781   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1782     $$$emit8$primary;
1783     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1784   %}
1785 
1786   enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
1787     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);  // 16-bit opcode: base 0xDA00 + condition code + (src register - 1)
1788     emit_d8(cbuf, op >> 8 );                          // high byte first
1789     emit_d8(cbuf, op & 255);                          // then low byte
1790   %}
1791 
1792   // emulate a CMOV with a conditional branch around a MOV
1793   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1794     // Invert sense of branch from sense of CMOV
1795     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1796     emit_d8( cbuf, $brOffs$$constant );
1797   %}
1798 
1799   enc_class enc_PartialSubtypeCheck( ) %{         // Slow-path subtype check on fixed registers
1800     Register Redi = as_Register(EDI_enc); // result register
1801     Register Reax = as_Register(EAX_enc); // super class
1802     Register Recx = as_Register(ECX_enc); // killed
1803     Register Resi = as_Register(ESI_enc); // sub class
1804     Label miss;                           // bound at the end; passed below as the failure label
1805 
1806     MacroAssembler _masm(&cbuf);
1807     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1808                                      NULL, &miss,        // no success label is passed, only the failure label
1809                                      /*set_cond_codes:*/ true);
1810     if ($primary) {                       // when $primary is set, zero the result on the fall-through path
1811       __ xorptr(Redi, Redi);
1812     }
1813     __ bind(miss);                        // a jump to miss bypasses the xorptr above
1814   %}
1815 
1816   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1817     MacroAssembler masm(&cbuf);
1818     int start = masm.offset();          // remember where this encoding begins so its size can be measured
1819     if (UseSSE >= 2) {
1820       if (VerifyFPU) {
1821         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1822       }
1823     } else {
1824       // External c_calling_convention expects the FPU stack to be 'clean'.
1825       // Compiled code leaves it dirty.  Do cleanup now.
1826       masm.empty_FPU_stack();
1827     }
1828     if (sizeof_FFree_Float_Stack_All == -1) {   // -1 means no size has been recorded yet
1829       sizeof_FFree_Float_Stack_All = masm.offset() - start;   // record the emitted size on first use
1830     } else {
1831       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");  // later emissions must match it
1832     }
1833   %}
1834 
1835   enc_class Verify_FPU_For_Leaf %{
1836     if( VerifyFPU ) {
1837       MacroAssembler masm(&cbuf);
1838       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1839     }
1840   %}
1841 
1842   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1843     // This is the instruction starting address for relocation info.
1844     cbuf.set_insts_mark();
1845     $$$emit8$primary;
1846     // CALL directly to the runtime
1847     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1848                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1849 
1850     if (UseSSE >= 2) {
1851       MacroAssembler _masm(&cbuf);
1852       BasicType rt = tf()->return_type();
1853 
1854       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1855         // A C runtime call where the return value is unused.  In SSE2+
1856         // mode the result needs to be removed from the FPU stack.  It's
1857         // likely that this function call could be removed by the
1858         // optimizer if the C function is a pure function.
1859         __ ffree(0);
1860       } else if (rt == T_FLOAT) {
1861         __ lea(rsp, Address(rsp, -4));
1862         __ fstp_s(Address(rsp, 0));
1863         __ movflt(xmm0, Address(rsp, 0));
1864         __ lea(rsp, Address(rsp,  4));
1865       } else if (rt == T_DOUBLE) {
1866         __ lea(rsp, Address(rsp, -8));
1867         __ fstp_d(Address(rsp, 0));
1868         __ movdbl(xmm0, Address(rsp, 0));
1869         __ lea(rsp, Address(rsp,  8));
1870       }
1871     }
1872   %}
1873 
1874 
1875   enc_class pre_call_FPU %{
1876     // If method sets FPU control word restore it here
1877     debug_only(int off0 = cbuf.insts_size());   // debug builds only: offset before the optional fldcw
1878     if( Compile::current()->in_24_bit_fp_mode() ) {
1879       MacroAssembler masm(&cbuf);
1880       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));  // load the standard control word
1881     }
1882     debug_only(int off1 = cbuf.insts_size());   // debug builds only: offset after it
1883     assert(off1 - off0 == pre_call_FPU_size(), "correct size prediction");  // emitted size must equal pre_call_FPU_size()
1884   %}
1885 
1886   enc_class post_call_FPU %{
1887     // If method sets FPU control word do it here also
1888     if( Compile::current()->in_24_bit_fp_mode() ) {
1889       MacroAssembler masm(&cbuf);
1890       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1891     }
1892   %}
1893 
1894   enc_class preserve_SP %{
1895     debug_only(int off0 = cbuf.insts_size());
1896     MacroAssembler _masm(&cbuf);
1897     // RBP is preserved across all calls, even compiled calls.
1898     // Use it to preserve RSP in places where the callee might change the SP.
1899     __ movptr(rbp_mh_SP_save, rsp);
1900     debug_only(int off1 = cbuf.insts_size());
1901     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
1902   %}
1903 
1904   enc_class restore_SP %{
1905     MacroAssembler _masm(&cbuf);
1906     __ movptr(rsp, rbp_mh_SP_save);
1907   %}
1908 
1909   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1910     // The call goes to a fixup routine, which consults ScopeDesc info to work
1911     // out the intended target.  The relocation type depends on the call kind.
1912     cbuf.set_insts_mark();
1913     $$$emit8$primary;
1914     if( _method ) {
1915       if( _optimized_virtual ) {
1916         emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1917                        opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
1918       } else {
1919         emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1920                        static_call_Relocation::spec(), RELOC_IMM32 );
1921       }
1922       emit_java_to_interp(cbuf);  // Emit stub for static call
1923     } else {
1924       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1925                      runtime_call_Relocation::spec(), RELOC_IMM32 );
1926     }
1927   %}
1928 
1929   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1930     // !!!!!
1931     // Generate "MOV EAX,imm32" as a placeholder instruction to load oop-info;
1932     // emit_call_dynamic_prologue( cbuf );
1933     cbuf.set_insts_mark();
1934     emit_opcode(cbuf, 0xB8 + EAX_enc);        // mov    EAX,imm32 (immediate is Universe::non_oop_word())
1935     emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
1936     address  virtual_call_oop_addr = cbuf.insts_mark();   // mark set above, i.e. the start of the MOV
1937     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1938     // who we intended to call.
1939     cbuf.set_insts_mark();                    // re-mark: the call instruction starts here
1940     $$$emit8$primary;
1941     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1942                 virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
1943   %}
1944 
1945   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1946     int disp = in_bytes(methodOopDesc::from_compiled_offset());
1947     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");  // disp is emitted below as a single byte
1948 
1949     // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_offset())]
1950     cbuf.set_insts_mark();
1951     $$$emit8$primary;
1952     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1953     emit_d8(cbuf, disp);             // Displacement
1954 
1955   %}
1956 
1957   enc_class Xor_Reg (eRegI dst) %{
1958     emit_opcode(cbuf, 0x33);
1959     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
1960   %}
1961 
1962 //   Following encoding is no longer used, but may be restored if calling
1963 //   convention changes significantly.
1964 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1965 //
1966 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1967 //     // int ic_reg     = Matcher::inline_cache_reg();
1968 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1969 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1970 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1971 //
1972 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1973 //     // // so we load it immediately before the call
1974 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1975 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1976 //
1977 //     // xor rbp,ebp
1978 //     emit_opcode(cbuf, 0x33);
1979 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1980 //
1981 //     // CALL to interpreter.
1982 //     cbuf.set_insts_mark();
1983 //     $$$emit8$primary;
1984 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1985 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1986 //   %}
1987 
1988   enc_class RegOpcImm (eRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1989     $$$emit8$primary;
1990     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1991     $$$emit8$shift$$constant;
1992   %}
1993 
1994   enc_class LdImmI (eRegI dst, immI src) %{    // Load Immediate
1995     // Load immediate does not have a zero or sign extended version
1996     // for 8-bit immediates
1997     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1998     $$$emit32$src$$constant;
1999   %}
2000 
2001   enc_class LdImmP (eRegI dst, immI src) %{    // Load Immediate
2002     // Load immediate does not have a zero or sign extended version
2003     // for 8-bit immediates
2004     emit_opcode(cbuf, $primary + $dst$$reg);
2005     $$$emit32$src$$constant;
2006   %}
2007 
2008   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
2009     // Load immediate does not have a zero or sign extended version
2010     // for 8-bit immediates
2011     int dst_enc = $dst$$reg;
2012     int src_con = $src$$constant & 0x0FFFFFFFFL;
2013     if (src_con == 0) {
2014       // xor dst, dst
2015       emit_opcode(cbuf, 0x33);
2016       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2017     } else {
2018       emit_opcode(cbuf, $primary + dst_enc);
2019       emit_d32(cbuf, src_con);
2020     }
2021   %}
2022 
2023   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2024     // Loads the upper 32 bits of the long constant.  Load immediate does not
2025     // have a zero or sign extended version for 8-bit immediates.
2026     int dst_enc = HIGH_FROM_LOW($dst$$reg);  // Hi register of pair, computed from lo
2027     int src_con = ((julong)($src$$constant)) >> 32;  // unsigned shift keeps only the upper half
2028     if (src_con == 0) {
2029       // xor dst, dst
2030       emit_opcode(cbuf, 0x33);
2031       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2032     } else {
2033       emit_opcode(cbuf, $primary + dst_enc);
2034       emit_d32(cbuf, src_con);
2035     }
2036   %}
2037 
2038 
2039   enc_class LdImmD (immD src) %{    // Load Immediate
2040     if( is_positive_zero_double($src$$constant)) {
2041       // FLDZ
2042       emit_opcode(cbuf,0xD9);
2043       emit_opcode(cbuf,0xEE);
2044     } else if( is_positive_one_double($src$$constant)) {
2045       // FLD1
2046       emit_opcode(cbuf,0xD9);
2047       emit_opcode(cbuf,0xE8);
2048     } else {
2049       emit_opcode(cbuf,0xDD);
2050       emit_rm(cbuf, 0x0, 0x0, 0x5);
2051       emit_double_constant(cbuf, $src$$constant);
2052     }
2053   %}
2054 
2055 
2056   enc_class LdImmF (immF src) %{    // Load Immediate
2057     if( is_positive_zero_float($src$$constant)) {
2058       emit_opcode(cbuf,0xD9);
2059       emit_opcode(cbuf,0xEE);
2060     } else if( is_positive_one_float($src$$constant)) {
2061       emit_opcode(cbuf,0xD9);
2062       emit_opcode(cbuf,0xE8);
2063     } else {
2064       $$$emit8$primary;
2065       // Load immediate does not have a zero or sign extended version
2066       // for 8-bit immediates
2067       // First load to TOS, then move to dst
2068       emit_rm(cbuf, 0x0, 0x0, 0x5);
2069       emit_float_constant(cbuf, $src$$constant);
2070     }
2071   %}
2072 
2073   enc_class LdImmX (regX dst, immXF con) %{    // Load Immediate
2074     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2075     emit_float_constant(cbuf, $con$$constant);
2076   %}
2077 
2078   enc_class LdImmXD (regXD dst, immXD con) %{    // Load Immediate
2079     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2080     emit_double_constant(cbuf, $con$$constant);
2081   %}
2082 
2083   enc_class load_conXD (regXD dst, immXD con) %{ // Load double constant
2084     // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2085     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2086     emit_opcode(cbuf, 0x0F);
2087     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2088     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2089     emit_double_constant(cbuf, $con$$constant);
2090   %}
2091 
2092   enc_class Opc_MemImm_F(immF src) %{
2093     cbuf.set_insts_mark();
2094     $$$emit8$primary;
2095     emit_rm(cbuf, 0x0, $secondary, 0x5);
2096     emit_float_constant(cbuf, $src$$constant);
2097   %}
2098 
2099 
2100   enc_class MovI2X_reg(regX dst, eRegI src) %{
2101     emit_opcode(cbuf, 0x66 );     // MOVD dst,src
2102     emit_opcode(cbuf, 0x0F );
2103     emit_opcode(cbuf, 0x6E );
2104     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2105   %}
2106 
2107   enc_class MovX2I_reg(eRegI dst, regX src) %{
2108     emit_opcode(cbuf, 0x66 );     // MOVD dst,src
2109     emit_opcode(cbuf, 0x0F );
2110     emit_opcode(cbuf, 0x7E );
2111     emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2112   %}
2113 
2114   enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
2115     { // MOVD $dst,$src.lo
2116       emit_opcode(cbuf,0x66);
2117       emit_opcode(cbuf,0x0F);
2118       emit_opcode(cbuf,0x6E);
2119       emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2120     }
2121     { // MOVD $tmp,$src.hi
2122       emit_opcode(cbuf,0x66);
2123       emit_opcode(cbuf,0x0F);
2124       emit_opcode(cbuf,0x6E);
2125       emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2126     }
2127     { // PUNPCKLDQ $dst,$tmp
2128       emit_opcode(cbuf,0x66);
2129       emit_opcode(cbuf,0x0F);
2130       emit_opcode(cbuf,0x62);
2131       emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
2132      }
2133   %}
2134 
2135   enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
2136     { // MOVD $dst.lo,$src
2137       emit_opcode(cbuf,0x66);
2138       emit_opcode(cbuf,0x0F);
2139       emit_opcode(cbuf,0x7E);
2140       emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2141     }
2142     { // PSHUFLW $tmp,$src,0x4E  (01001110b)
2143       emit_opcode(cbuf,0xF2);
2144       emit_opcode(cbuf,0x0F);
2145       emit_opcode(cbuf,0x70);
2146       emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2147       emit_d8(cbuf, 0x4E);
2148     }
2149     { // MOVD $dst.hi,$tmp
2150       emit_opcode(cbuf,0x66);
2151       emit_opcode(cbuf,0x0F);
2152       emit_opcode(cbuf,0x7E);
2153       emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
2154     }
2155   %}
2156 
2157 
2158   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2159   enc_class enc_Copy( eRegI dst, eRegI src ) %{
2160     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2161   %}
2162 
2163   enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
2164     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2165   %}
2166 
2167   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
2168   enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
2169     encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
2170   %}
2171 
2172   enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
2173     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);  // NOTE(review): same name and body as the enc_class RegReg declared earlier in this encode block -- confirm one copy can be dropped
2174   %}
2175 
2176   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2177     $$$emit8$primary;
2178     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2179   %}
2180 
2181   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2182     $$$emit8$secondary;
2183     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2184   %}
2185 
2186   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2187     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2188   %}
2189 
2190   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2191     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2192   %}
2193 
2194   enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
2195     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2196   %}
2197 
2198   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2199     // Output immediate
2200     $$$emit32$src$$constant;
2201   %}
2202 
2203   enc_class Con32F_as_bits(immF src) %{        // storeF_imm
2204     // Output Float immediate bits
2205     jfloat jf = $src$$constant;
2206     int    jf_as_bits = jint_cast( jf );
2207     emit_d32(cbuf, jf_as_bits);
2208   %}
2209 
2210   enc_class Con32XF_as_bits(immXF src) %{      // storeX_imm
2211     // Output Float immediate bits
2212     jfloat jf = $src$$constant;
2213     int    jf_as_bits = jint_cast( jf );
2214     emit_d32(cbuf, jf_as_bits);
2215   %}
2216 
2217   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2218     // Output immediate
2219     $$$emit16$src$$constant;
2220   %}
2221 
2222   enc_class Con_d32(immI src) %{
2223     emit_d32(cbuf,$src$$constant);
2224   %}
2225 
2226   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2227     // Output immediate memory reference
2228     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2229     emit_d32(cbuf, 0x00);
2230   %}
2231 
2232   enc_class lock_prefix( ) %{
2233     if( os::is_MP() )
2234       emit_opcode(cbuf,0xF0);         // [Lock]
2235   %}
2236 
2237   // Cmp-xchg long value.
2238   // Note: we need to swap EBX and ECX before and after the
2239   //       cmpxchg8 instruction because the instruction uses
2240   //       ECX as the high order word of the new value to store but
2241   //       our register encoding uses EBX.
2242   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2243 
2244     // XCHG  EBX,ECX
2245     emit_opcode(cbuf,0x87);
2246     emit_opcode(cbuf,0xD9);
2247     // [Lock] -- prefix emitted only on multiprocessor systems
2248     if( os::is_MP() )
2249       emit_opcode(cbuf,0xF0);
2250     // CMPXCHG8 [Eptr]
2251     emit_opcode(cbuf,0x0F);
2252     emit_opcode(cbuf,0xC7);
2253     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2254     // XCHG  EBX,ECX -- swap back to restore the original register assignment
2255     emit_opcode(cbuf,0x87);
2256     emit_opcode(cbuf,0xD9);
2257   %}
2258 
2259   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2260     // [Lock] -- prefix emitted only on multiprocessor systems
2261     if( os::is_MP() )
2262       emit_opcode(cbuf,0xF0);
2263 
2264     // CMPXCHG [Eptr]
2265     emit_opcode(cbuf,0x0F);
2266     emit_opcode(cbuf,0xB1);
2267     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );  // register field 1, memory operand addressed through mem_ptr
2268   %}
2269 
2270   enc_class enc_flags_ne_to_boolean( iRegI res ) %{  // res = 0 if flags say "not equal", else 1
2271     int res_encoding = $res$$reg;
2272 
2273     // MOV  res,0
2274     emit_opcode( cbuf, 0xB8 + res_encoding);
2275     emit_d32( cbuf, 0 );
2276     // JNE,s  fail
2277     emit_opcode(cbuf,0x75);
2278     emit_d8(cbuf, 5 );                 // displacement 5 = size of the MOV below (1 opcode byte + 4 immediate bytes)
2279     // MOV  res,1
2280     emit_opcode( cbuf, 0xB8 + res_encoding);
2281     emit_d32( cbuf, 1 );
2282     // fail:
2283   %}
2284 
2285   enc_class set_instruction_start( ) %{
2286     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2287   %}
2288 
2289   enc_class RegMem (eRegI ereg, memory mem) %{    // emit_reg_mem
2290     int reg_encoding = $ereg$$reg;
2291     int base  = $mem$$base;
2292     int index = $mem$$index;
2293     int scale = $mem$$scale;
2294     int displace = $mem$$disp;
2295     bool disp_is_oop = $mem->disp_is_oop();
2296     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2297   %}
2298 
2299   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem for the high word of a long
2300     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2301     int base  = $mem$$base;
2302     int index = $mem$$index;
2303     int scale = $mem$$scale;
2304     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2305     assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" );  // the adjusted displacement is always passed as a non-oop
2306     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/);
2307   %}
2308 
2309   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
         // Shifts the register pair $dst by the constant $cnt using two instructions:
         //   1. 0x0F,$tertiary with operands (r1,r2) and an 8-bit count
         //   2. $primary with opcode extension $secondary on r1, same count
         // r1/r2 are (lo,hi) when $tertiary is 0xA4 and (hi,lo) otherwise.
2310     int r1, r2;
2311     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2312     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2313     emit_opcode(cbuf,0x0F);
2314     emit_opcode(cbuf,$tertiary);
2315     emit_rm(cbuf, 0x3, r1, r2);
2316     emit_d8(cbuf,$cnt$$constant);
         // Second instruction: the opcode byte is written with emit_d8 here.
2317     emit_d8(cbuf,$primary);
2318     emit_rm(cbuf, 0x3, $secondary, r1);
2319     emit_d8(cbuf,$cnt$$constant);
2320   %}
2321 
2322   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
         // Shift of the $dst pair by a constant of at least 32: the high register
         // is copied into the low register, the low register is then shifted by
         // $cnt-32 (skipped when that is zero), and finally the high register is
         // shifted by 31 with the same $primary/$secondary opcode.
2323     emit_opcode( cbuf, 0x8B ); // Move
2324     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2325     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2326       emit_d8(cbuf,$primary);
2327       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2328       emit_d8(cbuf,$cnt$$constant-32);
2329     }
         // Shift the high register by 31.
2330     emit_d8(cbuf,$primary);
2331     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2332     emit_d8(cbuf,31);
2333   %}
2334 
2335   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
         // Shift of the $dst pair by a constant of at least 32 where the vacated
         // half becomes zero: move r2 into r1, shift r1 by $cnt-32 (skipped when
         // that is zero), then XOR r2 with itself.
         // r1/r2 are (lo,hi) when $secondary is 0x5 and (hi,lo) otherwise.
2336     int r1, r2;
2337     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2338     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2339 
2340     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2341     emit_rm(cbuf, 0x3, r1, r2);
2342     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2343       emit_opcode(cbuf,$primary);
2344       emit_rm(cbuf, 0x3, $secondary, r1);
2345       emit_d8(cbuf,$cnt$$constant-32);
2346     }
2347     emit_opcode(cbuf,0x33);  // XOR r2,r2
2348     emit_rm(cbuf, 0x3, r2, r2);
2349   %}
2350 
2351   // Variant of RegMem that emits its own opcode and takes an extra displacement,
2352   // so either half of a double in memory can be addressed; no relocation info is produced.
2353   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
2354     emit_opcode(cbuf,$opcode$$constant);
2355     const int  reg_enc   = $rm_reg$$reg;
2356     const int  base_enc  = $mem$$base;
2357     const int  index_enc = $mem$$index;
2358     const int  scale_enc = $mem$$scale;
2359     const int  half_disp = $mem$$disp + $disp_for_half$$constant;   // offset of the requested half
2360     const bool no_oop    = false;                                   // displacement is never treated as an oop
2361     encode_RegMem(cbuf, reg_enc, base_enc, index_enc, scale_enc, half_disp, no_oop);
2362   %}
2363 
2364   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2365   //
2366   // Like RegMem, except that the reg/opcode field of the RM byte is a constant
2367   // fixed at ADLC time and no relocation information is ever produced.
2368   // Frequently used to move data between the FPU's stack top and memory.
2369   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2370     const int opc_ext    = $rm_opcode$$constant;
2371     const int base_enc   = $mem$$base;
2372     const int index_enc  = $mem$$index;
2373     const int scale_enc  = $mem$$scale;
2374     const int disp_value = $mem$$disp;
2375     assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" );
2376     encode_RegMem(cbuf, opc_ext, base_enc, index_enc, scale_enc, disp_value, false);
2377   %}
2378 
2379   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
         // Encodes a memory operand whose reg/opcode field is the constant $rm_opcode;
         // unlike RMopc_Mem_no_oop, an oop displacement is allowed and passed through.
2380     const int  opc_ext      = $rm_opcode$$constant;
2381     const int  base_enc     = $mem$$base;
2382     const int  index_enc    = $mem$$index;
2383     const int  scale_enc    = $mem$$scale;
2384     const int  disp_value   = $mem$$disp;
2385     const bool disp_has_oop = $mem->disp_is_oop(); // displacement is an oop when working with static globals
2386     encode_RegMem(cbuf, opc_ext, base_enc, index_enc, scale_enc, disp_value, disp_has_oop);
2387   %}
2388 
2389   enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{    // emit_reg_lea
         // Encodes the operand bytes for "$dst, [$src0 + $src1]": a base register
         // plus a constant displacement, with no index and no scale.
2390     const int  dst_enc    = $dst$$reg;
2391     const int  base_enc   = $src0$$reg;      // 0xFFFFFFFF would indicate no base
2392     const int  no_index   = 0x04;            // 0x04 indicates no index
2393     const int  no_scale   = 0x00;            // 0x00 indicates no scale
2394     const int  disp_value = $src1$$constant; // 0x00 would indicate no displacement
2395     const bool no_oop     = false;
2396     encode_RegMem(cbuf, dst_enc, base_enc, no_index, no_scale, disp_value, no_oop);
2397   %}
2398 
2399   enc_class min_enc (eRegI dst, eRegI src) %{    // MIN
         // Leaves the smaller of $dst and $src in $dst: compare, then skip the
         // copy of $src into $dst when $dst is already the smaller value.
2400     // Compare dst,src
2401     emit_opcode(cbuf,0x3B);
2402     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2403     // jmp dst < src around move  (displacement 2 = the two bytes of the move below)
2404     emit_opcode(cbuf,0x7C);
2405     emit_d8(cbuf,2);
2406     // move dst,src
2407     emit_opcode(cbuf,0x8B);
2408     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2409   %}
2410 
2411   enc_class max_enc (eRegI dst, eRegI src) %{    // MAX
         // Leaves the larger of $dst and $src in $dst: compare, then skip the
         // copy of $src into $dst when $dst is already the larger value.
2412     // Compare dst,src
2413     emit_opcode(cbuf,0x3B);
2414     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2415     // jmp dst > src around move  (displacement 2 = the two bytes of the move below)
2416     emit_opcode(cbuf,0x7F);
2417     emit_d8(cbuf,2);
2418     // move dst,src
2419     emit_opcode(cbuf,0x8B);
2420     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2421   %}
2422 
2423   enc_class enc_FP_store(memory mem, regD src) %{
2424     // If src is FPR1, we can just FST to store it.
2425     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
         // The store opcode byte comes from $primary; reg_encoding below picks the
         // non-popping (0x2) or popping (0x3) form of that opcode.
2426     int reg_encoding = 0x2; // Just store
2427     int base  = $mem$$base;
2428     int index = $mem$$index;
2429     int scale = $mem$$scale;
2430     int displace = $mem$$disp;
2431     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
2432     if( $src$$reg != FPR1L_enc ) {
2433       reg_encoding = 0x3;  // Store & pop
2434       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2435       emit_d8( cbuf, 0xC0-1+$src$$reg );
2436     }
         // The mark is set after the optional FLD so that it refers to the store itself.
2437     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2438     emit_opcode(cbuf,$primary);
2439     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2440   %}
2441 
2442   enc_class neg_reg(eRegI dst) %{
         // Emits two bytes: opcode 0xF7 and a register-direct RM byte with
         // opcode extension 0x03 selecting $dst.
2443     // NEG $dst
2444     emit_opcode(cbuf,0xF7);
2445     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2446   %}
2447 
2448   enc_class setLT_reg(eCXRegI dst) %{
         // Emits the two-byte opcode 0x0F 0x9C followed by a register-direct RM
         // byte for $dst (reg field 0x4).
2449     // SETLT $dst
2450     emit_opcode(cbuf,0x0F);
2451     emit_opcode(cbuf,0x9C);
2452     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2453   %}
2454 
2455   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
         // Branch-free conditional add, following the instruction comments below:
         //   SUB p,q ; SBB tmp,tmp ; AND tmp,y ; ADD p,tmp
         // After SBB tmp,tmp the mask in $tmp depends only on the borrow of the
         // SUB, so $y is added to $p only when that subtraction borrowed.
2456     int tmpReg = $tmp$$reg;
2457 
2458     // SUB $p,$q
2459     emit_opcode(cbuf,0x2B);
2460     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2461     // SBB $tmp,$tmp
2462     emit_opcode(cbuf,0x1B);
2463     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2464     // AND $tmp,$y
2465     emit_opcode(cbuf,0x23);
2466     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2467     // ADD $p,$tmp
2468     emit_opcode(cbuf,0x03);
2469     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2470   %}
2471 
2472   enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{    // cadd_cmpLT
         // Same sequence as the register form (SUB, SBB, AND, ADD), except that
         // the AND takes its second operand from memory ($mem).
2473     int tmpReg = $tmp$$reg;
2474 
2475     // SUB $p,$q
2476     emit_opcode(cbuf,0x2B);
2477     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2478     // SBB $tmp,$tmp
2479     emit_opcode(cbuf,0x1B);
2480     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2481     // AND $tmp,$mem
2482     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2483     emit_opcode(cbuf,0x23);
2484     int reg_encoding = tmpReg;
2485     int base  = $mem$$base;
2486     int index = $mem$$index;
2487     int scale = $mem$$scale;
2488     int displace = $mem$$disp;
2489     bool disp_is_oop = $mem->disp_is_oop();
2490     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2491     // ADD $p,$tmp
2492     emit_opcode(cbuf,0x03);
2493     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2494   %}
2495 
2496   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
         // Variable left shift of the $dst register pair. The count register is
         // hard-coded as ECX (ECX_enc); $shift is not referenced in the body.
         // When the count has the value-32 bit set, the low word is first moved
         // into the high word and the low word cleared; SHLD/SHL then finish.
2497     // TEST shift,32
2498     emit_opcode(cbuf,0xF7);
2499     emit_rm(cbuf, 0x3, 0, ECX_enc);
2500     emit_d32(cbuf,0x20);
2501     // JEQ,s small  (displacement 4 = the 2-byte MOV plus the 2-byte CLR below)
2502     emit_opcode(cbuf, 0x74);
2503     emit_d8(cbuf, 0x04);
2504     // MOV    $dst.hi,$dst.lo
2505     emit_opcode( cbuf, 0x8B );
2506     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2507     // CLR    $dst.lo
2508     emit_opcode(cbuf, 0x33);
2509     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2510 // small:
2511     // SHLD   $dst.hi,$dst.lo,$shift
2512     emit_opcode(cbuf,0x0F);
2513     emit_opcode(cbuf,0xA5);
2514     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2515     // SHL    $dst.lo,$shift"
2516     emit_opcode(cbuf,0xD3);
2517     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2518   %}
2519 
2520   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
         // Variable right shift of the $dst register pair with the high word
         // cleared on large counts. The count register is hard-coded as ECX
         // (ECX_enc); $shift is not referenced in the body.
         // When the count has the value-32 bit set, the high word is first moved
         // into the low word and the high word cleared; SHRD/SHR then finish.
2521     // TEST shift,32
2522     emit_opcode(cbuf,0xF7);
2523     emit_rm(cbuf, 0x3, 0, ECX_enc);
2524     emit_d32(cbuf,0x20);
2525     // JEQ,s small  (displacement 4 = the 2-byte MOV plus the 2-byte CLR below)
2526     emit_opcode(cbuf, 0x74);
2527     emit_d8(cbuf, 0x04);
2528     // MOV    $dst.lo,$dst.hi
2529     emit_opcode( cbuf, 0x8B );
2530     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2531     // CLR    $dst.hi
2532     emit_opcode(cbuf, 0x33);
2533     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2534 // small:
2535     // SHRD   $dst.lo,$dst.hi,$shift
2536     emit_opcode(cbuf,0x0F);
2537     emit_opcode(cbuf,0xAD);
2538     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2539     // SHR    $dst.hi,$shift"
2540     emit_opcode(cbuf,0xD3);
2541     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2542   %}
2543 
2544   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
         // Variable arithmetic right shift of the $dst register pair. The count
         // register is hard-coded as ECX (ECX_enc); $shift is not referenced.
         // When the count has the value-32 bit set, the high word is first moved
         // into the low word and the high word shifted right by 31; SHRD/SAR
         // then finish.
2545     // TEST shift,32
2546     emit_opcode(cbuf,0xF7);
2547     emit_rm(cbuf, 0x3, 0, ECX_enc);
2548     emit_d32(cbuf,0x20);
2549     // JEQ,s small  (displacement 5 = the 2-byte MOV plus the 3-byte SAR-by-31 below)
2550     emit_opcode(cbuf, 0x74);
2551     emit_d8(cbuf, 0x05);
2552     // MOV    $dst.lo,$dst.hi
2553     emit_opcode( cbuf, 0x8B );
2554     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2555     // SAR    $dst.hi,31
2556     emit_opcode(cbuf, 0xC1);
2557     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2558     emit_d8(cbuf, 0x1F );
2559 // small:
2560     // SHRD   $dst.lo,$dst.hi,$shift
2561     emit_opcode(cbuf,0x0F);
2562     emit_opcode(cbuf,0xAD);
2563     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2564     // SAR    $dst.hi,$shift"
2565     emit_opcode(cbuf,0xD3);
2566     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2567   %}
2568 
2569 
2570   // ----------------- Encodings for floating point unit -----------------
2571   // May leave result in FPU-TOS or FPU reg depending on opcodes
2572   enc_class OpcReg_F (regF src) %{    // FMUL, FDIV
         // Emits $primary (through the $$$emit8 directive), then a register-direct
         // RM byte with $secondary in the reg field and $src in the rm field.
2573     $$$emit8$primary;
2574     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2575   %}
2576 
2577   // Pop argument in FPR0 with FSTP ST(0)
2578   enc_class PopFPU() %{
         // Fixed two-byte sequence 0xDD 0xD8; takes no operands.
2579     emit_opcode( cbuf, 0xDD );
2580     emit_d8( cbuf, 0xD8 );
2581   %}
2582 
2583   // !!!!! equivalent to Pop_Reg_F
2584   enc_class Pop_Reg_D( regD dst ) %{
         // Stores the FPU stack top into FPU register $dst and pops the stack;
         // the register number is added to the second opcode byte.
2585     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2586     emit_d8( cbuf, 0xD8+$dst$$reg );
2587   %}
2588 
2589   enc_class Push_Reg_D( regD dst ) %{
         // Pushes a copy of FPU register $dst onto the FPU stack. The encoded
         // stack index is the register number minus one (see the FLD comment).
2590     emit_opcode( cbuf, 0xD9 );
2591     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2592   %}
2593 
2594   enc_class strictfp_bias1( regD dst ) %{
         // Loads the 80-bit constant found at the absolute address returned by
         // StubRoutines::addr_fpu_subnormal_bias1() and multiplies it into
         // ST($dst) with a popping multiply.
         // The address is emitted as a raw 32-bit value with no relocation.
2595     emit_opcode( cbuf, 0xDB );           // FLD m80real
2596     emit_opcode( cbuf, 0x2D );
2597     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2598     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2599     emit_opcode( cbuf, 0xC8+$dst$$reg );
2600   %}
2601 
2602   enc_class strictfp_bias2( regD dst ) %{
         // Identical in shape to strictfp_bias1 but uses the constant at
         // StubRoutines::addr_fpu_subnormal_bias2(): load it and multiply it into
         // ST($dst) with a popping multiply.
         // The address is emitted as a raw 32-bit value with no relocation.
2603     emit_opcode( cbuf, 0xDB );           // FLD m80real
2604     emit_opcode( cbuf, 0x2D );
2605     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2606     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2607     emit_opcode( cbuf, 0xC8+$dst$$reg );
2608   %}
2609 
2610   // Special case for moving an integer register to a stack slot.
2611   enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
         // Delegates to store_to_stackslot with opcode $primary, register $src
         // and the displacement of stack slot $dst.
2612     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2613   %}
2614 
2615   // Special case for moving a register to a stack slot.
2616   enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
         // Emits only the operand bytes (RM byte, SIB byte, 32-bit displacement)
         // addressing [ESP + $dst displacement] with $src in the register field.
2617     // Opcode already emitted
2618     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2619     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2620     emit_d32(cbuf, $dst$$disp);   // Displacement
2621   %}
2622 
2623   // Push the integer in stackSlot 'src' onto FP-stack
2624   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
         // Opcode and opcode extension come from the instruction's
         // $primary/$secondary; only the displacement of $src is used.
2625     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2626   %}
2627 
2628   // Push the float in stackSlot 'src' onto FP-stack
2629   enc_class Push_Mem_F( memory src ) %{    // FLD_S   [ESP+src]
         // Fixed opcode 0xD9 with opcode extension 0x00; only the displacement
         // of $src is used.
2630     store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
2631   %}
2632 
2633   // Push the double in stackSlot 'src' onto FP-stack
2634   enc_class Push_Mem_D( memory src ) %{    // FLD_D   [ESP+src]
         // Fixed opcode 0xDD with opcode extension 0x00; only the displacement
         // of $src is used.
2635     store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
2636   %}
2637 
2638   // Store FPU's TOS float to a stack-slot, and pop FPU-stack
2639   enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
         // Fixed opcode 0xD9 with opcode extension 0x03 (the popping store).
2640     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2641   %}
2642 
2643   // Same as Pop_Mem_F except for opcode
2644   // Store FPU's TOS double to a stack-slot, and pop FPU-stack
2645   enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
         // Fixed opcode 0xDD with opcode extension 0x03 (the popping store).
2646     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2647   %}
2648 
2649   enc_class Pop_Reg_F( regF dst ) %{
         // Stores the FPU stack top into FPU register $dst and pops the stack;
         // emits the same two bytes as Pop_Reg_D.
2650     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2651     emit_d8( cbuf, 0xD8+$dst$$reg );
2652   %}
2653 
2654   enc_class Push_Reg_F( regF dst ) %{
         // Pushes a copy of FPU register $dst onto the FPU stack; emits the same
         // two bytes as Push_Reg_D.
2655     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2656     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2657   %}
2658 
2659   // Store FPU register 'src' as a float to a stack-slot.
2660   enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
         // If $src is FPR1 it is stored in place with the non-popping form (0x02).
         // Otherwise it is first pushed with FLD and then stored with the popping
         // form (0x03), which removes the pushed copy again.
2661     int pop = 0x02;
2662     if ($src$$reg != FPR1L_enc) {
2663       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2664       emit_d8( cbuf, 0xC0-1+$src$$reg );
2665       pop = 0x03;
2666     }
2667     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2668   %}
2669 
2670   // Store FPU register 'src' as a double to a stack-slot.
2671   enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
         // If $src is FPR1 it is stored in place with the non-popping form (0x02).
         // Otherwise it is first pushed with FLD and then stored with the popping
         // form (0x03), which removes the pushed copy again.
2672     int pop = 0x02;
2673     if ($src$$reg != FPR1L_enc) {
2674       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2675       emit_d8( cbuf, 0xC0-1+$src$$reg );
2676       pop = 0x03;
2677     }
2678     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2679   %}
2680 
2681   // Copy FPU register 'src' into FPU register 'dst'.
2682   enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
         // If $src is FPR1 no FLD is emitted and a non-popping store is used; its
         // base byte is lowered by one because nothing was pushed.
         // Otherwise $src is pushed with FLD and stored with the popping form.
2683     int pop = 0xD0 - 1; // -1 since we skip FLD
2684     if ($src$$reg != FPR1L_enc) {
2685       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2686       emit_d8( cbuf, 0xC0-1+$src$$reg );
2687       pop = 0xD8;
2688     }
2689     emit_opcode( cbuf, 0xDD );
2690     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2691   %}
2692 
2693 
2694   enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
         // Emitted through a local MacroAssembler rather than raw bytes:
         // load $src1, multiply by $src2, add $src, then store-and-pop into $dst.
         // The register number is passed minus one for the load and unchanged
         // for the other three calls.
2695     MacroAssembler masm(&cbuf);
2696     masm.fld_s(  $src1$$reg-1);   // nothing at TOS, load TOS from src1.reg
2697     masm.fmul(   $src2$$reg+0);   // value at TOS
2698     masm.fadd(   $src$$reg+0);    // value at TOS
2699     masm.fstp_d( $dst$$reg+0);    // value at TOS, popped off after store
2700   %}
2701 
2702 
2703   enc_class Push_Reg_Mod_D( regD dst, regD src) %{
         // Pushes $dst onto the FPU stack and, unless $src already is FPR1,
         // exchanges $src into the FPR1 position. The exchange is bracketed by
         // fincstp/fdecstp so that it happens one level below the pushed value.
2704     // load dst in FPR0
2705     emit_opcode( cbuf, 0xD9 );
2706     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2707     if ($src$$reg != FPR1L_enc) {
2708       // fincstp
2709       emit_opcode (cbuf, 0xD9);
2710       emit_opcode (cbuf, 0xF7);
2711       // swap src with FPR1:
2712       // FXCH FPR1 with src
2713       emit_opcode(cbuf, 0xD9);
2714       emit_d8(cbuf, 0xC8-1+$src$$reg );
2715       // fdecstp
2716       emit_opcode (cbuf, 0xD9);
2717       emit_opcode (cbuf, 0xF6);
2718     }
2719   %}
2720 
2721   enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
         // Moves two XMM doubles onto the FPU stack through an 8-byte stack temp:
         // $src1 is loaded first and $src0 second, so $src0 ends up on top.
         // The stack temp is NOT released here; ESP stays lowered by 8.
2722     // Allocate 8 bytes of stack
2723     emit_opcode(cbuf,0x83);            // SUB ESP,8
2724     emit_opcode(cbuf,0xEC);
2725     emit_d8(cbuf,0x08);
2726 
2727     emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src1
2728     emit_opcode  (cbuf, 0x0F );
2729     emit_opcode  (cbuf, 0x11 );
2730     encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
2731 
2732     emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
2733     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2734 
2735     emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src0
2736     emit_opcode  (cbuf, 0x0F );
2737     emit_opcode  (cbuf, 0x11 );
2738     encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
2739 
2740     emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
2741     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2742 
2743   %}
2744 
2745   enc_class Push_ModX_encoding( regX src0, regX src1) %{
         // Moves two XMM floats onto the FPU stack through a 4-byte stack temp:
         // $src1 is loaded first and $src0 second, so $src0 ends up on top.
         // The stack temp is NOT released here; ESP stays lowered by 4.
2746     // Allocate a word
2747     emit_opcode(cbuf,0x83);            // SUB ESP,4
2748     emit_opcode(cbuf,0xEC);
2749     emit_d8(cbuf,0x04);
2750 
2751     emit_opcode  (cbuf, 0xF3 );     // MOVSS [ESP], src1
2752     emit_opcode  (cbuf, 0x0F );
2753     emit_opcode  (cbuf, 0x11 );
2754     encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
2755 
2756     emit_opcode(cbuf,0xD9 );      // FLD [ESP]
2757     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2758 
2759     emit_opcode  (cbuf, 0xF3 );     // MOVSS [ESP], src0
2760     emit_opcode  (cbuf, 0x0F );
2761     emit_opcode  (cbuf, 0x11 );
2762     encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
2763 
2764     emit_opcode(cbuf,0xD9 );      // FLD [ESP]
2765     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2766 
2767   %}
2768 
2769   enc_class Push_ResultXD(regXD dst) %{
         // Moves the FPU stack top into XMM register $dst through [ESP]: store and
         // pop it to the stack temp, load it into $dst, then release 8 bytes of
         // stack. The stack temp must already exist; it is not allocated here.
2770     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]
2771 
2772     // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
2773     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2774     emit_opcode  (cbuf, 0x0F );
2775     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2776     encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
2777 
2778     emit_opcode(cbuf,0x83);    // ADD ESP,8
2779     emit_opcode(cbuf,0xC4);
2780     emit_d8(cbuf,0x08);
2781   %}
2782 
2783   enc_class Push_ResultX(regX dst, immI d8) %{
         // Moves the FPU stack top into XMM register $dst as a float through
         // [ESP]: store and pop it to the stack temp, load it into $dst, then add
         // the constant $d8 to ESP to release the temp.
         // The stack temp must already exist; it is not allocated here.
2784     store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]
2785 
2786     emit_opcode  (cbuf, 0xF3 );     // MOVSS dst(xmm), [ESP]
2787     emit_opcode  (cbuf, 0x0F );
2788     emit_opcode  (cbuf, 0x10 );
2789     encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
2790 
2791     emit_opcode(cbuf,0x83);    // ADD ESP,d8 (4 or 8)
2792     emit_opcode(cbuf,0xC4);
2793     emit_d8(cbuf,$d8$$constant);
2794   %}
2795 
2796   enc_class Push_SrcXD(regXD src) %{
         // Pushes the XMM double $src onto the FPU stack through an 8-byte stack
         // temp. The temp is NOT released here; ESP stays lowered by 8.
2797     // Allocate 8 bytes of stack
2798     emit_opcode(cbuf,0x83);            // SUB ESP,8
2799     emit_opcode(cbuf,0xEC);
2800     emit_d8(cbuf,0x08);
2801 
2802     emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src
2803     emit_opcode  (cbuf, 0x0F );
2804     emit_opcode  (cbuf, 0x11 );
2805     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
2806 
2807     emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
2808     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2809   %}
2810 
2811   enc_class push_stack_temp_qword() %{
         // Reserves an 8-byte scratch area by lowering ESP; the matching
         // release is pop_stack_temp_qword.
2812     emit_opcode(cbuf,0x83);     // SUB ESP,8
2813     emit_opcode(cbuf,0xEC);
2814     emit_d8    (cbuf,0x08);
2815   %}
2816 
2817   enc_class pop_stack_temp_qword() %{
         // Releases the 8-byte scratch area reserved by push_stack_temp_qword
         // by raising ESP again.
2818     emit_opcode(cbuf,0x83);     // ADD ESP,8
2819     emit_opcode(cbuf,0xC4);
2820     emit_d8    (cbuf,0x08);
2821   %}
2822 
2823   enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
         // Pushes the XMM double $xmm_src onto the FPU stack through [ESP].
         // ESP is not adjusted here, so the 8 bytes at [ESP] must already be
         // reserved by the surrounding encoding and are overwritten.
2824     emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], xmm_src
2825     emit_opcode  (cbuf, 0x0F );
2826     emit_opcode  (cbuf, 0x11 );
2827     encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);
2828 
2829     emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
2830     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2831   %}
2832 
2833   // Compute X^Y using Intel's fast hardware instructions, if possible.
2834   // Otherwise return a NaN.
2835   enc_class pow_exp_core_encoding %{
         // Splits Q (in FPR1) into integer and fractional parts, computes
         // 2^frac(Q) on the FPU, builds 2^int(Q) as a double directly from its
         // exponent bits in memory, and multiplies the two.
         // Uses EAX, EBX and ECX as scratch and writes the 8 bytes at [ESP];
         // ESP itself is not adjusted here, so that space must already be reserved.
         // The two right-hand columns in the comments show the FPU stack contents.
2836     // FPR1 holds Y*ln2(X).  Compute FPR1 = 2^(Y*ln2(X))
2837     emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0);  // fdup = fld st(0)          Q       Q
2838     emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC);  // frndint               int(Q)      Q
2839     emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9);  // fsub st(1) -= st(0);  int(Q) frac(Q)
2840     emit_opcode(cbuf,0xDB);                          // FISTP [ESP]           frac(Q)
2841     emit_opcode(cbuf,0x1C);
2842     emit_d8(cbuf,0x24);
2843     emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0);  // f2xm1                 2^frac(Q)-1
2844     emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8);  // fld1                  1 2^frac(Q)-1
2845     emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1);  // faddp                 2^frac(Q)
2846     emit_opcode(cbuf,0x8B);                          // mov eax,[esp+0]=int(Q)
2847     encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
2848     emit_opcode(cbuf,0xC7);                          // mov ecx,0xFFFFF800 - overflow mask
2849     emit_rm(cbuf, 0x3, 0x0, ECX_enc);
2850     emit_d32(cbuf,0xFFFFF800);
2851     emit_opcode(cbuf,0x81);                          // add eax,1023 - the double exponent bias
2852     emit_rm(cbuf, 0x3, 0x0, EAX_enc);
2853     emit_d32(cbuf,1023);
2854     emit_opcode(cbuf,0x8B);                          // mov ebx,eax
2855     emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
2856     emit_opcode(cbuf,0xC1);                          // shl eax,20 - Slide to exponent position
2857     emit_rm(cbuf,0x3,0x4,EAX_enc);
2858     emit_d8(cbuf,20);
2859     emit_opcode(cbuf,0x85);                          // test ebx,ecx - check for overflow
2860     emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
2861     emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45);  // CMOVne eax,ecx - overflow; stuff NAN into EAX
2862     emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
2863     emit_opcode(cbuf,0x89);                          // mov [esp+4],eax - Store as the high word of the double
2864     encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
2865     emit_opcode(cbuf,0xC7);                          // mov [esp+0],0   - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
2866     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2867     emit_d32(cbuf,0);
2868     emit_opcode(cbuf,0xDC);                          // fmul st(0),[esp+0] (the double built above); FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
2869     encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
2870   %}
2871 
2872 //   enc_class Pop_Reg_Mod_D( regD dst, regD src)
2873 //   was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
2874 
2875   enc_class Push_Result_Mod_D( regD src) %{
         // Unless $src is FPR1, exchanges $src with the FPR1 position one level
         // below the stack top (fincstp / FXCH / fdecstp). Emits nothing when
         // $src is FPR1. The final store is left to a following encoding.
2876     if ($src$$reg != FPR1L_enc) {
2877       // fincstp
2878       emit_opcode (cbuf, 0xD9);
2879       emit_opcode (cbuf, 0xF7);
2880       // FXCH FPR1 with src
2881       emit_opcode(cbuf, 0xD9);
2882       emit_d8(cbuf, 0xC8-1+$src$$reg );
2883       // fdecstp
2884       emit_opcode (cbuf, 0xD9);
2885       emit_opcode (cbuf, 0xF6);
2886     }
2887     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2888     // // FSTP   FPR$dst$$reg
2889     // emit_opcode( cbuf, 0xDD );
2890     // emit_d8( cbuf, 0xD8+$dst$$reg );
2891   %}
2892 
2893   enc_class fnstsw_sahf_skip_parity() %{
         // Copies the FPU status word into the CPU flags (fnstsw ax ; sahf) and
         // then emits a short jnp with displacement 5. The 5 bytes it can skip
         // are not emitted here; they come from whatever encoding follows.
2894     // fnstsw ax
2895     emit_opcode( cbuf, 0xDF );
2896     emit_opcode( cbuf, 0xE0 );
2897     // sahf
2898     emit_opcode( cbuf, 0x9E );
2899     // jnp  ::skip
2900     emit_opcode( cbuf, 0x7B );
2901     emit_opcode( cbuf, 0x05 );
2902   %}
2903 
2904   enc_class emitModD() %{
         // Emits a self-contained loop: fprem, wait, fnstsw ax, sahf, then a
         // near "jp" back to the fprem while the parity flag is set.
         // Writes AX and the CPU flags.
2905     // fprem must be iterative
2906     // :: loop
2907     // fprem
2908     emit_opcode( cbuf, 0xD9 );
2909     emit_opcode( cbuf, 0xF8 );
2910     // wait
2911     emit_opcode( cbuf, 0x9b );
2912     // fnstsw ax
2913     emit_opcode( cbuf, 0xDF );
2914     emit_opcode( cbuf, 0xE0 );
2915     // sahf
2916     emit_opcode( cbuf, 0x9E );
2917     // jp  ::loop  (32-bit displacement 0xFFFFFFF4 = -12, the total size of this sequence)
2918     emit_opcode( cbuf, 0x0F );
2919     emit_opcode( cbuf, 0x8A );
2920     emit_opcode( cbuf, 0xF4 );
2921     emit_opcode( cbuf, 0xFF );
2922     emit_opcode( cbuf, 0xFF );
2923     emit_opcode( cbuf, 0xFF );
2924   %}
2925 
2926   enc_class fpu_flags() %{
         // Transfers the FPU status word into the CPU flags, forcing the
         // "less than" outcome for an unordered compare: if bit 0x0400 of the
         // status word is set, AH is replaced by 1 before the sahf.
         // Writes AX and the CPU flags.
2927     // fnstsw_ax
2928     emit_opcode( cbuf, 0xDF);
2929     emit_opcode( cbuf, 0xE0);
2930     // test ax,0x0400
2931     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2932     emit_opcode( cbuf, 0xA9 );
2933     emit_d16   ( cbuf, 0x0400 );
2934     // // // This sequence works, but stalls for 12-16 cycles on PPro
2935     // // test rax,0x0400
2936     // emit_opcode( cbuf, 0xA9 );
2937     // emit_d32   ( cbuf, 0x00000400 );
2938     //
2939     // jz exit (no unordered comparison); displacement 2 skips only the 2-byte "mov ah,1"
2940     emit_opcode( cbuf, 0x74 );
2941     emit_d8    ( cbuf, 0x02 );
2942     // mov ah,1 - treat as LT case (set carry flag)
2943     emit_opcode( cbuf, 0xB4 );
2944     emit_d8    ( cbuf, 0x01 );
2945     // sahf  (executed on both paths)
2946     emit_opcode( cbuf, 0x9E);
2947   %}
2948 
2949   enc_class cmpF_P6_fixup() %{
         // Runs after a compare that already set the CPU flags. When the parity
         // flag reports an unordered result, AH is set to 1 and loaded into the
         // flags so the outcome reads as "less than". Overwrites AH on that path.
2950     // Fixup the integer flags in case comparison involved a NaN
2951     //
2952     // JNP exit (no unordered comparison, P-flag is set by NaN)
         // Displacement 3 skips the 2-byte MOV and the 1-byte SAHF, landing on the NOP.
2953     emit_opcode( cbuf, 0x7B );
2954     emit_d8    ( cbuf, 0x03 );
2955     // MOV AH,1 - treat as LT case (set carry flag)
2956     emit_opcode( cbuf, 0xB4 );
2957     emit_d8    ( cbuf, 0x01 );
2958     // SAHF
2959     emit_opcode( cbuf, 0x9E);
2960     // NOP     // target for branch to avoid branch to branch
2961     emit_opcode( cbuf, 0x90);
2962   %}
2963 
2964 //     fnstsw_ax();
2965 //     sahf();
2966 //     movl(dst, nan_result);
2967 //     jcc(Assembler::parity, exit);
2968 //     movl(dst, less_result);
2969 //     jcc(Assembler::below, exit);
2970 //     movl(dst, equal_result);
2971 //     jcc(Assembler::equal, exit);
2972 //     movl(dst, greater_result);
2973 
2974 // less_result     = -1;
2975 // greater_result  =  1;
2976 // equal_result    = 0;
2977 // nan_result      = -1;
2978 
2979   enc_class CmpF_Result(eRegI dst) %{
         // Converts the outcome of an FPU compare into an integer in $dst:
         //   unordered (parity) -> -1, below -> -1, equal -> 0, otherwise -> 1.
         // Each MOV is 5 bytes and each short branch 2 bytes; the displacements
         // below are the byte counts from the branch to the end of the sequence.
         // Writes AX (fnstsw) and the CPU flags (sahf).
2980     // fnstsw_ax();
2981     emit_opcode( cbuf, 0xDF);
2982     emit_opcode( cbuf, 0xE0);
2983     // sahf
2984     emit_opcode( cbuf, 0x9E);
2985     // movl(dst, nan_result);
2986     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2987     emit_d32( cbuf, -1 );
2988     // jcc(Assembler::parity, exit);   0x13 = 19 = 5+2+5+2+5
2989     emit_opcode( cbuf, 0x7A );
2990     emit_d8    ( cbuf, 0x13 );
2991     // movl(dst, less_result);
2992     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2993     emit_d32( cbuf, -1 );
2994     // jcc(Assembler::below, exit);    0x0C = 12 = 5+2+5
2995     emit_opcode( cbuf, 0x72 );
2996     emit_d8    ( cbuf, 0x0C );
2997     // movl(dst, equal_result);
2998     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2999     emit_d32( cbuf, 0 );
3000     // jcc(Assembler::equal, exit);    0x05 = the final 5-byte MOV
3001     emit_opcode( cbuf, 0x74 );
3002     emit_d8    ( cbuf, 0x05 );
3003     // movl(dst, greater_result);
3004     emit_opcode( cbuf, 0xB8 + $dst$$reg);
3005     emit_d32( cbuf, 1 );
3006   %}
3007 
3008 
3009   // XMM version of CmpF_Result. Because the XMM compare
3010   // instructions set the EFLAGS directly. It becomes simpler than
3011   // the float version above.
3012   enc_class CmpX_Result(eRegI dst) %{
         // Adjusts $dst according to flags that are already set:
         //   parity (unordered) -> decrement
         //   equal              -> unchanged
         //   above              -> increment
         //   otherwise          -> falls through to 'nan' and decrements
         // $dst is only incremented or decremented here, never loaded.
         // NOTE(review): presumably the caller zeroes $dst first - confirm at the use site.
3013     MacroAssembler _masm(&cbuf);
3014     Label nan, inc, done;
3015 
3016     __ jccb(Assembler::parity, nan);
3017     __ jccb(Assembler::equal,  done);
3018     __ jccb(Assembler::above,  inc);
3019     __ bind(nan);
3020     __ decrement(as_Register($dst$$reg)); // NO L qqq
3021     __ jmpb(done);
3022     __ bind(inc);
3023     __ increment(as_Register($dst$$reg)); // NO L qqq
3024     __ bind(done);
3025   %}
3026 
3027   // Compare the longs and set flags
3028   // BROKEN!  Do Not use as-is
3029   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
         // Compares the high words first and compares the low words only when
         // the high words are equal, so the flags come from whichever CMP ran last.
3030     // CMP    $src1.hi,$src2.hi
3031     emit_opcode( cbuf, 0x3B );
3032     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
3033     // JNE,s  done  (displacement 2 = the 2-byte CMP below)
3034     emit_opcode(cbuf,0x75);
3035     emit_d8(cbuf, 2 );
3036     // CMP    $src1.lo,$src2.lo
3037     emit_opcode( cbuf, 0x3B );
3038     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
3039 // done:
3040   %}
3041 
3042   enc_class convert_int_long( regL dst, eRegI src ) %{
         // Widens the int in $src into the register pair $dst: both halves
         // receive $src, then the high half is shifted right arithmetically by 31.
3043     // low half <- $src
3044     const int lo_reg  = $dst$$reg;
3045     const int int_reg = $src$$reg;
3046     encode_Copy( cbuf, lo_reg, int_reg );
3047     // high half <- $src
3048     encode_Copy( cbuf, HIGH_FROM_LOW(lo_reg), int_reg );
3049     // SAR high half,31
3050     emit_opcode( cbuf, 0xC1 );
3051     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(lo_reg) );
3052     emit_d8(cbuf, 0x1F );
3053   %}
3054 
3055   enc_class convert_long_double( eRegL src ) %{
         // Loads the 64-bit integer in the $src pair onto the FPU stack:
         // push both halves (high first), FILD the 8 bytes at [ESP], then
         // release those 8 bytes again.
3056     // push $src.hi
3057     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
3058     // push $src.lo
3059     emit_opcode(cbuf, 0x50+$src$$reg  );
3060     // fild 64-bits at [SP]
3061     emit_opcode(cbuf,0xdf);
3062     emit_d8(cbuf, 0x6C);
3063     emit_d8(cbuf, 0x24);
3064     emit_d8(cbuf, 0x00);
3065     // pop stack
3066     emit_opcode(cbuf, 0x83); // add  SP, #8
3067     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
3068     emit_d8(cbuf, 0x8);
3069   %}
3070 
3071   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
         // One-operand IMUL by $src1 (result in EDX:EAX per the comment below),
         // followed by an arithmetic right shift of $dst by ($cnt - 32).
         // The shift is omitted when $cnt is exactly 32.
         // $src2 and $cr are not referenced in the body.
3072     // IMUL   EDX:EAX,$src1
3073     emit_opcode( cbuf, 0xF7 );
3074     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
3075     // SAR    EDX,$cnt-32
3076     int shift_count = ((int)$cnt$$constant) - 32;
3077     if (shift_count > 0) {
3078       emit_opcode(cbuf, 0xC1);
3079       emit_rm(cbuf, 0x3, 7, $dst$$reg );
3080       emit_d8(cbuf, shift_count);
3081     }
3082   %}
3083 
3084   // this version doesn't have add sp, 8
3085   enc_class convert_long_double2( eRegL src ) %{
         // Pushes both halves of $src (high first) and FILDs the 8 bytes at [ESP].
         // The two pushed words are left on the stack, so ESP stays lowered by 8.
3086     // push $src.hi
3087     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
3088     // push $src.lo
3089     emit_opcode(cbuf, 0x50+$src$$reg  );
3090     // fild 64-bits at [SP]
3091     emit_opcode(cbuf,0xdf);
3092     emit_d8(cbuf, 0x6C);
3093     emit_d8(cbuf, 0x24);
3094     emit_d8(cbuf, 0x00);
3095   %}
3096 
3097   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
         // Emits a single one-operand multiply (opcode 0xF7, extension 0x5) by
         // $src. $dst is not referenced in the body.
3098     // Basic idea: long = (long)int * (long)int
3099     // IMUL EDX:EAX, src
3100     emit_opcode( cbuf, 0xF7 );
3101     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
3102   %}
3103 
3104   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
         // Unsigned counterpart of long_int_multiply: same opcode 0xF7 but with
         // extension 0x4. $dst is not referenced in the body.
3105     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
3106     // MUL EDX:EAX, src
3107     emit_opcode( cbuf, 0xF7 );
3108     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
3109   %}
3110 
3111   enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
         // 64x64 -> 64 bit multiply of the $dst pair by the $src pair.
         // The two cross products are accumulated in $tmp, the full low product is
         // formed by the one-operand MUL, and $tmp is then added to the high word.
         // The EDX/EAX names in the comments stand for the high/low halves of $dst.
3112     // Basic idea: lo(result) = lo(x_lo * y_lo)
3113     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
3114     // MOV    $tmp,$src.lo
3115     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
3116     // IMUL   $tmp,EDX
3117     emit_opcode( cbuf, 0x0F );
3118     emit_opcode( cbuf, 0xAF );
3119     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
3120     // MOV    EDX,$src.hi
3121     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
3122     // IMUL   EDX,EAX
3123     emit_opcode( cbuf, 0x0F );
3124     emit_opcode( cbuf, 0xAF );
3125     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
3126     // ADD    $tmp,EDX
3127     emit_opcode( cbuf, 0x03 );
3128     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
3129     // MUL   EDX:EAX,$src.lo
3130     emit_opcode( cbuf, 0xF7 );
3131     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
3132     // ADD    EDX,$tmp
3133     emit_opcode( cbuf, 0x03 );
3134     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
3135   %}
3136 
3137   enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
3138     // Multiply the long y = $dst (EDX:EAX) by the constant $src:
3139     //   lo(result) = lo(src * y_lo),  hi(result) = hi(src * y_lo) + lo(src * y_hi)
         const int con     = (int)$src$$constant;
         const int tmp_enc = $tmp$$reg;
3140     // IMUL   $tmp,EDX,$src       (6B /r ib: 8-bit immediate form)
3141     emit_opcode(cbuf, 0x6B);
3142     emit_rm(cbuf, 0x3, tmp_enc, HIGH_FROM_LOW($dst$$reg));
3143     emit_d8(cbuf, con);
3144     // MOV    EDX,$src            (B8+r with a 32-bit immediate)
3145     emit_opcode(cbuf, 0xB8 + EDX_enc);
3146     emit_d32(cbuf, con);
3147     // MUL    EDX                 -- EDX:EAX = EAX * EDX
3148     emit_opcode(cbuf, 0xF7);
3149     emit_rm(cbuf, 0x3, 0x4, EDX_enc);
3150     // ADD    EDX,$tmp
3151     emit_opcode(cbuf, 0x03);
3152     emit_rm(cbuf, 0x3, EDX_enc, tmp_enc);
3153   %}
3154 
3155   enc_class long_div( eRegL src1, eRegL src2 ) %{
         // Pushes both long operands (four 32-bit words), calls SharedRuntime::ldiv
         // directly, then pops the four words again.
         // HIGH_FROM_LOW is applied to the register encoding, not to the finished
         // opcode byte, matching the way convert_long_double2 pushes a high half.
3156     // PUSH src1.hi
3157     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src1$$reg) );
3158     // PUSH src1.lo
3159     emit_opcode(cbuf, 0x50+$src1$$reg  );
3160     // PUSH src2.hi
3161     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src2$$reg) );
3162     // PUSH src2.lo
3163     emit_opcode(cbuf, 0x50+$src2$$reg  );
3164     // CALL directly to the runtime
3165     cbuf.set_insts_mark();
3166     emit_opcode(cbuf,0xE8);       // Call into runtime
         // rel32 is measured from the end of the 4-byte displacement, hence the "- 4".
3167     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3168     // Restore stack
3169     emit_opcode(cbuf, 0x83); // ADD ESP, imm8
3170     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
3171     emit_d8(cbuf, 4*4);      // the four words pushed above
3172   %}
3173 
3174   enc_class long_mod( eRegL src1, eRegL src2 ) %{
         // Pushes both long operands (four 32-bit words), calls SharedRuntime::lrem
         // directly, then pops the four words again.
         // HIGH_FROM_LOW is applied to the register encoding, not to the finished
         // opcode byte, matching the way convert_long_double2 pushes a high half.
3175     // PUSH src1.hi
3176     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src1$$reg) );
3177     // PUSH src1.lo
3178     emit_opcode(cbuf, 0x50+$src1$$reg  );
3179     // PUSH src2.hi
3180     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src2$$reg) );
3181     // PUSH src2.lo
3182     emit_opcode(cbuf, 0x50+$src2$$reg  );
3183     // CALL directly to the runtime
3184     cbuf.set_insts_mark();
3185     emit_opcode(cbuf,0xE8);       // Call into runtime
         // rel32 is measured from the end of the 4-byte displacement, hence the "- 4".
3186     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3187     // Restore stack
3188     emit_opcode(cbuf, 0x83); // ADD ESP, imm8
3189     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
3190     emit_d8(cbuf, 4*4);      // the four words pushed above
3191   %}
3192 
3193   enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
         // ORs both halves of $src into $tmp; the OR sets the flags.
         const int tmp_enc = $tmp$$reg;
         const int lo_enc  = $src$$reg;
3194     // MOV   $tmp,$src.lo
3195     emit_opcode(cbuf, 0x8B);
3196     emit_rm(cbuf, 0x3, tmp_enc, lo_enc);
3197     // OR    $tmp,$src.hi
3198     emit_opcode(cbuf, 0x0B);
3199     emit_rm(cbuf, 0x3, tmp_enc, HIGH_FROM_LOW($src$$reg));
3200   %}
3201 
3202   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
         const int lhs_lo = $src1$$reg;
         const int rhs_lo = $src2$$reg;
3203     // CMP    $src1.lo,$src2.lo
3204     emit_opcode(cbuf, 0x3B);
3205     emit_rm(cbuf, 0x3, lhs_lo, rhs_lo);
3206     // JNE,s  skip   -- hop over the 2-byte CMP below
3207     emit_cc(cbuf, 0x70, 0x5);
3208     emit_d8(cbuf, 2);
3209     // CMP    $src1.hi,$src2.hi
3210     emit_opcode(cbuf, 0x3B);
3211     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg));
3212   %}
3213 
3214   enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
         const int tmp_enc = $tmp$$reg;
3215     // CMP    $src1.lo,$src2.lo   -- long compare: set flags for the low words
3216     emit_opcode(cbuf, 0x3B);
3217     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg);
3218     // MOV    $tmp,$src1.hi
3219     emit_opcode(cbuf, 0x8B);
3220     emit_rm(cbuf, 0x3, tmp_enc, HIGH_FROM_LOW($src1$$reg));
3221     // SBB    $tmp,$src2.hi       -- compute flags for the long compare
3222     emit_opcode(cbuf, 0x1B);
3223     emit_rm(cbuf, 0x3, tmp_enc, HIGH_FROM_LOW($src2$$reg));
3224   %}
3225 
3226   enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
         const int tmp_enc = $tmp$$reg;
3227     // XOR    $tmp,$tmp           -- $tmp = 0
3228     emit_opcode(cbuf, 0x33);
3229     emit_rm(cbuf, 0x3, tmp_enc, tmp_enc);
3230     // CMP    $tmp,$src.lo
3231     emit_opcode(cbuf, 0x3B);
3232     emit_rm(cbuf, 0x3, tmp_enc, $src$$reg);
3233     // SBB    $tmp,$src.hi
3234     emit_opcode(cbuf, 0x1B);
3235     emit_rm(cbuf, 0x3, tmp_enc, HIGH_FROM_LOW($src$$reg));
3236   %}
3237 
3238  // Sniff, sniff... smells like Gnu Superoptimizer
3239   enc_class neg_long( eRegL dst ) %{
         // Negates the 64-bit value held in the $dst register pair.
         const int lo_enc = $dst$$reg;
         const int hi_enc = HIGH_FROM_LOW($dst$$reg);
3240     emit_opcode(cbuf, 0xF7);              // NEG $dst.hi   (F7 /3)
3241     emit_rm(cbuf, 0x3, 0x3, hi_enc);
3242     emit_opcode(cbuf, 0xF7);              // NEG $dst.lo
3243     emit_rm(cbuf, 0x3, 0x3, lo_enc);
3244     emit_opcode(cbuf, 0x83);              // SBB $dst.hi,0 (83 /3 ib)
3245     emit_rm(cbuf, 0x3, 0x3, hi_enc);
3246     emit_d8(cbuf, 0);
3247   %}
3248 
3249   enc_class movq_ld(regXD dst, memory mem) %{
3250     MacroAssembler _masm(&cbuf);
         // MOVQ $dst,$mem
         XMMRegister dst_xmm = $dst$$XMMRegister;
3251     __ movq(dst_xmm, $mem$$Address);
3252   %}
3253 
3254   enc_class movq_st(memory mem, regXD src) %{
3255     MacroAssembler _masm(&cbuf);
         // MOVQ $mem,$src
         XMMRegister src_xmm = $src$$XMMRegister;
3256     __ movq($mem$$Address, src_xmm);
3257   %}
3258 
3259   enc_class pshufd_8x8(regX dst, regX src) %{
3260     MacroAssembler _masm(&cbuf);
3261 
         // Copy $src into $dst, then PUNPCKLBW $dst,$dst and PSHUFLW $dst,$dst,0x00.
3262     encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
         XMMRegister dst_xmm = as_XMMRegister($dst$$reg);
3263     __ punpcklbw(dst_xmm, dst_xmm);
3264     __ pshuflw(dst_xmm, dst_xmm, 0x00);
3265   %}
3266 
3267   enc_class pshufd_4x16(regX dst, regX src) %{
3268     MacroAssembler _masm(&cbuf);
3269 
         // PSHUFLW $dst,$src,0x00
         XMMRegister dst_xmm = as_XMMRegister($dst$$reg);
         XMMRegister src_xmm = as_XMMRegister($src$$reg);
3270     __ pshuflw(dst_xmm, src_xmm, 0x00);
3271   %}
3272 
3273   enc_class pshufd(regXD dst, regXD src, int mode) %{
3274     MacroAssembler _masm(&cbuf);
3275 
         // PSHUFD $dst,$src,$mode
         XMMRegister dst_xmm = as_XMMRegister($dst$$reg);
         XMMRegister src_xmm = as_XMMRegister($src$$reg);
3276     __ pshufd(dst_xmm, src_xmm, $mode);
3277   %}
3278 
3279   enc_class pxor(regXD dst, regXD src) %{
3280     MacroAssembler _masm(&cbuf);
3281 
         // PXOR $dst,$src
         XMMRegister dst_xmm = as_XMMRegister($dst$$reg);
         XMMRegister src_xmm = as_XMMRegister($src$$reg);
3282     __ pxor(dst_xmm, src_xmm);
3283   %}
3284 
3285   enc_class mov_i2x(regXD dst, eRegI src) %{
3286     MacroAssembler _masm(&cbuf);
3287 
         // MOVD $dst,$src  (general register into XMM register)
         XMMRegister dst_xmm = as_XMMRegister($dst$$reg);
         Register    src_gpr = as_Register($src$$reg);
3288     __ movdl(dst_xmm, src_gpr);
3289   %}
3290 
3291 
3292   // Because the transitions from emitted code to the runtime
3293   // monitorenter/exit helper stubs are so slow it's critical that
3294   // we inline both the stack-locking fast-path and the inflated fast path.
3295   //
3296   // See also: cmpFastLock and cmpFastUnlock.
3297   //
3298   // What follows is a specialized inline transliteration of the code
3299   // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
3300   // another option would be to emit TrySlowEnter and TrySlowExit methods
3301   // at startup-time.  These methods would accept arguments as
3302   // (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
3303   // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
3304   // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
3305   // In practice, however, the # of lock sites is bounded and is usually small.
3306   // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
3307   // if the processor uses simple bimodal branch predictors keyed by EIP
3308   // Since the helper routines would be called from multiple synchronization
3309   // sites.
3310   //
3311   // An even better approach would be to write "MonitorEnter()" and "MonitorExit()"
3312   // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
3313   // to those specialized methods.  That'd give us a mostly platform-independent
3314   // implementation that the JITs could optimize and inline at their pleasure.
3315   // Done correctly, the only time we'd need to cross to native code would be
3316   // to park() or unpark() threads.  We'd also need a few more unsafe operators
3317   // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
3318   // (b) explicit barriers or fence operations.
3319   //
3320   // TODO:
3321   //
3322   // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
3323   //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
3324   //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
3325   //    the lock operators would typically be faster than reifying Self.
3326   //
3327   // *  Ideally I'd define the primitives as:
3328   //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
3329   //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
3330   //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
3331   //    Instead, we're stuck with rather awkward and brittle register assignments below.
3332   //    Furthermore the register assignments are overconstrained, possibly resulting in
3333   //    sub-optimal code near the synchronization site.
3334   //
3335   // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
3336   //    Alternately, use a better sp-proximity test.
3337   //
3338   // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
3339   //    Either one is sufficient to uniquely identify a thread.
3340   //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
3341   //
3342   // *  Intrinsify notify() and notifyAll() for the common cases where the
3343   //    object is locked by the calling thread but the waitlist is empty.
3344   //    This would avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
3345   //
3346   // *  use jccb and jmpb instead of jcc and jmp to improve code density.
3347   //    But beware of excessive branch density on AMD Opterons.
3348   //
3349   // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
3350   //    or failure of the fast-path.  If the fast-path fails then we pass
3351   //    control to the slow-path, typically in C.  In Fast_Lock and
3352   //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
3353   //    will emit a conditional branch immediately after the node.
3354   //    So we have branches to branches and lots of ICC.ZF games.
3355   //    Instead, it might be better to have C2 pass a "FailureLabel"
3356   //    into Fast_Lock and Fast_Unlock.  In the case of success, control
3357   //    will drop through the node.  ICC.ZF is undefined at exit.
3358   //    In the case of failure, the node will branch directly to the
3359   //    FailureLabel
3360 
3361 
3362   // obj: object to lock
3363   // box: on-stack box address (displaced header location) - KILLED
3364   // tmp: scratch, constrained to EAX (the implicit cmpxchg comparand) -- KILLED
3365   // scr: tmp -- KILLED
       //
       // Fast_Lock emits the inline monitor-enter fast path.  The outcome is
       // reported in the ZFlag at DONE_LABEL: ZFlag == 1 means success,
       // ZFlag == 0 means failure and forces control through the slow path.
       // Bits of EmitSync tested by this encoding select alternative code shapes:
       //      1 - force every lock operation through the slow path
       //      2 - stack-locking only; no inflated-monitor handling
       //     32 - emit a NOP right after DONE_LABEL
       //     64 - load and test _owner before attempting the CAS
       //    128 - store into the box before the CAS on _owner
       //   2048 - prefetchw the monitor's _owner field (3DNow! and MP only)
       //   8192 - store into the box, then CAS Self directly into _owner
3366   enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
3367 
3368     Register objReg = as_Register($obj$$reg);
3369     Register boxReg = as_Register($box$$reg);
3370     Register tmpReg = as_Register($tmp$$reg);
3371     Register scrReg = as_Register($scr$$reg);
3372 
3373     // Ensure the register assignments are disjoint
         // NOTE(review): tmpReg != scrReg is not among the checks below -- confirm
         // that the operand classes already rule that pairing out.
3374     guarantee (objReg != boxReg, "") ;
3375     guarantee (objReg != tmpReg, "") ;
3376     guarantee (objReg != scrReg, "") ;
3377     guarantee (boxReg != tmpReg, "") ;
3378     guarantee (boxReg != scrReg, "") ;
3379     guarantee (tmpReg == as_Register(EAX_enc), "") ;
3380 
3381     MacroAssembler masm(&cbuf);
3382 
         // When counters are attached, bump the total-entry counter first.
3383     if (_counters != NULL) {
3384       masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3385     }
3386     if (EmitSync & 1) {
3387         // set box->dhw = unused_mark (3)
3388         // Force all sync thru slow-path: slow_enter() and slow_exit()
3389         masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
3390         masm.cmpptr (rsp, (int32_t)0) ;        // presumably rsp != 0, leaving ZFlag == 0 (failure)
3391     } else
3392     if (EmitSync & 2) {
3393         Label DONE_LABEL ;
3394         if (UseBiasedLocking) {
3395            // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3396            masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3397         }
3398 
3399         masm.movptr(tmpReg, Address(objReg, 0)) ;          // fetch markword
3400         masm.orptr (tmpReg, 0x1);
3401         masm.movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
3402         if (os::is_MP()) { masm.lock();  }
3403         masm.cmpxchgptr(boxReg, Address(objReg, 0));          // Updates tmpReg
3404         masm.jcc(Assembler::equal, DONE_LABEL);
3405         // Recursive locking
             // tmpReg holds the mark word seen by the failed CAS.  The masked
             // difference from rsp is stored as the displaced header; the AND
             // leaves ZFlag == 1 only when that value is 0.
3406         masm.subptr(tmpReg, rsp);
3407         masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
3408         masm.movptr(Address(boxReg, 0), tmpReg);
3409         masm.bind(DONE_LABEL) ;
3410     } else {
3411       // Possible cases that we'll encounter in fast_lock
3412       // ------------------------------------------------
3413       // * Inflated
3414       //    -- unlocked
3415       //    -- Locked
3416       //       = by self
3417       //       = by other
3418       // * biased
3419       //    -- by Self
3420       //    -- by other
3421       // * neutral
3422       // * stack-locked
3423       //    -- by self
3424       //       = sp-proximity test hits
3425       //       = sp-proximity test generates false-negative
3426       //    -- by other
3427       //
3428 
           // PopDone is declared here but not referenced anywhere else in this encoding.
3429       Label IsInflated, DONE_LABEL, PopDone ;
3430 
3431       // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
3432       // order to reduce the number of conditional branches in the most common cases.
3433       // Beware -- there's a subtle invariant that fetch of the markword
3434       // at [FETCH], below, will never observe a biased encoding (*101b).
3435       // If this invariant is not held we risk exclusion (safety) failure.
3436       if (UseBiasedLocking && !UseOptoBiasInlining) {
3437         masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3438       }
3439 
3440       masm.movptr(tmpReg, Address(objReg, 0)) ;         // [FETCH]
3441       masm.testptr(tmpReg, 0x02) ;                      // Inflated v (Stack-locked or neutral)
3442       masm.jccb  (Assembler::notZero, IsInflated) ;
3443 
3444       // Attempt stack-locking ...
3445       masm.orptr (tmpReg, 0x1);
3446       masm.movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
3447       if (os::is_MP()) { masm.lock();  }
3448       masm.cmpxchgptr(boxReg, Address(objReg, 0));           // Updates tmpReg
3449       if (_counters != NULL) {
3450         masm.cond_inc32(Assembler::equal,
3451                         ExternalAddress((address)_counters->fast_path_entry_count_addr()));
3452       }
3453       masm.jccb (Assembler::equal, DONE_LABEL);
3454 
3455       // Recursive locking
           // Same test as in the (EmitSync & 2) variant: the masked difference
           // between the observed mark word and rsp becomes the displaced header,
           // and the AND leaves ZFlag == 1 only when that value is 0.
3456       masm.subptr(tmpReg, rsp);
3457       masm.andptr(tmpReg, 0xFFFFF003 );
3458       masm.movptr(Address(boxReg, 0), tmpReg);
3459       if (_counters != NULL) {
3460         masm.cond_inc32(Assembler::equal,
3461                         ExternalAddress((address)_counters->fast_path_entry_count_addr()));
3462       }
3463       masm.jmp  (DONE_LABEL) ;
3464 
3465       masm.bind (IsInflated) ;
3466 
3467       // The object is inflated.
3468       //
3469       // TODO-FIXME: eliminate the ugly use of manifest constants:
3470       //   Use markOopDesc::monitor_value instead of "2".
3471       //   use markOop::unused_mark() instead of "3".
3472       // The tmpReg value is an objectMonitor reference ORed with
3473       // markOopDesc::monitor_value (2).   We can either convert tmpReg to an
3474       // objectmonitor pointer by masking off the "2" bit or we can just
3475       // use tmpReg as an objectmonitor pointer but bias the objectmonitor
3476       // field offsets with "-2" to compensate for and annul the low-order tag bit.
3477       //
3478       // I use the latter as it avoids AGI stalls.
3479       // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
3480       // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
3481       //
3482       #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
           // The rest of this encoding does not use OFFSET_SKEWED; it spells out
           // the "-2" bias at each field access instead.
3483 
3484       // boxReg refers to the on-stack BasicLock in the current frame.
3485       // We'd like to write:
3486       //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
3487       // This is convenient but results in a ST-before-CAS penalty.  The following CAS suffers
3488       // additional latency as we have another ST in the store buffer that must drain.
3489 
3490       if (EmitSync & 8192) {
3491          masm.movptr(Address(boxReg, 0), 3) ;            // results in ST-before-CAS penalty
3492          masm.get_thread (scrReg) ;
3493          masm.movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
3494          masm.movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
3495          if (os::is_MP()) { masm.lock(); }
3496          masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3497       } else
3498       if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
              // Keep the box address in scrReg; boxReg takes over the tagged monitor pointer.
3499          masm.movptr(scrReg, boxReg) ;
3500          masm.movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]
3501 
3502          // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
3503          if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
3504             // prefetchw [eax + Offset(_owner)-2]
3505             masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
3506          }
3507 
3508          if ((EmitSync & 64) == 0) {
3509            // Optimistic form: consider XORL tmpReg,tmpReg
3510            masm.movptr(tmpReg, NULL_WORD) ;
3511          } else {
3512            // Can suffer RTS->RTO upgrades on shared or cold $ lines
3513            // Test-And-CAS instead of CAS
3514            masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // EAX = m->_owner
3515            masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
3516            masm.jccb  (Assembler::notZero, DONE_LABEL) ;
3517          }
3518 
3519          // Appears unlocked - try to swing _owner from null to non-null.
3520          // Ideally, I'd manifest "Self" with get_thread and then attempt
3521          // to CAS the register containing Self into m->Owner.
3522          // But we don't have enough registers, so instead we can either try to CAS
3523          // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
3524          // we later store "Self" into m->Owner.  Transiently storing a stack address
3525          // (rsp or the address of the box) into  m->owner is harmless.
3526          // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
3527          if (os::is_MP()) { masm.lock();  }
3528          masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
              // The store below sits between the CAS and the branch that consumes
              // the CAS result; a MOV does not modify the flags.
3529          masm.movptr(Address(scrReg, 0), 3) ;          // box->_displaced_header = 3
3530          masm.jccb  (Assembler::notZero, DONE_LABEL) ;
3531          masm.get_thread (scrReg) ;                    // beware: clobbers ICCs
3532          masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
3533          masm.xorptr(boxReg, boxReg) ;                 // set icc.ZFlag = 1 to indicate success
3534 
3535          // If the CAS fails we can either retry or pass control to the slow-path.
3536          // We use the latter tactic.
3537          // Pass the CAS result in the icc.ZFlag into DONE_LABEL
3538          // If the CAS was successful ...
3539          //   Self has acquired the lock
3540          //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
3541          // Intentional fall-through into DONE_LABEL ...
3542       } else {
3543          masm.movptr(Address(boxReg, 0), 3) ;       // results in ST-before-CAS penalty
3544          masm.movptr(boxReg, tmpReg) ;
3545 
3546          // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
3547          if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
3548             // prefetchw [eax + Offset(_owner)-2]
3549             masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
3550          }
3551 
3552          if ((EmitSync & 64) == 0) {
3553            // Optimistic form
3554            masm.xorptr  (tmpReg, tmpReg) ;
3555          } else {
3556            // Can suffer RTS->RTO upgrades on shared or cold $ lines
3557            masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // EAX = m->_owner
3558            masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
3559            masm.jccb  (Assembler::notZero, DONE_LABEL) ;
3560          }
3561 
3562          // Appears unlocked - try to swing _owner from null to non-null.
3563          // Use either "Self" (in scr) or rsp as thread identity in _owner.
3564          // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
3565          masm.get_thread (scrReg) ;
3566          if (os::is_MP()) { masm.lock(); }
3567          masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3568 
3569          // If the CAS fails we can either retry or pass control to the slow-path.
3570          // We use the latter tactic.
3571          // Pass the CAS result in the icc.ZFlag into DONE_LABEL
3572          // If the CAS was successful ...
3573          //   Self has acquired the lock
3574          //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
3575          // Intentional fall-through into DONE_LABEL ...
3576       }
3577 
3578       // DONE_LABEL is a hot target - we'd really like to place it at the
3579       // start of cache line by padding with NOPs.
3580       // See the AMD and Intel software optimization manuals for the
3581       // most efficient "long" NOP encodings.
3582       // Unfortunately none of our alignment mechanisms suffice.
3583       masm.bind(DONE_LABEL);
3584 
3585       // Avoid branch-to-branch on AMD processors
3586       // This appears to be superstition.
3587       if (EmitSync & 32) masm.nop() ;
3588 
3589 
3590       // At DONE_LABEL the icc ZFlag is set as follows ...
3591       // Fast_Unlock uses the same protocol.
3592       // ZFlag == 1 -> Success
3593       // ZFlag == 0 -> Failure - force control through the slow-path
3594     }
3595   %}
3596 
3597   // obj: object to unlock
3598   // box: box address (displaced header location), killed.  Must be EAX.
3599   // rbx,: killed tmp; cannot be obj nor box.
3600   //
3601   // Some commentary on balanced locking:
3602   //
3603   // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3604   // Methods that don't have provably balanced locking are forced to run in the
3605   // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3606   // The interpreter provides two properties:
3607   // I1:  At return-time the interpreter automatically and quietly unlocks any
3608   //      objects acquired by the current activation (frame).  Recall that the
3609   //      interpreter maintains an on-stack list of locks currently held by
3610   //      a frame.
3611   // I2:  If a method attempts to unlock an object that is not held by
3612   //      the frame the interpreter throws IMSX (IllegalMonitorStateException).
3613   //
3614   // Let's say A(), which has provably balanced locking, acquires O and then calls B().
3615   // B() doesn't have provably balanced locking so it runs in the interpreter.
3616   // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
3617   // is still locked by A().
3618   //
3619   // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
3620   // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3621   // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
3622   // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3623 
3624   enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
3625 
3626     Register objReg = as_Register($obj$$reg);
3627     Register boxReg = as_Register($box$$reg);
3628     Register tmpReg = as_Register($tmp$$reg);
3629 
3630     guarantee (objReg != boxReg, "") ;
3631     guarantee (objReg != tmpReg, "") ;
3632     guarantee (boxReg != tmpReg, "") ;
3633     guarantee (boxReg == as_Register(EAX_enc), "") ;
3634     MacroAssembler masm(&cbuf);
3635 
3636     if (EmitSync & 4) {
3637       // Disable - inhibit all inlining.  Force control through the slow-path
3638       masm.cmpptr (rsp, 0) ; 
3639     } else 
3640     if (EmitSync & 8) {
3641       Label DONE_LABEL ;
3642       if (UseBiasedLocking) {
3643          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3644       }
3645       // classic stack-locking code ...
3646       masm.movptr(tmpReg, Address(boxReg, 0)) ;
3647       masm.testptr(tmpReg, tmpReg) ;
3648       masm.jcc   (Assembler::zero, DONE_LABEL) ;
3649       if (os::is_MP()) { masm.lock(); }
3650       masm.cmpxchgptr(tmpReg, Address(objReg, 0));          // Uses EAX which is box
3651       masm.bind(DONE_LABEL);
3652     } else {
3653       Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3654 
3655       // Critically, the biased locking test must have precedence over
3656       // and appear before the (box->dhw == 0) recursive stack-lock test.
3657       if (UseBiasedLocking && !UseOptoBiasInlining) {
3658          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3659       }
3660       
3661       masm.cmpptr(Address(boxReg, 0), 0) ;            // Examine the displaced header
3662       masm.movptr(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
3663       masm.jccb  (Assembler::zero, DONE_LABEL) ;      // 0 indicates recursive stack-lock
3664 
3665       masm.testptr(tmpReg, 0x02) ;                     // Inflated? 
3666       masm.jccb  (Assembler::zero, Stacked) ;
3667 
3668       masm.bind  (Inflated) ;
3669       // It's inflated.
3670       // Despite our balanced locking property we still check that m->_owner == Self
3671       // as java routines or native JNI code called by this thread might
3672       // have released the lock.
3673       // Refer to the comments in synchronizer.cpp for how we might encode extra
3674       // state in _succ so we can avoid fetching EntryList|cxq.
3675       //
3676       // I'd like to add more cases in fast_lock() and fast_unlock() --
3677       // such as recursive enter and exit -- but we have to be wary of
3678       // I$ bloat, T$ effects and BP$ effects.
3679       //
3680       // If there's no contention try a 1-0 exit.  That is, exit without
3681       // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
3682       // we detect and recover from the race that the 1-0 exit admits.
3683       //
3684       // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3685       // before it STs null into _owner, releasing the lock.  Updates
3686       // to data protected by the critical section must be visible before
3687       // we drop the lock (and thus before any other thread could acquire
3688       // the lock and observe the fields protected by the lock).
3689       // IA32's memory-model is SPO, so STs are ordered with respect to
3690       // each other and there's no need for an explicit barrier (fence).
3691       // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3692 
3693       masm.get_thread (boxReg) ;
3694       if ((EmitSync & 4096) && VM_Version::supports_3dnow() && os::is_MP()) {
3695         // prefetchw [ebx + Offset(_owner)-2]
3696         masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
3697       }
3698 
3699       // Note that we could employ various encoding schemes to reduce
3700       // the number of loads below (currently 4) to just 2 or 3.
3701       // Refer to the comments in synchronizer.cpp.
3702       // In practice the chain of fetches doesn't seem to impact performance, however.
3703       if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
3704          // Attempt to reduce branch density - AMD's branch predictor.
3705          masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  
3706          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3707          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3708          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3709          masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
3710          masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
3711          masm.jmpb  (DONE_LABEL) ; 
3712       } else { 
3713          masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  
3714          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3715          masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
3716          masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3717          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3718          masm.jccb  (Assembler::notZero, CheckSucc) ; 
3719          masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
3720          masm.jmpb  (DONE_LABEL) ; 
3721       }
3722 
3723       // The Following code fragment (EmitSync & 65536) improves the performance of
3724       // contended applications and contended synchronization microbenchmarks.
3725       // Unfortunately the emission of the code - even though not executed - causes regressions
3726       // in scimark and jetstream, evidently because of $ effects.  Replacing the code
3727       // with an equal number of never-executed NOPs results in the same regression.
3728       // We leave it off by default.
3729 
3730       if ((EmitSync & 65536) != 0) {
3731          Label LSuccess, LGoSlowPath ;
3732 
3733          masm.bind  (CheckSucc) ;
3734 
3735          // Optional pre-test ... it's safe to elide this
3736          if ((EmitSync & 16) == 0) { 
3737             masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; 
3738             masm.jccb  (Assembler::zero, LGoSlowPath) ; 
3739          }
3740 
3741          // We have a classic Dekker-style idiom:
3742          //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
3743          // There are a number of ways to implement the barrier:
3744          // (1) lock:andl &m->_owner, 0
3745          //     is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
3746          //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
3747          //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
3748          // (2) If supported, an explicit MFENCE is appealing.
3749          //     In older IA32 processors MFENCE is slower than lock:add or xchg
3750          //     particularly if the write-buffer is full as might be the case if
3751          //     stores closely precede the fence or fence-equivalent instruction.
3752          //     In more modern implementations MFENCE appears faster, however.
3753          // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
3754          //     The $lines underlying the top-of-stack should be in M-state.
3755          //     The locked add instruction is serializing, of course.
3756          // (4) Use xchg, which is serializing
3757          //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
3758          // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
3759          //     The integer condition codes will tell us if succ was 0.
3760          //     Since _succ and _owner should reside in the same $line and
3761          //     we just stored into _owner, it's likely that the $line
3762          //     remains in M-state for the lock:orl.
3763          //
3764          // We currently use (3), although it's likely that switching to (2)
3765          // is correct for the future.
3766             
3767          masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
3768          if (os::is_MP()) { 
3769             if (VM_Version::supports_sse2() && 1 == FenceInstruction) { 
3770               masm.mfence();
3771             } else { 
3772               masm.lock () ; masm.addptr(Address(rsp, 0), 0) ; 
3773             }
3774          }
3775          // Ratify _succ remains non-null
3776          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; 
3777          masm.jccb  (Assembler::notZero, LSuccess) ; 
3778 
3779          masm.xorptr(boxReg, boxReg) ;                  // box is really EAX
3780          if (os::is_MP()) { masm.lock(); }
3781          masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3782          masm.jccb  (Assembler::notEqual, LSuccess) ;
3783          // Since we're low on registers we installed rsp as a placeholder in _owner.
3784          // Now install Self over rsp.  This is safe as we're transitioning from
3785          // non-null to non-null
3786          masm.get_thread (boxReg) ;
3787          masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
3788          // Intentional fall-through into LGoSlowPath ...
3789 
3790          masm.bind  (LGoSlowPath) ; 
3791          masm.orptr(boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3792          masm.jmpb  (DONE_LABEL) ; 
3793 
3794          masm.bind  (LSuccess) ; 
3795          masm.xorptr(boxReg, boxReg) ;                 // set ICC.ZF=1 to indicate success
3796          masm.jmpb  (DONE_LABEL) ; 
3797       }
3798 
3799       masm.bind (Stacked) ;
3800       // It's not inflated and it's not recursively stack-locked and it's not biased.
3801       // It must be stack-locked.
3802       // Try to reset the header to displaced header.
3803       // The "box" value on the stack is stable, so we can reload
3804       // and be assured we observe the same value as above.
3805       masm.movptr(tmpReg, Address(boxReg, 0)) ;
3806       if (os::is_MP()) {   masm.lock();    }
3807       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
3808       // Intentional fall-thru into DONE_LABEL
3809 
3810 
3811       // DONE_LABEL is a hot target - we'd really like to place it at the
3812       // start of cache line by padding with NOPs.
3813       // See the AMD and Intel software optimization manuals for the
3814       // most efficient "long" NOP encodings.
3815       // Unfortunately none of our alignment mechanisms suffice.
3816       if ((EmitSync & 65536) == 0) {
3817          masm.bind (CheckSucc) ;
3818       }
3819       masm.bind(DONE_LABEL);
3820 
3821       // Avoid branch to branch on AMD processors
3822       if (EmitSync & 32768) { masm.nop() ; }
3823     }
3824   %}
3825 
3826 
3827   enc_class enc_pop_rdx() %{      // Emits the single-byte instruction POP EDX.
3828     emit_opcode(cbuf,0x5A);       // POP EDX
3829   %}
3830 
3831   enc_class enc_rethrow() %{      // Emits a JMP rel32 to OptoRuntime::rethrow_stub().
3832     cbuf.set_insts_mark();          // record the instruction start (presumably consumed by the relocation below)
3833     emit_opcode(cbuf, 0xE9);        // jmp    entry
3834     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,   // rel32 = target - (address just past the 4-byte displacement)
3835                    runtime_call_Relocation::spec(), RELOC_IMM32 );
3836   %}
3837 
3838 
3839   // Convert a double to an int.  Java semantics require we do complex
3840   // manipulations in the corner cases.  So we set the rounding mode to
3841   // 'zero', store the darned double down as an int, and restore the
3842   // std/24-bit control word.  If the stored result is 0x80000000 (the value
3843   // Intel stores in the corner cases) we take a slow path that calls d2i_wrapper.
3844   enc_class D2I_encoding( regD src ) %{
3845     // Flip to round-to-zero mode.  We attempted to allow invalid-op
3846     // exceptions here, so that a NAN or other corner-case value will
3847     // throw an exception (but normal values get converted at full speed).
3848     // However, I2C adapters and other float-stack manglers leave pending
3849     // invalid-op exceptions hanging.  We would have to clear them before
3850     // enabling them and that is more expensive than just testing for the
3851     // invalid value Intel stores down in the corner cases.
3852     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3853     emit_opcode(cbuf,0x2D);
3854     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3855     // Allocate a word (4 bytes) on the CPU stack for the result
3856     emit_opcode(cbuf,0x83);            // SUB ESP,4
3857     emit_opcode(cbuf,0xEC);
3858     emit_d8(cbuf,0x04);
3859     // Encoding assumes a double has been pushed into FPR0.
3860     // Store down the double as an int, popping the FPU stack
3861     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
3862     emit_opcode(cbuf,0x1C);
3863     emit_d8(cbuf,0x24);
3864     // Restore the rounding mode; mask the exception
3865     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3866     emit_opcode(cbuf,0x2D);
3867     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3868         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3869         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3870 
3871     // Load the converted int into EAX; adjust CPU stack
3872     emit_opcode(cbuf,0x58);       // POP EAX
3873     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3874     emit_d32   (cbuf,0x80000000); //         0x80000000
3875     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3876     emit_d8    (cbuf,0x07);       // Size of slow_call: 2-byte FLD + 5-byte CALL
3877     // Push src onto stack slow-path
3878     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3879     emit_d8    (cbuf,0xC0-1+$src$$reg );
3880     // CALL directly to the runtime
3881     cbuf.set_insts_mark();
3882     emit_opcode(cbuf,0xE8);       // Call into runtime
3883     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3884     // Carry on here...
3885   %}
3886 
3887   enc_class D2L_encoding( regD src ) %{   // Convert the double in FPR0 to a long in EDX:EAX; calls d2l_wrapper when the result is 0x8000000000000000.
3888     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3889     emit_opcode(cbuf,0x2D);
3890     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3891     // Allocate two words (8 bytes) on the CPU stack for the result
3892     emit_opcode(cbuf,0x83);            // SUB ESP,8
3893     emit_opcode(cbuf,0xEC);
3894     emit_d8(cbuf,0x08);
3895     // Encoding assumes a double has been pushed into FPR0.
3896     // Store down the double as a long, popping the FPU stack
3897     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3898     emit_opcode(cbuf,0x3C);
3899     emit_d8(cbuf,0x24);
3900     // Restore the rounding mode; mask the exception
3901     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3902     emit_opcode(cbuf,0x2D);
3903     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3904         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3905         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3906 
3907     // Load the converted long into EDX:EAX (low word first); adjust CPU stack
3908     emit_opcode(cbuf,0x58);       // POP EAX
3909     emit_opcode(cbuf,0x5A);       // POP EDX
3910     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3911     emit_d8    (cbuf,0xFA);       // ModRM: /7 (CMP), EDX
3912     emit_d32   (cbuf,0x80000000); //         0x80000000
3913     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3914     emit_d8    (cbuf,0x07+4);     // Size of slow_call plus the 4 bytes of TEST/JNE below
3915     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3916     emit_opcode(cbuf,0xC0);       // ModRM: EAX,EAX
3917     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3918     emit_d8    (cbuf,0x07);       // Size of slow_call: 2-byte FLD + 5-byte CALL
3919     // Push src onto stack slow-path
3920     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3921     emit_d8    (cbuf,0xC0-1+$src$$reg );
3922     // CALL directly to the runtime
3923     cbuf.set_insts_mark();
3924     emit_opcode(cbuf,0xE8);       // Call into runtime
3925     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3926     // Carry on here...
3927   %}
3928 
3929   enc_class X2L_encoding( regX src ) %{   // Convert the float in XMM src to a long in EDX:EAX via the x87 stack; calls d2l_wrapper when the result is 0x8000000000000000.
3930     // Allocate two words (8 bytes): scratch for the float, then for the long result
3931     emit_opcode(cbuf,0x83);      // SUB ESP,8
3932     emit_opcode(cbuf,0xEC);
3933     emit_d8(cbuf,0x08);
3934 
3935     emit_opcode  (cbuf, 0xF3 );  // MOVSS [ESP], src
3936     emit_opcode  (cbuf, 0x0F );
3937     emit_opcode  (cbuf, 0x11 );
3938     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3939 
3940     emit_opcode(cbuf,0xD9 );     // FLD_S [ESP]
3941     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3942 
3943     emit_opcode(cbuf,0xD9);      // FLDCW  trunc
3944     emit_opcode(cbuf,0x2D);
3945     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3946 
3947     // The float loaded by the FLD_S above is now on top of the FPU stack.
3948     // Store it down as a long, popping the FPU stack
3949     emit_opcode(cbuf,0xDF);      // FISTP [ESP]
3950     emit_opcode(cbuf,0x3C);
3951     emit_d8(cbuf,0x24);
3952 
3953     // Restore the rounding mode; mask the exception
3954     emit_opcode(cbuf,0xD9);      // FLDCW   std/24-bit mode
3955     emit_opcode(cbuf,0x2D);
3956     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3957       ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3958       : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3959 
3960     // Load the converted long into EDX:EAX (low word first); adjust CPU stack
3961     emit_opcode(cbuf,0x58);      // POP EAX
3962 
3963     emit_opcode(cbuf,0x5A);      // POP EDX
3964 
3965     emit_opcode(cbuf,0x81);      // CMP EDX,imm
3966     emit_d8    (cbuf,0xFA);      // ModRM: /7 (CMP), EDX
3967     emit_d32   (cbuf,0x80000000);//         0x80000000
3968 
3969     emit_opcode(cbuf,0x75);      // JNE around_slow_call
3970     emit_d8    (cbuf,0x13+4);    // Size of slow_call plus the 4 bytes of TEST/JNE below
3971 
3972     emit_opcode(cbuf,0x85);      // TEST EAX,EAX
3973     emit_opcode(cbuf,0xC0);      // ModRM: EAX,EAX
3974 
3975     emit_opcode(cbuf,0x75);      // JNE around_slow_call
3976     emit_d8    (cbuf,0x13);      // Size of slow_call: SUB(3)+MOVSS(5)+FLD_S(3)+ADD(3)+CALL(5)
3977 
3978     // Slow path: allocate a word to push src back onto the FPU stack
3979     emit_opcode(cbuf,0x83);      // SUB ESP,4
3980     emit_opcode(cbuf,0xEC);
3981     emit_d8(cbuf,0x04);
3982 
3983     emit_opcode  (cbuf, 0xF3 );  // MOVSS [ESP], src
3984     emit_opcode  (cbuf, 0x0F );
3985     emit_opcode  (cbuf, 0x11 );
3986     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3987 
3988     emit_opcode(cbuf,0xD9 );     // FLD_S [ESP]
3989     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3990 
3991     emit_opcode(cbuf,0x83);      // ADD ESP,4
3992     emit_opcode(cbuf,0xC4);
3993     emit_d8(cbuf,0x04);
3994 
3995     // CALL directly to the runtime
3996     cbuf.set_insts_mark();
3997     emit_opcode(cbuf,0xE8);       // Call into runtime
3998     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3999     // Carry on here...
4000   %}
4001 
4002   enc_class XD2L_encoding( regXD src ) %{   // Convert the double in XMM src to a long in EDX:EAX via the x87 stack; calls d2l_wrapper when the result is 0x8000000000000000.
4003     // Allocate two words (8 bytes): scratch for the double, then for the long result
4004     emit_opcode(cbuf,0x83);      // SUB ESP,8
4005     emit_opcode(cbuf,0xEC);
4006     emit_d8(cbuf,0x08);
4007 
4008     emit_opcode  (cbuf, 0xF2 );  // MOVSD [ESP], src
4009     emit_opcode  (cbuf, 0x0F );
4010     emit_opcode  (cbuf, 0x11 );
4011     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4012 
4013     emit_opcode(cbuf,0xDD );     // FLD_D [ESP]
4014     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4015 
4016     emit_opcode(cbuf,0xD9);      // FLDCW  trunc
4017     emit_opcode(cbuf,0x2D);
4018     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
4019 
4020     // The double loaded by the FLD_D above is now on top of the FPU stack.
4021     // Store down the double as a long, popping the FPU stack
4022     emit_opcode(cbuf,0xDF);      // FISTP [ESP]
4023     emit_opcode(cbuf,0x3C);
4024     emit_d8(cbuf,0x24);
4025 
4026     // Restore the rounding mode; mask the exception
4027     emit_opcode(cbuf,0xD9);      // FLDCW   std/24-bit mode
4028     emit_opcode(cbuf,0x2D);
4029     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
4030       ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
4031       : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
4032 
4033     // Load the converted long into EDX:EAX (low word first); adjust CPU stack
4034     emit_opcode(cbuf,0x58);      // POP EAX
4035 
4036     emit_opcode(cbuf,0x5A);      // POP EDX
4037 
4038     emit_opcode(cbuf,0x81);      // CMP EDX,imm
4039     emit_d8    (cbuf,0xFA);      // ModRM: /7 (CMP), EDX
4040     emit_d32   (cbuf,0x80000000); //         0x80000000
4041 
4042     emit_opcode(cbuf,0x75);      // JNE around_slow_call
4043     emit_d8    (cbuf,0x13+4);    // Size of slow_call plus the 4 bytes of TEST/JNE below
4044 
4045     emit_opcode(cbuf,0x85);      // TEST EAX,EAX
4046     emit_opcode(cbuf,0xC0);      // ModRM: EAX,EAX
4047 
4048     emit_opcode(cbuf,0x75);      // JNE around_slow_call
4049     emit_d8    (cbuf,0x13);      // Size of slow_call: SUB(3)+MOVSD(5)+FLD_D(3)+ADD(3)+CALL(5)
4050 
4051     // Push src onto stack slow-path
4052     // Allocate two words (8 bytes)
4053     emit_opcode(cbuf,0x83);      // SUB ESP,8
4054     emit_opcode(cbuf,0xEC);
4055     emit_d8(cbuf,0x08);
4056 
4057     emit_opcode  (cbuf, 0xF2 );  // MOVSD [ESP], src
4058     emit_opcode  (cbuf, 0x0F );
4059     emit_opcode  (cbuf, 0x11 );
4060     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4061 
4062     emit_opcode(cbuf,0xDD );     // FLD_D [ESP]
4063     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4064 
4065     emit_opcode(cbuf,0x83);      // ADD ESP,8
4066     emit_opcode(cbuf,0xC4);
4067     emit_d8(cbuf,0x08);
4068 
4069     // CALL directly to the runtime
4070     cbuf.set_insts_mark();
4071     emit_opcode(cbuf,0xE8);      // Call into runtime
4072     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4073     // Carry on here...
4074   %}
4075 
4076   enc_class D2X_encoding( regX dst, regD src ) %{   // Move a value from x87 register src into XMM dst as a float, through a 4-byte stack temp.
4077     // Allocate a word
4078     emit_opcode(cbuf,0x83);            // SUB ESP,4
4079     emit_opcode(cbuf,0xEC);
4080     emit_d8(cbuf,0x04);
4081     int pop = 0x02;                    // /2 = FST (no pop); NOTE(review): presumably FPR1L is the x87 top of stack
4082     if ($src$$reg != FPR1L_enc) {
4083       emit_opcode( cbuf, 0xD9 );       // FLD    ST(i-1)
4084       emit_d8( cbuf, 0xC0-1+$src$$reg );
4085       pop = 0x03;                      // /3 = FSTP, so the copy pushed by the FLD above is popped again
4086     }
4087     store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S  [ESP]
4088 
4089     emit_opcode  (cbuf, 0xF3 );        // MOVSS dst(xmm), [ESP]
4090     emit_opcode  (cbuf, 0x0F );
4091     emit_opcode  (cbuf, 0x10 );
4092     encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
4093 
4094     emit_opcode(cbuf,0x83);            // ADD ESP,4
4095     emit_opcode(cbuf,0xC4);
4096     emit_d8(cbuf,0x04);
4097     // Carry on here...
4098   %}
4099 
4100   enc_class FX2I_encoding( regX src, eRegI dst ) %{   // Finish an XMM-to-int convert and call d2i_wrapper if dst is 0x80000000; a non-zero primary opcode selects the double (8-byte) forms.
4101     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);   // ModRM dst,src; NOTE(review): the convert opcode bytes are presumably emitted by the instruct before this class
4102 
4103     // Compare the result to see if we need to go to the slow path
4104     emit_opcode(cbuf,0x81);       // CMP dst,imm
4105     emit_rm    (cbuf,0x3,0x7,$dst$$reg);
4106     emit_d32   (cbuf,0x80000000); //         0x80000000
4107 
4108     emit_opcode(cbuf,0x75);       // JNE around_slow_call
4109     emit_d8    (cbuf,0x13);       // Size of slow_call: SUB(3)+MOVSS/MOVSD(5)+FLD(3)+ADD(3)+CALL(5)
4110     // Store xmm to a temp memory
4111     // location and push it onto stack.
4112 
4113     emit_opcode(cbuf,0x83);  // SUB ESP,4 (float) or SUB ESP,8 (double)
4114     emit_opcode(cbuf,0xEC);
4115     emit_d8(cbuf, $primary ? 0x8 : 0x4);
4116 
4117     emit_opcode  (cbuf, $primary ? 0xF2 : 0xF3 );   // MOVSD (F2) or MOVSS (F3) [ESP], xmm
4118     emit_opcode  (cbuf, 0x0F );
4119     emit_opcode  (cbuf, 0x11 );
4120     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4121 
4122     emit_opcode(cbuf, $primary ? 0xDD : 0xD9 );      // FLD_D (DD) or FLD_S (D9) [ESP]
4123     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4124 
4125     emit_opcode(cbuf,0x83);    // ADD ESP,4 (float) or ADD ESP,8 (double)
4126     emit_opcode(cbuf,0xC4);
4127     emit_d8(cbuf, $primary ? 0x8 : 0x4);
4128 
4129     // CALL directly to the runtime
4130     cbuf.set_insts_mark();
4131     emit_opcode(cbuf,0xE8);       // Call into runtime
4132     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4133 
4134     // Carry on here...
4135   %}
4136 
4137   enc_class X2D_encoding( regD dst, regX src ) %{   // Push the float in XMM src onto the x87 stack through a 4-byte stack temp; dst is not referenced in this body.
4138     // Allocate a word
4139     emit_opcode(cbuf,0x83);     // SUB ESP,4
4140     emit_opcode(cbuf,0xEC);
4141     emit_d8(cbuf,0x04);
4142 
4143     emit_opcode  (cbuf, 0xF3 ); // MOVSS [ESP], xmm
4144     emit_opcode  (cbuf, 0x0F );
4145     emit_opcode  (cbuf, 0x11 );
4146     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4147 
4148     emit_opcode(cbuf,0xD9 );    // FLD_S [ESP]
4149     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4150 
4151     emit_opcode(cbuf,0x83);     // ADD ESP,4
4152     emit_opcode(cbuf,0xC4);
4153     emit_d8(cbuf,0x04);
4154 
4155     // Carry on here...
4156   %}
4157 
4158   enc_class AbsXF_encoding(regX dst) %{   // Float absolute value: AND dst with the constant at float_signmask_pool.
4159     address signmask_address=(address)float_signmask_pool;
4160     // ANDPS  $dst,[signconst]   /* 0F 54 /r */
4161     emit_opcode(cbuf, 0x0F);
4162     emit_opcode(cbuf, 0x54);
4163     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // mod=00, r/m=101: absolute 32-bit address follows
4164     emit_d32(cbuf, (int)signmask_address);
4165   %}
4166 
4167   enc_class AbsXD_encoding(regXD dst) %{   // Double absolute value: AND dst with the constant at double_signmask_pool.
4168     address signmask_address=(address)double_signmask_pool;
4169     // ANDPD  $dst,[signconst]   /* 66 0F 54 /r */
4170     emit_opcode(cbuf, 0x66);
4171     emit_opcode(cbuf, 0x0F);
4172     emit_opcode(cbuf, 0x54);
4173     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // mod=00, r/m=101: absolute 32-bit address follows
4174     emit_d32(cbuf, (int)signmask_address);
4175   %}
4176 
4177   enc_class NegXF_encoding(regX dst) %{   // Float negate: XOR dst with the constant at float_signflip_pool.
4178     address signmask_address=(address)float_signflip_pool;   // despite the local's name, this is the sign-flip constant
4179     // XORPS  $dst,[signconst]   /* 0F 57 /r */
4180     emit_opcode(cbuf, 0x0F);
4181     emit_opcode(cbuf, 0x57);
4182     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // mod=00, r/m=101: absolute 32-bit address follows
4183     emit_d32(cbuf, (int)signmask_address);
4184   %}
4185 
4186   enc_class NegXD_encoding(regXD dst) %{   // Double negate: XOR dst with the constant at double_signflip_pool.
4187     address signmask_address=(address)double_signflip_pool;   // despite the local's name, this is the sign-flip constant
4188     // XORPD  $dst,[signconst]   /* 66 0F 57 /r */
4189     emit_opcode(cbuf, 0x66);
4190     emit_opcode(cbuf, 0x0F);
4191     emit_opcode(cbuf, 0x57);
4192     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // mod=00, r/m=101: absolute 32-bit address follows
4193     emit_d32(cbuf, (int)signmask_address);
4194   %}
4195 
4196   enc_class FMul_ST_reg( eRegF src1 ) %{   // Multiply the x87 stack top by src1, leaving the product in ST.
4197     // Operand was loaded from memory into fp ST (stack top)
4198     // FMUL   ST,$src  /* D8 C8+i */
4199     emit_opcode(cbuf, 0xD8);
4200     emit_opcode(cbuf, 0xC8 + $src1$$reg);   // second byte selects the stack register by its encoding
4201   %}
4202 
4203   enc_class FAdd_ST_reg( eRegF src2 ) %{   // Add src2 to the x87 stack top without popping.
4204     // FADD   ST,src2  /* D8 C0+i */
4205     emit_opcode(cbuf, 0xD8);
4206     emit_opcode(cbuf, 0xC0 + $src2$$reg);
4207     //could use FADDP  src2,fpST  /* DE C0+i */
4208   %}
4209 
4210   enc_class FAddP_reg_ST( eRegF src2 ) %{   // Add the x87 stack top into src2 and pop the stack.
4211     // FADDP  src2,ST  /* DE C0+i */
4212     emit_opcode(cbuf, 0xDE);
4213     emit_opcode(cbuf, 0xC0 + $src2$$reg);
4214   %}
4215 
4216   enc_class subF_divF_encode( eRegF src1, eRegF src2) %{   // Emits FSUB ST,src1 followed by FDIV ST,src2; both results stay in ST.
4217     // Operand has been loaded into fp ST (stack top)
4218       // FSUB   ST,$src1  /* D8 E0+i */
4219       emit_opcode(cbuf, 0xD8);
4220       emit_opcode(cbuf, 0xE0 + $src1$$reg);
4221 
4222       // FDIV   ST,$src2  /* D8 F0+i */
4223       emit_opcode(cbuf, 0xD8);
4224       emit_opcode(cbuf, 0xF0 + $src2$$reg);
4225   %}
4226 
4227   enc_class MulFAddF (eRegF src1, eRegF src2) %{   // Emits FADD ST,src1 followed by FMUL ST,src2 (in that order); the result stays in ST.
4228     // Operand was loaded from memory into fp ST (stack top)
4229     // FADD   ST,$src  /* D8 C0+i */
4230     emit_opcode(cbuf, 0xD8);
4231     emit_opcode(cbuf, 0xC0 + $src1$$reg);
4232 
4233     // FMUL  ST,src2  /* D8 C8+i */
4234     emit_opcode(cbuf, 0xD8);
4235     emit_opcode(cbuf, 0xC8 + $src2$$reg);
4236   %}
4237 
4238 
4239   enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{   // Emits FADD ST,src1 followed by FMULP src2,ST, so the product lands in src2 and the stack is popped.
4240     // Operand was loaded from memory into fp ST (stack top)
4241     // FADD   ST,$src  /* D8 C0+i */
4242     emit_opcode(cbuf, 0xD8);
4243     emit_opcode(cbuf, 0xC0 + $src1$$reg);
4244 
4245     // FMULP  src2,ST  /* DE C8+i */
4246     emit_opcode(cbuf, 0xDE);
4247     emit_opcode(cbuf, 0xC8 + $src2$$reg);
4248   %}
4249 
4250   // Atomically load the volatile long: one 64-bit FILD from mem, then one 64-bit FISTP into the dst stack slot
4251   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
4252     emit_opcode(cbuf,0xDF);            // FILD_D [mem]   (DF /5)
4253     int rm_byte_opcode = 0x05;         // the /5 opcode extension, passed in the ModRM reg field
4254     int base     = $mem$$base;
4255     int index    = $mem$$index;
4256     int scale    = $mem$$scale;
4257     int displace = $mem$$disp;
4258     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4259     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4260     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );   // FISTP_D [ESP+dst]   (DF /7)
4261   %}
4262 
4263   enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{   // Volatile long load through XMM tmp: one 64-bit load from mem, one 64-bit store to the dst stack slot.
4264     { // Atomic long load
4265       // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4266       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4267       emit_opcode(cbuf,0x0F);
4268       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4269       int base     = $mem$$base;
4270       int index    = $mem$$index;
4271       int scale    = $mem$$scale;
4272       int displace = $mem$$disp;
4273       bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4274       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4275     }
4276     { // MOVSD $dst,$tmp ! atomic long store
4277       emit_opcode(cbuf,0xF2);
4278       emit_opcode(cbuf,0x0F);
4279       emit_opcode(cbuf,0x11);
4280       int base     = $dst$$base;
4281       int index    = $dst$$index;
4282       int scale    = $dst$$scale;
4283       int displace = $dst$$disp;
4284       bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
4285       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4286     }
4287   %}
4288 
4289   enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{   // Volatile long load through XMM tmp, then split into the dst register pair (low half, shift right 32, high half).
4290     { // Atomic long load
4291       // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4292       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4293       emit_opcode(cbuf,0x0F);
4294       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4295       int base     = $mem$$base;
4296       int index    = $mem$$index;
4297       int scale    = $mem$$scale;
4298       int displace = $mem$$disp;
4299       bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4300       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4301     }
4302     { // MOVD $dst.lo,$tmp
4303       emit_opcode(cbuf,0x66);
4304       emit_opcode(cbuf,0x0F);
4305       emit_opcode(cbuf,0x7E);
4306       emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
4307     }
4308     { // PSRLQ $tmp,32   (moves the upper 32 bits of the loaded value into the low dword of tmp)
4309       emit_opcode(cbuf,0x66);
4310       emit_opcode(cbuf,0x0F);
4311       emit_opcode(cbuf,0x73);
4312       emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
4313       emit_d8(cbuf, 0x20);
4314     }
4315     { // MOVD $dst.hi,$tmp
4316       emit_opcode(cbuf,0x66);
4317       emit_opcode(cbuf,0x0F);
4318       emit_opcode(cbuf,0x7E);
4319       emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
4320     }
4321   %}
4322 
4323   // Volatile Store Long.  Must be atomic, so move it into
4324   // the FP TOS and then do a 64-bit FIST.  Has to probe the
4325   // target address before the store (for null-ptr checks)
4326   // so the memory operand is used twice in the encoding.  NOTE(review): only the FISTP use is emitted by this class; the probe presumably comes from a separate encoding.
4327   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
4328     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );   // FILD_D [ESP+src]   (DF /5)
4329     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
4330     emit_opcode(cbuf,0xDF);           // FISTP_D [mem]   (DF /7)
4331     int rm_byte_opcode = 0x07;        // the /7 opcode extension, passed in the ModRM reg field
4332     int base     = $mem$$base;
4333     int index    = $mem$$index;
4334     int scale    = $mem$$scale;
4335     int displace = $mem$$disp;
4336     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4337     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4338   %}
4339 
4340   enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{   // Volatile long store through XMM tmp: one 64-bit load from the src stack slot, one 64-bit store to mem.
4341     { // Atomic long load (from the src stack slot)
4342       // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
4343       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4344       emit_opcode(cbuf,0x0F);
4345       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4346       int base     = $src$$base;
4347       int index    = $src$$index;
4348       int scale    = $src$$scale;
4349       int displace = $src$$disp;
4350       bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
4351       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4352     }
4353     cbuf.set_insts_mark();            // Mark start of MOVSD in case $mem has an oop
4354     { // MOVSD $mem,$tmp ! atomic long store
4355       emit_opcode(cbuf,0xF2);
4356       emit_opcode(cbuf,0x0F);
4357       emit_opcode(cbuf,0x11);
4358       int base     = $mem$$base;
4359       int index    = $mem$$index;
4360       int scale    = $mem$$scale;
4361       int displace = $mem$$disp;
4362       bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4363       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4364     }
4365   %}
4366 
4367   enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{   // Volatile long store from a register pair: pack src.lo and src.hi into XMM tmp, then do one 64-bit store to mem.
4368     { // MOVD $tmp,$src.lo
4369       emit_opcode(cbuf,0x66);
4370       emit_opcode(cbuf,0x0F);
4371       emit_opcode(cbuf,0x6E);
4372       emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
4373     }
4374     { // MOVD $tmp2,$src.hi
4375       emit_opcode(cbuf,0x66);
4376       emit_opcode(cbuf,0x0F);
4377       emit_opcode(cbuf,0x6E);
4378       emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
4379     }
4380     { // PUNPCKLDQ $tmp,$tmp2   (interleave low dwords: tmp = hi:lo as one 64-bit value)
4381       emit_opcode(cbuf,0x66);
4382       emit_opcode(cbuf,0x0F);
4383       emit_opcode(cbuf,0x62);
4384       emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
4385     }
4386     cbuf.set_insts_mark();            // Mark start of MOVSD in case $mem has an oop
4387     { // MOVSD $mem,$tmp ! atomic long store
4388       emit_opcode(cbuf,0xF2);
4389       emit_opcode(cbuf,0x0F);
4390       emit_opcode(cbuf,0x11);
4391       int base     = $mem$$base;
4392       int index    = $mem$$index;
4393       int scale    = $mem$$scale;
4394       int displace = $mem$$disp;
4395       bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4396       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4397     }
4398   %}
4399 
4400   // Safepoint Poll.  This polls the safepoint page, and causes an
4401   // exception if it is not readable. Unfortunately, it kills the condition code
4402   // in the process
4403   // We currently use TESTL [spp],EDI
4404   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
4405 
4406   enc_class Safepoint_Poll() %{
4407     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);   // NOTE(review): no set_insts_mark() in this class; the mark is presumably set by the caller
4408     emit_opcode(cbuf,0x85);                                      // TEST r/m32,r32
4409     emit_rm (cbuf, 0x0, 0x7, 0x5);                               // mod=00, reg=7 (EDI), r/m=101: absolute 32-bit address follows
4410     emit_d32(cbuf, (intptr_t)os::get_polling_page());           // address of the polling page
4411   %}
4412 %}
4413 
4414 
4415 //----------FRAME--------------------------------------------------------------
4416 // Definition of frame structure and management information.
4417 //
4418 //  S T A C K   L A Y O U T    Allocators stack-slot number
4419 //                             |   (to get allocators register number
4420 //  G  Owned by    |        |  v    add OptoReg::stack0())
4421 //  r   CALLER     |        |
4422 //  o     |        +--------+      pad to even-align allocators stack-slot
4423 //  w     V        |  pad0  |        numbers; owned by CALLER
4424 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4425 //  h     ^        |   in   |  5
4426 //        |        |  args  |  4   Holes in incoming args owned by SELF
4427 //  |     |        |        |  3
4428 //  |     |        +--------+
4429 //  V     |        | old out|      Empty on Intel, window on Sparc
4430 //        |    old |preserve|      Must be even aligned.
4431 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4432 //        |        |   in   |  3   area for Intel ret address
4433 //     Owned by    |preserve|      Empty on Sparc.
4434 //       SELF      +--------+
4435 //        |        |  pad2  |  2   pad to align old SP
4436 //        |        +--------+  1
4437 //        |        | locks  |  0
4438 //        |        +--------+----> OptoReg::stack0(), even aligned
4439 //        |        |  pad1  | 11   pad to align new SP
4440 //        |        +--------+
4441 //        |        |        | 10
4442 //        |        | spills |  9   spills
4443 //        V        |        |  8   (pad0 slot for callee)
4444 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4445 //        ^        |  out   |  7
4446 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4447 //     Owned by    +--------+
4448 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4449 //        |    new |preserve|      Must be even-aligned.
4450 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4451 //        |        |        |
4452 //
4453 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4454 //         known from SELF's arguments and the Java calling convention.
4455 //         Region 6-7 is determined per call site.
4456 // Note 2: If the calling convention leaves holes in the incoming argument
4457 //         area, those holes are owned by SELF.  Holes in the outgoing area
4458 //         are owned by the CALLEE.  Holes should not be necessary in the
4459 //         incoming area, as the Java calling convention is completely under
4460 //         the control of the AD file.  Doubles can be sorted and packed to
4461 //         avoid holes.  Holes in the outgoing arguments may be necessary for
4462 //         varargs C calling conventions.
4463 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4464 //         even aligned with pad0 as needed.
4465 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4466 //         region 6-11 is even aligned; it may be padded out more so that
4467 //         the region from SP to FP meets the minimum stack alignment.
4468 
4469 frame %{
4470   // What direction does stack grow in (assumed to be same for C & Java)
4471   stack_direction(TOWARDS_LOW);
4472
4473   // These registers define part of the calling convention
4474   // between compiled code and the interpreter.
4475   inline_cache_reg(EAX);                // Inline Cache Register
4476   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
4477
4478   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
4479   cisc_spilling_operand_name(indOffset32);
4480
4481   // Number of stack slots consumed by locking an object
4482   sync_stack_slots(1);
4483
4484   // Compiled code's Frame Pointer
4485   frame_pointer(ESP);
4486   // Interpreter stores its frame pointer in a register which is
4487   // stored to the stack by I2CAdaptors.
4488   // I2CAdaptors convert from interpreted java to compiled java.
4489   interpreter_frame_pointer(EBP);
4490
4491   // Stack alignment requirement
4492   // Alignment size in bytes, taken from StackAlignmentInBytes (e.g. 128-bit -> 16 bytes)
4493   stack_alignment(StackAlignmentInBytes);
4494
4495   // Number of stack slots between incoming argument block and the start of
4496   // a new frame.  The PROLOG must add this many slots to the stack.  The
4497   // EPILOG must remove this many slots.  Intel needs one slot for the
4498   // return address and one for rbp (must save rbp), plus VerifyStackAtCalls more.
4499   in_preserve_stack_slots(2+VerifyStackAtCalls);
4500
4501   // Number of outgoing stack slots killed above the out_preserve_stack_slots
4502   // for calls to C.  Supports the var-args backing area for register parms.
4503   varargs_C_out_slots_killed(0);
4504
4505   // The after-PROLOG location of the return address.  Location of
4506   // return address specifies a type (REG or STACK) and a number
4507   // representing the register number (i.e. - use a register name) or
4508   // stack slot.
4509   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4510   // Otherwise, it is above the locks and verification slot and alignment word
4511   return_addr(STACK - 1 +
4512               round_to(1+VerifyStackAtCalls+
4513               Compile::current()->fixed_slots(),
4514               (StackAlignmentInBytes/wordSize)));
4515
4516   // Body of function which returns an integer array locating
4517   // arguments either in registers or in stack slots.  Passed an array
4518   // of ideal registers called "sig" and a "length" count.  Stack-slot
4519   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4520   // arguments for a CALLEE.  Incoming stack arguments are
4521   // automatically biased by the preserve_stack_slots field above.
4522   calling_convention %{
4523     // No difference between ingoing/outgoing just pass false
4524     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4525   %}
4526
4527
4528   // Same contract as calling_convention above, but for calls to C:
4529   // the body delegates to SharedRuntime::c_calling_convention and
4530   // discards its return value.  Stack-slot
4531   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4532   // arguments for a CALLEE.  Incoming stack arguments are
4533   // automatically biased by the preserve_stack_slots field above.
4534   c_calling_convention %{
4535     // This is obviously always outgoing
4536     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4537   %}
4538
4539   // Location of C & interpreter return values
4540   c_return_value %{
4541     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
4542     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num }; // low half, indexed by ideal_reg (Op_RegL is the last index)
4543     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; // high half; OptoReg::Bad where no second register is named
4544
4545     // in SSE2+ mode we want to keep the FPU stack clean so pretend
4546     // that C functions return float and double results in XMM0.
4547     if( ideal_reg == Op_RegD && UseSSE>=2 )
4548       return OptoRegPair(XMM0b_num,XMM0a_num);
4549     if( ideal_reg == Op_RegF && UseSSE>=2 )
4550       return OptoRegPair(OptoReg::Bad,XMM0a_num);
4551
4552     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
4553   %}
4554
4555   // Location of return values
4556   return_value %{
4557     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
4558     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num }; // same tables as in c_return_value above
4559     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
4560     if( ideal_reg == Op_RegD && UseSSE>=2 )
4561       return OptoRegPair(XMM0b_num,XMM0a_num);
4562     if( ideal_reg == Op_RegF && UseSSE>=1 ) // note: threshold is UseSSE>=1 here, but UseSSE>=2 in c_return_value
4563       return OptoRegPair(OptoReg::Bad,XMM0a_num);
4564     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
4565   %}
4566
4567 %}
4568 
4569 //----------ATTRIBUTES---------------------------------------------------------
4570 //----------Operand Attributes-------------------------------------------------
4571 op_attrib op_cost(0);        // Required cost attribute, declared with value 0 (presumably the default for operands that set no op_cost -- confirm against ADLC)
4572 
4573 //----------Instruction Attributes---------------------------------------------
4574 ins_attrib ins_cost(100);       // Required cost attribute
4575 ins_attrib ins_size(8);         // Required size attribute (in bits)
4576 ins_attrib ins_pc_relative(0);  // Required PC Relative flag
4577 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
4578                                 // non-matching short branch variant of some
4579                                 // long branch?
4580 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
4581                                 // specifies the alignment that some part of the instruction (not
4582                                 // necessarily the start) requires.  If > 1, a compute_padding()
4583                                 // function must be provided for the instruction
4584 
4585 //----------OPERANDS-----------------------------------------------------------
4586 // Operand definitions must precede instruction definitions for correct parsing
4587 // in the ADLC because operands constitute user defined types which are used in
4588 // instruction definitions.
4589 
4590 //----------Simple Operands----------------------------------------------------
4591 // Immediate Operands
4592 // Integer Immediate: matches any ConI (no predicate)
4593 operand immI() %{
4594   match(ConI);
4595
4596   op_cost(10);
4597   format %{ %}
4598   interface(CONST_INTER);
4599 %}
4600 
4601 // Constant for test vs zero: ConI whose value is exactly 0
4602 operand immI0() %{
4603   predicate(n->get_int() == 0);
4604   match(ConI);
4605
4606   op_cost(0);
4607   format %{ %}
4608   interface(CONST_INTER);
4609 %}
4610 
4611 // Constant for increment: ConI whose value is exactly 1 (immI_1 in this file has the same predicate)
4612 operand immI1() %{
4613   predicate(n->get_int() == 1);
4614   match(ConI);
4615
4616   op_cost(0);
4617   format %{ %}
4618   interface(CONST_INTER);
4619 %}
4620 
4621 // Constant for decrement: ConI whose value is exactly -1
4622 operand immI_M1() %{
4623   predicate(n->get_int() == -1);
4624   match(ConI);
4625
4626   op_cost(0);
4627   format %{ %}
4628   interface(CONST_INTER);
4629 %}
4630 
4631 // Valid scale values for addressing modes: ConI in [0, 3]; used as the scale of the indIndexScale* operands
4632 operand immI2() %{
4633   predicate(0 <= n->get_int() && (n->get_int() <= 3));
4634   match(ConI);
4635
4636   format %{ %}
4637   interface(CONST_INTER);
4638 %}
4639 
4640 operand immI8() %{ // ConI that fits in a signed 8-bit value
4641   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
4642   match(ConI);
4643
4644   op_cost(5);
4645   format %{ %}
4646   interface(CONST_INTER);
4647 %}
4648 
4649 operand immI16() %{ // ConI that fits in a signed 16-bit value
4650   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
4651   match(ConI);
4652
4653   op_cost(10);
4654   format %{ %}
4655   interface(CONST_INTER);
4656 %}
4657 
4658 // Constant for long shifts: ConI whose value is exactly 32
4659 operand immI_32() %{
4660   predicate( n->get_int() == 32 );
4661   match(ConI);
4662
4663   op_cost(0);
4664   format %{ %}
4665   interface(CONST_INTER);
4666 %}
4667 
4668 operand immI_1_31() %{ // ConI in [1, 31]
4669   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
4670   match(ConI);
4671
4672   op_cost(0);
4673   format %{ %}
4674   interface(CONST_INTER);
4675 %}
4676 
4677 operand immI_32_63() %{ // ConI in [32, 63]
4678   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
4679   match(ConI);
4680   op_cost(0);
4681
4682   format %{ %}
4683   interface(CONST_INTER);
4684 %}
4685 
4686 operand immI_1() %{ // ConI whose value is exactly 1 (same predicate as immI1 in this file)
4687   predicate( n->get_int() == 1 );
4688   match(ConI);
4689
4690   op_cost(0);
4691   format %{ %}
4692   interface(CONST_INTER);
4693 %}
4694 
4695 operand immI_2() %{ // ConI whose value is exactly 2
4696   predicate( n->get_int() == 2 );
4697   match(ConI);
4698
4699   op_cost(0);
4700   format %{ %}
4701   interface(CONST_INTER);
4702 %}
4703 
4704 operand immI_3() %{ // ConI whose value is exactly 3
4705   predicate( n->get_int() == 3 );
4706   match(ConI);
4707
4708   op_cost(0);
4709   format %{ %}
4710   interface(CONST_INTER);
4711 %}
4712 
4713 // Pointer Immediate: matches any ConP (no predicate)
4714 operand immP() %{
4715   match(ConP);
4716
4717   op_cost(10);
4718   format %{ %}
4719   interface(CONST_INTER);
4720 %}
4721 
4722 // NULL Pointer Immediate: ConP whose pointer value is 0
4723 operand immP0() %{
4724   predicate( n->get_ptr() == 0 );
4725   match(ConP);
4726   op_cost(0);
4727
4728   format %{ %}
4729   interface(CONST_INTER);
4730 %}
4731 
4732 // Long Immediate: matches any ConL (no predicate)
4733 operand immL() %{
4734   match(ConL);
4735
4736   op_cost(20);
4737   format %{ %}
4738   interface(CONST_INTER);
4739 %}
4740 
4741 // Long Immediate zero: ConL whose value is exactly 0
4742 operand immL0() %{
4743   predicate( n->get_long() == 0L );
4744   match(ConL);
4745   op_cost(0);
4746
4747   format %{ %}
4748   interface(CONST_INTER);
4749 %}
4750 
4751 // Long Immediate minus one: ConL whose value is exactly -1
4752 operand immL_M1() %{
4753   predicate( n->get_long() == -1L );
4754   match(ConL);
4755   op_cost(0);
4756
4757   format %{ %}
4758   interface(CONST_INTER);
4759 %}
4760 
4761 // Long immediate from 0 to 127 (inclusive on both ends).
4762 // Used for a shorter form of long mul by 10.
4763 operand immL_127() %{
4764   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
4765   match(ConL);
4766   op_cost(0);
4767
4768   format %{ %}
4769   interface(CONST_INTER);
4770 %}
4771 
4772 // Long Immediate: low 32-bit mask, i.e. ConL whose value is exactly 0xFFFFFFFF
4773 operand immL_32bits() %{
4774   predicate(n->get_long() == 0xFFFFFFFFL);
4775   match(ConL);
4776   op_cost(0);
4777
4778   format %{ %}
4779   interface(CONST_INTER);
4780 %}
4781 
4782 // Long Immediate that survives a round trip through (int), i.e. fits in a signed 32-bit value
4783 operand immL32() %{
4784   predicate(n->get_long() == (int)(n->get_long()));
4785   match(ConL);
4786   op_cost(20);
4787
4788   format %{ %}
4789   interface(CONST_INTER);
4790 %}
4791 
4792 // Double Immediate zero (only when UseSSE<=1)
4793 operand immD0() %{
4794   // Do additional (and counter-intuitive) test against NaN to work around VC++
4795   // bug that generates code such that NaNs compare equal to 0.0
4796   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); // NOTE(review): a value compare, so -0.0 also satisfies it -- confirm users tolerate that
4797   match(ConD);
4798
4799   op_cost(5);
4800   format %{ %}
4801   interface(CONST_INTER);
4802 %}
4803 
4804 // Double Immediate one: ConD equal to 1.0 (only when UseSSE<=1)
4805 operand immD1() %{
4806   predicate( UseSSE<=1 && n->getd() == 1.0 );
4807   match(ConD);
4808
4809   op_cost(5);
4810   format %{ %}
4811   interface(CONST_INTER);
4812 %}
4813 
4814 // Double Immediate: any ConD, only when UseSSE<=1 (immXD covers UseSSE>=2)
4815 operand immD() %{
4816   predicate(UseSSE<=1);
4817   match(ConD);
4818
4819   op_cost(5);
4820   format %{ %}
4821   interface(CONST_INTER);
4822 %}
4823 
4824 operand immXD() %{ // Double Immediate: any ConD, only when UseSSE>=2 (immD covers UseSSE<=1)
4825   predicate(UseSSE>=2);
4826   match(ConD);
4827
4828   op_cost(5);
4829   format %{ %}
4830   interface(CONST_INTER);
4831 %}
4832 
4833 // Double Immediate zero (only when UseSSE>=2)
4834 operand immXD0() %{
4835   // Do additional (and counter-intuitive) test against NaN to work around VC++
4836   // bug that generates code such that NaNs compare equal to 0.0 AND do not
4837   // compare equal to -0.0.
4838   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); // compares the jlong_cast result to 0 rather than the double to 0.0; presumably a bit-pattern test that rejects -0.0 and NaN -- confirm jlong_cast
4839   match(ConD);
4840
4841   format %{ %}
4842   interface(CONST_INTER);
4843 %}
4844 
4845 // Float Immediate zero (only when UseSSE == 0)
4846 operand immF0() %{
4847   predicate( UseSSE == 0 && n->getf() == 0.0 ); // NOTE(review): a value compare, so -0.0 also satisfies it -- confirm users tolerate that
4848   match(ConF);
4849
4850   op_cost(5);
4851   format %{ %}
4852   interface(CONST_INTER);
4853 %}
4854 
4855 // Float Immediate: any ConF, only when UseSSE == 0 (immXF covers UseSSE >= 1)
4856 operand immF() %{
4857   predicate( UseSSE == 0 );
4858   match(ConF);
4859
4860   op_cost(5);
4861   format %{ %}
4862   interface(CONST_INTER);
4863 %}
4864 
4865 // Float Immediate: any ConF, only when UseSSE >= 1 (immF covers UseSSE == 0)
4866 operand immXF() %{
4867   predicate(UseSSE >= 1);
4868   match(ConF);
4869
4870   op_cost(5);
4871   format %{ %}
4872   interface(CONST_INTER);
4873 %}
4874 
4875 // Float Immediate zero.  Zero and not -0.0 (only when UseSSE >= 1)
4876 operand immXF0() %{
4877   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); // compares the jint_cast result to 0 rather than the float to 0.0
4878   match(ConF);
4879
4880   op_cost(5);
4881   format %{ %}
4882   interface(CONST_INTER);
4883 %}
4884 
4885 // Immediates for special shifts (sign extend)
4886 
4887 // Constant 16: ConI whose value is exactly 16 (presumably a shift count for the sign-extend idiom -- confirm at use sites)
4888 operand immI_16() %{
4889   predicate( n->get_int() == 16 );
4890   match(ConI);
4891
4892   format %{ %}
4893   interface(CONST_INTER);
4894 %}
4895 
4896 operand immI_24() %{ // ConI whose value is exactly 24
4897   predicate( n->get_int() == 24 );
4898   match(ConI);
4899
4900   format %{ %}
4901   interface(CONST_INTER);
4902 %}
4903 
4904 // Constant for byte-wide masking: ConI whose value is exactly 255 (0xFF)
4905 operand immI_255() %{
4906   predicate( n->get_int() == 255 );
4907   match(ConI);
4908
4909   format %{ %}
4910   interface(CONST_INTER);
4911 %}
4912 
4913 // Constant for short-wide masking: ConI whose value is exactly 65535 (0xFFFF)
4914 operand immI_65535() %{
4915   predicate(n->get_int() == 65535);
4916   match(ConI);
4917
4918   format %{ %}
4919   interface(CONST_INTER);
4920 %}
4921 
4922 // Register Operands
4923 // Integer Register: RegI allocated from register class e_reg; also matches the narrower integer register operands listed below
4924 operand eRegI() %{
4925   constraint(ALLOC_IN_RC(e_reg));
4926   match(RegI);
4927   match(xRegI);
4928   match(eAXRegI);
4929   match(eBXRegI);
4930   match(eCXRegI);
4931   match(eDXRegI);
4932   match(eDIRegI);
4933   match(eSIRegI);
4934
4935   format %{ %}
4936   interface(REG_INTER);
4937 %}
4938 
4939 // Subset of Integer Register: allocated from class x_reg; matches only the EAX/EBX/ECX/EDX operands
4940 operand xRegI(eRegI reg) %{
4941   constraint(ALLOC_IN_RC(x_reg));
4942   match(reg);
4943   match(eAXRegI);
4944   match(eBXRegI);
4945   match(eCXRegI);
4946   match(eDXRegI);
4947
4948   format %{ %}
4949   interface(REG_INTER);
4950 %}
4951 
4952 // Special Registers: integer operand allocated from class eax_reg, printed as "EAX"
4953 operand eAXRegI(xRegI reg) %{
4954   constraint(ALLOC_IN_RC(eax_reg));
4955   match(reg);
4956   match(eRegI);
4957
4958   format %{ "EAX" %}
4959   interface(REG_INTER);
4960 %}
4961 
4962 // Special Registers: integer operand allocated from class ebx_reg, printed as "EBX"
4963 operand eBXRegI(xRegI reg) %{
4964   constraint(ALLOC_IN_RC(ebx_reg));
4965   match(reg);
4966   match(eRegI);
4967
4968   format %{ "EBX" %}
4969   interface(REG_INTER);
4970 %}
4971 
4972 operand eCXRegI(xRegI reg) %{ // integer operand allocated from class ecx_reg, printed as "ECX"
4973   constraint(ALLOC_IN_RC(ecx_reg));
4974   match(reg);
4975   match(eRegI);
4976
4977   format %{ "ECX" %}
4978   interface(REG_INTER);
4979 %}
4980 
4981 operand eDXRegI(xRegI reg) %{ // integer operand allocated from class edx_reg, printed as "EDX"
4982   constraint(ALLOC_IN_RC(edx_reg));
4983   match(reg);
4984   match(eRegI);
4985
4986   format %{ "EDX" %}
4987   interface(REG_INTER);
4988 %}
4989 
4990 operand eDIRegI(xRegI reg) %{ // integer operand allocated from class edi_reg, printed as "EDI"
4991   constraint(ALLOC_IN_RC(edi_reg));
4992   match(reg);
4993   match(eRegI);
4994
4995   format %{ "EDI" %}
4996   interface(REG_INTER);
4997 %}
4998 
4999 operand naxRegI() %{ // integer register from class nax_reg (presumably "not EAX"; the class is defined outside this section)
5000   constraint(ALLOC_IN_RC(nax_reg));
5001   match(RegI);
5002   match(eCXRegI); // NOTE(review): eBXRegI is not listed here although only EAX is excluded by name -- confirm intended
5003   match(eDXRegI);
5004   match(eSIRegI);
5005   match(eDIRegI);
5006
5007   format %{ %}
5008   interface(REG_INTER);
5009 %}
5010 
5011 operand nadxRegI() %{ // integer register from class nadx_reg (presumably "not EAX/EDX"); neither eAXRegI nor eDXRegI is matched
5012   constraint(ALLOC_IN_RC(nadx_reg));
5013   match(RegI);
5014   match(eBXRegI);
5015   match(eCXRegI);
5016   match(eSIRegI);
5017   match(eDIRegI);
5018
5019   format %{ %}
5020   interface(REG_INTER);
5021 %}
5022 
5023 operand ncxRegI() %{ // integer register from class ncx_reg (presumably "not ECX"); eCXRegI is not matched
5024   constraint(ALLOC_IN_RC(ncx_reg));
5025   match(RegI);
5026   match(eAXRegI); // NOTE(review): eBXRegI is not listed here either -- confirm intended
5027   match(eDXRegI);
5028   match(eSIRegI);
5029   match(eDIRegI);
5030
5031   format %{ %}
5032   interface(REG_INTER);
5033 %}
5034 
5035 // Integer operand allocated from class esi_reg, printed as "ESI".
5036 // Historical note: this operand was used by cmpFastUnlock, but conflicted with 'object' reg
5037 operand eSIRegI(xRegI reg) %{
5038    constraint(ALLOC_IN_RC(esi_reg));
5039    match(reg);
5040    match(eRegI);
5041
5042    format %{ "ESI" %}
5043    interface(REG_INTER);
5044 %}
5045 
5046 // Pointer Register: RegP allocated from class any_reg; also matches eRegP and the listed fixed-register pointer operands
5047 operand anyRegP() %{
5048   constraint(ALLOC_IN_RC(any_reg));
5049   match(RegP);
5050   match(eAXRegP);
5051   match(eBXRegP);
5052   match(eCXRegP);
5053   match(eDIRegP);
5054   match(eRegP);
5055
5056   format %{ %}
5057   interface(REG_INTER);
5058 %}
5059 
5060 operand eRegP() %{ // Pointer register: RegP allocated from class e_reg
5061   constraint(ALLOC_IN_RC(e_reg));
5062   match(RegP);
5063   match(eAXRegP);
5064   match(eBXRegP);
5065   match(eCXRegP);
5066   match(eDIRegP);
5067
5068   format %{ %}
5069   interface(REG_INTER);
5070 %}
5071 
5072 // On windows95, EBP is not safe to use for implicit null tests, so this
5072 // pointer operand allocates from class e_reg_no_rbp; base of the *_win95_safe memory operands.
5073 operand eRegP_no_EBP() %{
5074   constraint(ALLOC_IN_RC(e_reg_no_rbp));
5075   match(RegP);
5076   match(eAXRegP);
5077   match(eBXRegP);
5078   match(eCXRegP);
5079   match(eDIRegP);
5080
5081   op_cost(100);
5082   format %{ %}
5083   interface(REG_INTER);
5084 %}
5085 
5086 operand naxRegP() %{ // pointer register from class nax_reg (presumably "not EAX"); eAXRegP is not matched
5087   constraint(ALLOC_IN_RC(nax_reg));
5088   match(RegP);
5089   match(eBXRegP);
5090   match(eDXRegP); // NOTE(review): no eDXRegP operand is defined in this section of the file -- confirm it exists elsewhere
5091   match(eCXRegP);
5092   match(eSIRegP);
5093   match(eDIRegP);
5094
5095   format %{ %}
5096   interface(REG_INTER);
5097 %}
5098 
5099 operand nabxRegP() %{ // pointer register from class nabx_reg (presumably "not EAX/EBX"); neither eAXRegP nor eBXRegP is matched
5100   constraint(ALLOC_IN_RC(nabx_reg));
5101   match(RegP);
5102   match(eCXRegP);
5103   match(eDXRegP); // NOTE(review): no eDXRegP operand is defined in this section of the file -- confirm it exists elsewhere
5104   match(eSIRegP);
5105   match(eDIRegP);
5106
5107   format %{ %}
5108   interface(REG_INTER);
5109 %}
5110 
5111 operand pRegP() %{ // pointer register from class p_reg (defined outside this section); matches the EBX/EDX/ESI/EDI pointer operands
5112   constraint(ALLOC_IN_RC(p_reg));
5113   match(RegP);
5114   match(eBXRegP);
5115   match(eDXRegP); // NOTE(review): no eDXRegP operand is defined in this section of the file -- confirm it exists elsewhere
5116   match(eSIRegP);
5117   match(eDIRegP);
5118
5119   format %{ %}
5120   interface(REG_INTER);
5121 %}
5122 
5123 // Special Registers
5124 // Return a pointer value: pointer operand allocated from class eax_reg, printed as "EAX"
5125 operand eAXRegP(eRegP reg) %{
5126   constraint(ALLOC_IN_RC(eax_reg));
5127   match(reg);
5128   format %{ "EAX" %}
5129   interface(REG_INTER);
5130 %}
5131 
5132 // Used in AtomicAdd: pointer operand allocated from class ebx_reg, printed as "EBX"
5133 operand eBXRegP(eRegP reg) %{
5134   constraint(ALLOC_IN_RC(ebx_reg));
5135   match(reg);
5136   format %{ "EBX" %}
5137   interface(REG_INTER);
5138 %}
5139 
5140 // Tail-call (interprocedural jump) to interpreter: pointer operand allocated from class ecx_reg, printed as "ECX"
5141 operand eCXRegP(eRegP reg) %{
5142   constraint(ALLOC_IN_RC(ecx_reg));
5143   match(reg);
5144   format %{ "ECX" %}
5145   interface(REG_INTER);
5146 %}
5147 
5148 operand eSIRegP(eRegP reg) %{ // pointer operand allocated from class esi_reg, printed as "ESI"; also matched by load_long_RegP
5149   constraint(ALLOC_IN_RC(esi_reg));
5150   match(reg);
5151   format %{ "ESI" %}
5152   interface(REG_INTER);
5153 %}
5154 
5155 // Used in rep stosw: pointer operand allocated from class edi_reg, printed as "EDI"
5156 operand eDIRegP(eRegP reg) %{
5157   constraint(ALLOC_IN_RC(edi_reg));
5158   match(reg);
5159   format %{ "EDI" %}
5160   interface(REG_INTER);
5161 %}
5162 
5163 operand eBPRegP() %{ // pointer operand allocated from class ebp_reg, printed as "EBP"; matches RegP directly and takes no operand parameter
5164   constraint(ALLOC_IN_RC(ebp_reg));
5165   match(RegP);
5166   format %{ "EBP" %}
5167   interface(REG_INTER);
5168 %}
5169 
5170 operand eRegL() %{ // Long register: RegL allocated from class long_reg; also matches eADXRegL
5171   constraint(ALLOC_IN_RC(long_reg));
5172   match(RegL);
5173   match(eADXRegL);
5174
5175   format %{ %}
5176   interface(REG_INTER);
5177 %}
5178 
5179 operand eADXRegL( eRegL reg ) %{ // long operand allocated from class eadx_reg, printed as "EDX:EAX"
5180   constraint(ALLOC_IN_RC(eadx_reg));
5181   match(reg);
5182
5183   format %{ "EDX:EAX" %}
5184   interface(REG_INTER);
5185 %}
5186 
5187 operand eBCXRegL( eRegL reg ) %{ // long operand allocated from class ebcx_reg, printed as "EBX:ECX"
5188   constraint(ALLOC_IN_RC(ebcx_reg));
5189   match(reg);
5190
5191   format %{ "EBX:ECX" %}
5192   interface(REG_INTER);
5193 %}
5194 
5195 // Special case for integer high multiply: same register class as eADXRegL (eadx_reg) but printed as "EAX" only
5196 operand eADXRegL_low_only() %{
5197   constraint(ALLOC_IN_RC(eadx_reg));
5198   match(RegL);
5199
5200   format %{ "EAX" %}
5201   interface(REG_INTER);
5202 %}
5203 
5204 // Flags register, used as output of compare instructions (class int_flags, printed as "EFLAGS")
5205 operand eFlagsReg() %{
5206   constraint(ALLOC_IN_RC(int_flags));
5207   match(RegFlags);
5208
5209   format %{ "EFLAGS" %}
5210   interface(REG_INTER);
5211 %}
5212 
5213 // Flags register, used as output of FLOATING POINT compare instructions (same class int_flags, printed as "EFLAGS_U")
5214 operand eFlagsRegU() %{
5215   constraint(ALLOC_IN_RC(int_flags));
5216   match(RegFlags);
5217
5218   format %{ "EFLAGS_U" %}
5219   interface(REG_INTER);
5220 %}
5221 
5222 operand eFlagsRegUCF() %{ // flags operand in class int_flags, printed as "EFLAGS_U_CF"
5223   constraint(ALLOC_IN_RC(int_flags));
5224   match(RegFlags);
5225   predicate(false); // predicate is constant false; presumably so the matcher never selects this operand on its own -- confirm
5226
5227   format %{ "EFLAGS_U_CF" %}
5228   interface(REG_INTER);
5229 %}
5230 
5231 // Condition Code Register used by long compare (class int_flags, printed as "FLAGS_LTGE")
5232 operand flagsReg_long_LTGE() %{
5233   constraint(ALLOC_IN_RC(int_flags));
5234   match(RegFlags);
5235   format %{ "FLAGS_LTGE" %}
5236   interface(REG_INTER);
5237 %}
5238 operand flagsReg_long_EQNE() %{ // long-compare flags operand (class int_flags, printed as "FLAGS_EQNE")
5239   constraint(ALLOC_IN_RC(int_flags));
5240   match(RegFlags);
5241   format %{ "FLAGS_EQNE" %}
5242   interface(REG_INTER);
5243 %}
5244 operand flagsReg_long_LEGT() %{ // long-compare flags operand (class int_flags, printed as "FLAGS_LEGT")
5245   constraint(ALLOC_IN_RC(int_flags));
5246   match(RegFlags);
5247   format %{ "FLAGS_LEGT" %}
5248   interface(REG_INTER);
5249 %}
5250 
5251 // Double register operands used when UseSSE < 2 (class dbl_reg; regXD covers UseSSE>=2)
5252 operand regD() %{
5253   predicate( UseSSE < 2 );
5254   constraint(ALLOC_IN_RC(dbl_reg));
5255   match(RegD);
5256   match(regDPR1);
5257   match(regDPR2);
5258   format %{ %}
5259   interface(REG_INTER);
5260 %}
5261 
5262 operand regDPR1(regD reg) %{ // double operand allocated from class dbl_reg0, printed as "FPR1"; only when UseSSE < 2
5263   predicate( UseSSE < 2 );
5264   constraint(ALLOC_IN_RC(dbl_reg0));
5265   match(reg);
5266   format %{ "FPR1" %}
5267   interface(REG_INTER);
5268 %}
5269 
5270 operand regDPR2(regD reg) %{ // double operand allocated from class dbl_reg1, printed as "FPR2"; only when UseSSE < 2
5271   predicate( UseSSE < 2 );
5272   constraint(ALLOC_IN_RC(dbl_reg1));
5273   match(reg);
5274   format %{ "FPR2" %}
5275   interface(REG_INTER);
5276 %}
5277 
5278 operand regnotDPR1(regD reg) %{ // double operand allocated from class dbl_notreg0 (presumably every dbl_reg register except the one in dbl_reg0 -- confirm); only when UseSSE < 2
5279   predicate( UseSSE < 2 );
5280   constraint(ALLOC_IN_RC(dbl_notreg0));
5281   match(reg);
5282   format %{ %}
5283   interface(REG_INTER);
5284 %}
5285 
5286 // XMM Double register operands: RegD allocated from class xdb_reg, only when UseSSE>=2
5287 operand regXD() %{
5288   predicate( UseSSE>=2 );
5289   constraint(ALLOC_IN_RC(xdb_reg));
5290   match(RegD);
5291   match(regXD6);
5292   match(regXD7);
5293   format %{ %}
5294   interface(REG_INTER);
5295 %}
5296 
5297 // XMM6 double register operands (class xdb_reg6, only when UseSSE>=2)
5298 operand regXD6(regXD reg) %{
5299   predicate( UseSSE>=2 );
5300   constraint(ALLOC_IN_RC(xdb_reg6));
5301   match(reg);
5302   format %{ "XMM6" %}
5303   interface(REG_INTER);
5304 %}
5305 
5306 // XMM7 double register operands (class xdb_reg7, only when UseSSE>=2)
5307 operand regXD7(regXD reg) %{
5308   predicate( UseSSE>=2 );
5309   constraint(ALLOC_IN_RC(xdb_reg7));
5310   match(reg);
5311   format %{ "XMM7" %}
5312   interface(REG_INTER);
5313 %}
5314 
5315 // Float register operands: RegF allocated from class flt_reg
5316 operand regF() %{
5317   predicate( UseSSE < 2 ); // NOTE(review): overlaps with regX (UseSSE>=1) when UseSSE==1 -- confirm intended
5318   constraint(ALLOC_IN_RC(flt_reg));
5319   match(RegF);
5320   match(regFPR1);
5321   format %{ %}
5322   interface(REG_INTER);
5323 %}
5324 
5325 // Float register operand allocated from class flt_reg0, printed as "FPR1"; only when UseSSE < 2
5326 operand regFPR1(regF reg) %{
5327   predicate( UseSSE < 2 );
5328   constraint(ALLOC_IN_RC(flt_reg0));
5329   match(reg);
5330   format %{ "FPR1" %}
5331   interface(REG_INTER);
5332 %}
5333 
5334 // XMM register operands: RegF allocated from class xmm_reg, only when UseSSE>=1
5335 operand regX() %{
5336   predicate( UseSSE>=1 );
5337   constraint(ALLOC_IN_RC(xmm_reg));
5338   match(RegF);
5339   format %{ %}
5340   interface(REG_INTER);
5341 %}
5342 
5343 
5344 //----------Memory Operands----------------------------------------------------
5345 // Direct Memory Operand: absolute address taken from a pointer immediate
5346 operand direct(immP addr) %{
5347   match(addr);
5348
5349   format %{ "[$addr]" %}
5350   interface(MEMORY_INTER) %{
5351     base(0xFFFFFFFF); // presumably a "no base register" marker -- confirm against the encoder
5352     index(0x4);       // 0x4 = no index (as annotated on the stackSlot operands)
5353     scale(0x0);
5354     disp($addr);
5355   %}
5356 %}
5357 
5358 // Indirect Memory Operand: [reg] with no index, scale or displacement
5359 operand indirect(eRegP reg) %{
5360   constraint(ALLOC_IN_RC(e_reg));
5361   match(reg);
5362
5363   format %{ "[$reg]" %}
5364   interface(MEMORY_INTER) %{
5365     base($reg);
5366     index(0x4); // 0x4 = no index (as annotated on the stackSlot operands)
5367     scale(0x0);
5368     disp(0x0);
5369   %}
5370 %}
5371 
5372 // Indirect Memory Plus Short Offset Operand: [reg + off] where off is an immI8 (signed 8-bit range)
5373 operand indOffset8(eRegP reg, immI8 off) %{
5374   match(AddP reg off);
5375
5376   format %{ "[$reg + $off]" %}
5377   interface(MEMORY_INTER) %{
5378     base($reg);
5379     index(0x4); // 0x4 = no index (as annotated on the stackSlot operands)
5380     scale(0x0);
5381     disp($off);
5382   %}
5383 %}
5384 
5385 // Indirect Memory Plus Long Offset Operand: [reg + off] with any immI offset; named as the cisc-spilling operand in the frame block
5386 operand indOffset32(eRegP reg, immI off) %{
5387   match(AddP reg off);
5388
5389   format %{ "[$reg + $off]" %}
5390   interface(MEMORY_INTER) %{
5391     base($reg);
5392     index(0x4); // 0x4 = no index (as annotated on the stackSlot operands)
5393     scale(0x0);
5394     disp($off);
5395   %}
5396 %}
5397 
5398 // Indirect Memory Plus Long Offset Operand, reversed roles: the pointer is the immediate (immP off) and the register is an integer (eRegI)
5399 operand indOffset32X(eRegI reg, immP off) %{
5400   match(AddP off reg);
5401
5402   format %{ "[$reg + $off]" %}
5403   interface(MEMORY_INTER) %{
5404     base($reg);
5405     index(0x4); // 0x4 = no index (as annotated on the stackSlot operands)
5406     scale(0x0);
5407     disp($off);
5408   %}
5409 %}
5410 
5411 // Indirect Memory Plus Index Register Plus Offset Operand: [reg + ireg + off], unscaled index
5412 operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
5413   match(AddP (AddP reg ireg) off);
5414
5415   op_cost(10);
5416   format %{"[$reg + $off + $ireg]" %}
5417   interface(MEMORY_INTER) %{
5418     base($reg);
5419     index($ireg);
5420     scale(0x0);
5421     disp($off);
5422   %}
5423 %}
5424 
5425 // Indirect Memory Plus Index Register Operand: [reg + ireg], unscaled index and zero displacement
5426 operand indIndex(eRegP reg, eRegI ireg) %{
5427   match(AddP reg ireg);
5428
5429   op_cost(10);
5430   format %{"[$reg + $ireg]" %}
5431   interface(MEMORY_INTER) %{
5432     base($reg);
5433     index($ireg);
5434     scale(0x0);
5435     disp(0x0);
5436   %}
5437 %}
5438 
5439 // // -------------------------------------------------------------------------
5440 // // 486 architecture doesn't support "scale * index + offset" without a base
5441 // // -------------------------------------------------------------------------
5442 // // Scaled Memory Operands
5443 // // Indirect Memory Times Scale Plus Offset Operand
5444 // operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
5445 //   match(AddP off (LShiftI ireg scale));
5446 //
5447 //   op_cost(10);
5448 //   format %{"[$off + $ireg << $scale]" %}
5449 //   interface(MEMORY_INTER) %{
5450 //     base(0x4);
5451 //     index($ireg);
5452 //     scale($scale);
5453 //     disp($off);
5454 //   %}
5455 // %}
5456 
5457 // Indirect Memory Times Scale Plus Index Register: [reg + (ireg << scale)], scale is an immI2 (0..3)
5458 operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
5459   match(AddP reg (LShiftI ireg scale));
5460
5461   op_cost(10);
5462   format %{"[$reg + $ireg << $scale]" %}
5463   interface(MEMORY_INTER) %{
5464     base($reg);
5465     index($ireg);
5466     scale($scale);
5467     disp(0x0);
5468   %}
5469 %}
5470 
5471 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand: [reg + (ireg << scale) + off]
5472 operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
5473   match(AddP (AddP reg (LShiftI ireg scale)) off);
5474
5475   op_cost(10);
5476   format %{"[$reg + $off + $ireg << $scale]" %}
5477   interface(MEMORY_INTER) %{
5478     base($reg);
5479     index($ireg);
5480     scale($scale);
5481     disp($off);
5482   %}
5483 %}
5484 
5485 //----------Load Long Memory Operands------------------------------------------
5486 // The load-long idiom will use its address expression again after loading
5487 // the first word of the long.  If the load-long destination overlaps with
5488 // registers used in the addressing expression, the 2nd half will be loaded
5489 // from a clobbered address.  Fix this by requiring that load-long use
5490 // address registers that do not overlap with the load-long target.
5491 
5492 // load-long support: address register restricted to class esi_reg, with a high op_cost of 100
5493 operand load_long_RegP() %{
5494   constraint(ALLOC_IN_RC(esi_reg));
5495   match(RegP);
5496   match(eSIRegP);
5497   op_cost(100);
5498   format %{  %}
5499   interface(REG_INTER);
5500 %}
5501 
5502 // Indirect Memory Operand Long: [reg] where reg is a load_long_RegP (class esi_reg)
5503 operand load_long_indirect(load_long_RegP reg) %{
5504   constraint(ALLOC_IN_RC(esi_reg));
5505   match(reg);
5506
5507   format %{ "[$reg]" %}
5508   interface(MEMORY_INTER) %{
5509     base($reg);
5510     index(0x4); // 0x4 = no index (as annotated on the stackSlot operands)
5511     scale(0x0);
5512     disp(0x0);
5513   %}
5514 %}
5515 
5516 // Indirect Memory Plus Long Offset Operand for load-long: [reg + off] where reg is a load_long_RegP
5517 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
5518   match(AddP reg off);
5519
5520   format %{ "[$reg + $off]" %}
5521   interface(MEMORY_INTER) %{
5522     base($reg);
5523     index(0x4); // 0x4 = no index (as annotated on the stackSlot operands)
5524     scale(0x0);
5525     disp($off);
5526   %}
5527 %}
5528 
5529 opclass load_long_memory(load_long_indirect, load_long_indOffset32); // groups the two load-long addressing forms defined above
5530 
5531 
5532 //----------Special Memory Operands--------------------------------------------
5533 // Stack Slot Operand - This operand is used for loading and storing temporary
5534 //                      values on the stack where a match requires a value to
5535 //                      flow through memory.
5536 operand stackSlotP(sRegP reg) %{ // pointer (sRegP) stack slot, addressed as [ESP + slot offset]
5537   constraint(ALLOC_IN_RC(stack_slots));
5538   // No match rule because this operand is only generated in matching
5539   format %{ "[$reg]" %}
5540   interface(MEMORY_INTER) %{
5541     base(0x4);   // ESP
5542     index(0x4);  // No Index
5543     scale(0x0);  // No Scale
5544     disp($reg);  // Stack Offset
5545   %}
5546 %}
5547 
5548 operand stackSlotI(sRegI reg) %{ // int (sRegI) stack slot, addressed as [ESP + slot offset]
5549   constraint(ALLOC_IN_RC(stack_slots));
5550   // No match rule because this operand is only generated in matching
5551   format %{ "[$reg]" %}
5552   interface(MEMORY_INTER) %{
5553     base(0x4);   // ESP
5554     index(0x4);  // No Index
5555     scale(0x0);  // No Scale
5556     disp($reg);  // Stack Offset
5557   %}
5558 %}
5559 
5560 operand stackSlotF(sRegF reg) %{ // float (sRegF) stack slot, addressed as [ESP + slot offset]
5561   constraint(ALLOC_IN_RC(stack_slots));
5562   // No match rule because this operand is only generated in matching
5563   format %{ "[$reg]" %}
5564   interface(MEMORY_INTER) %{
5565     base(0x4);   // ESP
5566     index(0x4);  // No Index
5567     scale(0x0);  // No Scale
5568     disp($reg);  // Stack Offset
5569   %}
5570 %}
5571 
5572 operand stackSlotD(sRegD reg) %{ // double (sRegD) stack slot, addressed as [ESP + slot offset]
5573   constraint(ALLOC_IN_RC(stack_slots));
5574   // No match rule because this operand is only generated in matching
5575   format %{ "[$reg]" %}
5576   interface(MEMORY_INTER) %{
5577     base(0x4);   // ESP
5578     index(0x4);  // No Index
5579     scale(0x0);  // No Scale
5580     disp($reg);  // Stack Offset
5581   %}
5582 %}
5583 
5584 operand stackSlotL(sRegL reg) %{ // long (sRegL) stack slot, addressed as [ESP + slot offset]
5585   constraint(ALLOC_IN_RC(stack_slots));
5586   // No match rule because this operand is only generated in matching
5587   format %{ "[$reg]" %}
5588   interface(MEMORY_INTER) %{
5589     base(0x4);   // ESP
5590     index(0x4);  // No Index
5591     scale(0x0);  // No Scale
5592     disp($reg);  // Stack Offset
5593   %}
5594 %}
5595 
5596 //----------Memory Operands - Win95 Implicit Null Variants----------------
5597 // Indirect Memory Operand, Win95-safe variant: base is an eRegP_no_EBP; op_cost 100
5598 operand indirect_win95_safe(eRegP_no_EBP reg)
5599 %{
5600   constraint(ALLOC_IN_RC(e_reg)); // NOTE(review): class is e_reg although the base operand uses e_reg_no_rbp -- confirm intended
5601   match(reg);
5602
5603   op_cost(100);
5604   format %{ "[$reg]" %}
5605   interface(MEMORY_INTER) %{
5606     base($reg);
5607     index(0x4); // 0x4 = no index (as annotated on the stackSlot operands)
5608     scale(0x0);
5609     disp(0x0);
5610   %}
5611 %}
5612 
5613 // Indirect Memory Plus Short Offset Operand, Win95-safe variant: [reg + off] with an eRegP_no_EBP base; op_cost 100
5614 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
5615 %{
5616   match(AddP reg off);
5617
5618   op_cost(100);
5619   format %{ "[$reg + $off]" %}
5620   interface(MEMORY_INTER) %{
5621     base($reg);
5622     index(0x4); // 0x4 = no index (as annotated on the stackSlot operands)
5623     scale(0x0);
5624     disp($off);
5625   %}
5626 %}
5627 
5628 // Indirect Memory Plus Long Offset Operand, Win95-safe variant: [reg + off] with an eRegP_no_EBP base; op_cost 100
5629 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
5630 %{
5631   match(AddP reg off);
5632
5633   op_cost(100);
5634   format %{ "[$reg + $off]" %}
5635   interface(MEMORY_INTER) %{
5636     base($reg);
5637     index(0x4); // 0x4 = no index (as annotated on the stackSlot operands)
5638     scale(0x0);
5639     disp($off);
5640   %}
5641 %}
5642 
5643 // Indirect Memory Plus Index Register Plus Offset Operand:
     // [reg + ireg + off], unscaled index, base restricted to eRegP_no_EBP.
5644 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
5645 %{
5646   match(AddP (AddP reg ireg) off);
5647
5648   op_cost(100);
5649   format %{"[$reg + $off + $ireg]" %}
5650   interface(MEMORY_INTER) %{
5651     base($reg);
5652     index($ireg);
5653     scale(0x0);  // No Scale
5654     disp($off);
5655   %}
5656 %}
5657 
5658 // Indirect Memory Times Scale Plus Index Register:
     // [reg + (ireg << scale)], base restricted to eRegP_no_EBP.
5659 operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
5660 %{
5661   match(AddP reg (LShiftI ireg scale));
5662
5663   op_cost(100);
5664   format %{"[$reg + $ireg << $scale]" %}
5665   interface(MEMORY_INTER) %{
5666     base($reg);
5667     index($ireg);
5668     scale($scale);  // shift count taken from the matched LShiftI
5669     disp(0x0);      // No Displacement
5670   %}
5671 %}
5672 
5673 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand:
     // [reg + (ireg << scale) + off], base restricted to eRegP_no_EBP.
5674 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
5675 %{
5676   match(AddP (AddP reg (LShiftI ireg scale)) off);
5677
5678   op_cost(100);
5679   format %{"[$reg + $off + $ireg << $scale]" %}
5680   interface(MEMORY_INTER) %{
5681     base($reg);
5682     index($ireg);
5683     scale($scale);  // shift count taken from the matched LShiftI
5684     disp($off);
5685   %}
5686 %}
5687 
5688 //----------Conditional Branch Operands----------------------------------------
5689 // Comparison Op  - This is the operation of the comparison, and is limited to
5690 //                  the following set of codes:
5691 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5692 //
5693 // Other attributes of the comparison, such as unsignedness, are specified
5694 // by the comparison instruction that sets a condition code flags register.
5695 // That result is represented by a flags operand whose subtype is appropriate
5696 // to the unsignedness (etc.) of the comparison.
5697 //
5698 // Later, the instruction which matches both the Comparison Op (a Bool) and
5699 // the flags (produced by the Cmp) specifies the coding of the comparison op
5700 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5701 
5702 // Comparison Code, signed compare.  Maps each Bool test to a condition
     // code value and the mnemonic suffix used when printing.
5703 operand cmpOp() %{
5704   match(Bool);
5705
5706   format %{ "" %}
5707   interface(COND_INTER) %{
5708     equal(0x4, "e");
5709     not_equal(0x5, "ne");
5710     less(0xC, "l");
5711     greater_equal(0xD, "ge");
5712     less_equal(0xE, "le");
5713     greater(0xF, "g");
5714   %}
5715 %}
5716 
5717 // Comparison Code, unsigned compare.  Used by FP also, with
5718 // C2 (unordered) turned into GT or LT already.  The other bits
5719 // C0 and C3 are turned into Carry & Zero flags.
     // Ordering tests therefore use the below/above codes (b, nb, be, nbe).
5720 operand cmpOpU() %{
5721   match(Bool);
5722
5723   format %{ "" %}
5724   interface(COND_INTER) %{
5725     equal(0x4, "e");
5726     not_equal(0x5, "ne");
5727     less(0x2, "b");
5728     greater_equal(0x3, "nb");
5729     less_equal(0x6, "be");
5730     greater(0x7, "nbe");
5731   %}
5732 %}
5733 
5734 // Floating comparisons that don't require any fixup for the unordered case.
     // The predicate restricts this operand to the lt, ge, le and gt tests;
     // the code table is the same as cmpOpU.
5735 operand cmpOpUCF() %{
5736   match(Bool);
5737   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
5738             n->as_Bool()->_test._test == BoolTest::ge ||
5739             n->as_Bool()->_test._test == BoolTest::le ||
5740             n->as_Bool()->_test._test == BoolTest::gt);
5741   format %{ "" %}
5742   interface(COND_INTER) %{
5743     equal(0x4, "e");
5744     not_equal(0x5, "ne");
5745     less(0x2, "b");
5746     greater_equal(0x3, "nb");
5747     less_equal(0x6, "be");
5748     greater(0x7, "nbe");
5749   %}
5750 %}
5751 
5752 
5753 // Floating comparisons that can be fixed up with extra conditional jumps.
     // The predicate restricts this operand to the ne and eq tests;
     // the code table is the same as cmpOpU.
5754 operand cmpOpUCF2() %{
5755   match(Bool);
5756   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5757             n->as_Bool()->_test._test == BoolTest::eq);
5758   format %{ "" %}
5759   interface(COND_INTER) %{
5760     equal(0x4, "e");
5761     not_equal(0x5, "ne");
5762     less(0x2, "b");
5763     greater_equal(0x3, "nb");
5764     less_equal(0x6, "be");
5765     greater(0x7, "nbe");
5766   %}
5767 %}
5768 
5769 // Comparison Code for FP conditional move.  No mnemonic strings are given.
     // NOTE(review): the values look like FCMOVcc encodings (low byte = second
     // opcode byte, bit 8 selecting the first opcode byte) -- confirm against
     // the encode class that consumes them.
5770 operand cmpOp_fcmov() %{
5771   match(Bool);
5772
5773   format %{ "" %}
5774   interface(COND_INTER) %{
5775     equal        (0x0C8);
5776     not_equal    (0x1C8);
5777     less         (0x0C0);
5778     greater_equal(0x1C0);
5779     less_equal   (0x0D0);
5780     greater      (0x1D0);
5781   %}
5782 %}
5783 
5784 // Comparison Code used in long compares.  The ordering tests are mapped to
     // the codes of the commuted test (less -> "g", greater -> "l", etc.);
     // equal and not_equal are unchanged.
5785 operand cmpOp_commute() %{
5786   match(Bool);
5787
5788   format %{ "" %}
5789   interface(COND_INTER) %{
5790     equal(0x4, "e");
5791     not_equal(0x5, "ne");
5792     less(0xF, "g");
5793     greater_equal(0xE, "le");
5794     less_equal(0xD, "ge");
5795     greater(0xC, "l");
5796   %}
5797 %}
5798 
5799 //----------OPERAND CLASSES----------------------------------------------------
5800 // Operand Classes are groups of operands that are used to simplify
5801 // instruction definitions by not requiring the AD writer to specify separate
5802 // instructions for every form of operand when the instruction accepts
5803 // multiple operand types with the same basic encoding and format.  The classic
5804 // case of this is memory operands.
5805 
5806 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
5807                indIndex, indIndexScale, indIndexScaleOffset);  // general memory addressing forms
5808 
5809 // Long memory operations are encoded in 2 instructions and a +4 offset.
5810 // This means some kind of offset is always required and you cannot use
5811 // an oop as the offset (done when working on static globals).
     // Compared with the memory class, this list omits indOffset32X.
5812 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
5813                     indIndex, indIndexScale, indIndexScaleOffset);
5814 
5815 
5816 //----------PIPELINE-----------------------------------------------------------
5817 // Rules which define the behavior of the target architecture's pipeline.
5818 pipeline %{
5819
5820 //----------ATTRIBUTES---------------------------------------------------------
5821 attributes %{
5822   variable_size_instructions;        // Variable size instructions
5823   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5824   instruction_unit_size = 1;         // Instruction size is measured in units of 1 byte
5825   instruction_fetch_unit_size = 16;  // The processor fetches one line
5826   instruction_fetch_units = 1;       // of 16 bytes
5827
5828   // List of nop instructions
5829   nops( MachNop );
5830 %}
5831
5832 //----------RESOURCES----------------------------------------------------------
5833 // Resources are the functional units available to the machine
5834
5835 // Generic P2/P3 pipeline
5836 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5837 // 3 instructions decoded per cycle.
5838 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5839 // 2 ALU op, only ALU0 handles mul/div instructions.
5840 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5841            MS0, MS1, MEM = MS0 | MS1,
5842            BR, FPU,
5843            ALU0, ALU1, ALU = ALU0 | ALU1 );
5844
5845 //----------PIPELINE DESCRIPTION-----------------------------------------------
5846 // Pipeline Description specifies the stages in the machine's pipeline
5847
5848 // Generic P2/P3 pipeline
5849 pipe_desc(S0, S1, S2, S3, S4, S5);
5850
5851 //----------PIPELINE CLASSES---------------------------------------------------
5852 // Pipeline Classes describe the stages in which input and output are
5853 // referenced by the hardware pipeline.
5854
5855 // Naming convention: ialu or fpu
5856 // Then: _reg
5857 // Then: _reg if there is a 2nd register
5858 // Then: _long if it's a pair of instructions implementing a long
5859 // Then: _fat if it requires the big decoder
5860 //   Or: _mem if it requires the big decoder and a memory unit.
5861
5862 // Integer ALU reg operation
5863 pipe_class ialu_reg(eRegI dst) %{
5864     single_instruction;
5865     dst    : S4(write);
5866     dst    : S3(read);
5867     DECODE : S0;        // any decoder
5868     ALU    : S3;        // any alu
5869 %}
5870
5871 // Long ALU reg operation
5872 pipe_class ialu_reg_long(eRegL dst) %{
5873     instruction_count(2);
5874     dst    : S4(write);
5875     dst    : S3(read);
5876     DECODE : S0(2);     // any 2 decoders
5877     ALU    : S3(2);     // both alus
5878 %}
5879
5880 // Integer ALU reg operation using big decoder
5881 pipe_class ialu_reg_fat(eRegI dst) %{
5882     single_instruction;
5883     dst    : S4(write);
5884     dst    : S3(read);
5885     D0     : S0;        // big decoder only
5886     ALU    : S3;        // any alu
5887 %}
5888
5889 // Long ALU reg operation using big decoder
5890 pipe_class ialu_reg_long_fat(eRegL dst) %{
5891     instruction_count(2);
5892     dst    : S4(write);
5893     dst    : S3(read);
5894     D0     : S0(2);     // big decoder only; twice
5895     ALU    : S3(2);     // any 2 alus
5896 %}
5897
5898 // Integer ALU reg-reg operation
5899 pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
5900     single_instruction;
5901     dst    : S4(write);
5902     src    : S3(read);
5903     DECODE : S0;        // any decoder
5904     ALU    : S3;        // any alu
5905 %}
5906
5907 // Long ALU reg-reg operation
5908 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
5909     instruction_count(2);
5910     dst    : S4(write);
5911     src    : S3(read);
5912     DECODE : S0(2);     // any 2 decoders
5913     ALU    : S3(2);     // both alus
5914 %}
5915
5916 // Integer ALU reg-reg operation using big decoder (note: src is declared as a memory operand)
5917 pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
5918     single_instruction;
5919     dst    : S4(write);
5920     src    : S3(read);
5921     D0     : S0;        // big decoder only
5922     ALU    : S3;        // any alu
5923 %}
5924
5925 // Long ALU reg-reg operation using big decoder
5926 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
5927     instruction_count(2);
5928     dst    : S4(write);
5929     src    : S3(read);
5930     D0     : S0(2);     // big decoder only; twice
5931     ALU    : S3(2);     // both alus
5932 %}
5933
5934 // Integer ALU reg-mem operation
5935 pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
5936     single_instruction;
5937     dst    : S5(write);
5938     mem    : S3(read);
5939     D0     : S0;        // big decoder only
5940     ALU    : S4;        // any alu
5941     MEM    : S3;        // any mem
5942 %}
5943
5944 // Long ALU reg-mem operation
5945 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
5946     instruction_count(2);
5947     dst    : S5(write);
5948     mem    : S3(read);
5949     D0     : S0(2);     // big decoder only; twice
5950     ALU    : S4(2);     // any 2 alus
5951     MEM    : S3(2);     // both mems
5952 %}
5953
5954 // Integer mem operation (prefetch)
5955 pipe_class ialu_mem(memory mem)
5956 %{
5957     single_instruction;
5958     mem    : S3(read);
5959     D0     : S0;        // big decoder only
5960     MEM    : S3;        // any mem
5961 %}
5962
5963 // Integer Store to Memory
5964 pipe_class ialu_mem_reg(memory mem, eRegI src) %{
5965     single_instruction;
5966     mem    : S3(read);
5967     src    : S5(read);
5968     D0     : S0;        // big decoder only
5969     ALU    : S4;        // any alu
5970     MEM    : S3;
5971 %}
5972
5973 // Long Store to Memory
5974 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
5975     instruction_count(2);
5976     mem    : S3(read);
5977     src    : S5(read);
5978     D0     : S0(2);     // big decoder only; twice
5979     ALU    : S4(2);     // any 2 alus
5980     MEM    : S3(2);     // Both mems
5981 %}
5982
5983 // Integer memory-immediate operation (only the memory operand is tracked)
5984 pipe_class ialu_mem_imm(memory mem) %{
5985     single_instruction;
5986     mem    : S3(read);
5987     D0     : S0;        // big decoder only
5988     ALU    : S4;        // any alu
5989     MEM    : S3;
5990 %}
5991
5992 // Integer ALU0 reg-reg operation
5993 pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
5994     single_instruction;
5995     dst    : S4(write);
5996     src    : S3(read);
5997     D0     : S0;        // Big decoder only
5998     ALU0   : S3;        // only alu0
5999 %}
6000
6001 // Integer ALU0 reg-mem operation
6002 pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
6003     single_instruction;
6004     dst    : S5(write);
6005     mem    : S3(read);
6006     D0     : S0;        // big decoder only
6007     ALU0   : S4;        // ALU0 only
6008     MEM    : S3;        // any mem
6009 %}
6010
6011 // Integer ALU reg-reg operation that writes the flags register
6012 pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
6013     single_instruction;
6014     cr     : S4(write);
6015     src1   : S3(read);
6016     src2   : S3(read);
6017     DECODE : S0;        // any decoder
6018     ALU    : S3;        // any alu
6019 %}
6020
6021 // Integer ALU reg-imm operation that writes the flags register
6022 pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
6023     single_instruction;
6024     cr     : S4(write);
6025     src1   : S3(read);
6026     DECODE : S0;        // any decoder
6027     ALU    : S3;        // any alu
6028 %}
6029
6030 // Integer ALU reg-mem operation that writes the flags register
6031 pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
6032     single_instruction;
6033     cr     : S4(write);
6034     src1   : S3(read);
6035     src2   : S3(read);
6036     D0     : S0;        // big decoder only
6037     ALU    : S4;        // any alu
6038     MEM    : S3;
6039 %}
6040
6041 // Four-instruction idiom reading three registers (named for "cmplt";
     // NOTE(review): presumably used by the CmpLTMask rules -- confirm)
6042 pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
6043     instruction_count(4);
6044     y      : S4(read);
6045     q      : S3(read);
6046     p      : S3(read);
6047     DECODE : S0(4);     // any decoder
6048 %}
6049
6050 // Conditional move reg-reg
6051 pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
6052     single_instruction;
6053     dst    : S4(write);
6054     src    : S3(read);
6055     cr     : S3(read);
6056     DECODE : S0;        // any decoder
6057 %}
6058
6059 // Conditional move reg-mem
6060 pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
6061     single_instruction;
6062     dst    : S4(write);
6063     src    : S3(read);
6064     cr     : S3(read);
6065     DECODE : S0;        // any decoder
6066     MEM    : S3;
6067 %}
6068
6069 // Conditional move reg-reg long
     // NOTE(review): declared single_instruction although DECODE is used twice.
6070 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
6071     single_instruction;
6072     dst    : S4(write);
6073     src    : S3(read);
6074     cr     : S3(read);
6075     DECODE : S0(2);     // any 2 decoders
6076 %}
6077
6078 // Conditional move double reg-reg
6079 pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
6080     single_instruction;
6081     dst    : S4(write);
6082     src    : S3(read);
6083     cr     : S3(read);
6084     DECODE : S0;        // any decoder
6085 %}
6086
6087 // Float reg operation
6088 pipe_class fpu_reg(regD dst) %{
6089     instruction_count(2);
6090     dst    : S3(read);
6091     DECODE : S0(2);     // any 2 decoders
6092     FPU    : S3;
6093 %}
6094
6095 // Float reg-reg operation
6096 pipe_class fpu_reg_reg(regD dst, regD src) %{
6097     instruction_count(2);
6098     dst    : S4(write);
6099     src    : S3(read);
6100     DECODE : S0(2);     // any 2 decoders
6101     FPU    : S3;
6102 %}
6103
6104 // Float reg-reg-reg operation
6105 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
6106     instruction_count(3);
6107     dst    : S4(write);
6108     src1   : S3(read);
6109     src2   : S3(read);
6110     DECODE : S0(3);     // any 3 decoders
6111     FPU    : S3(2);
6112 %}
6113
6114 // Float reg-reg-reg-reg operation
6115 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
6116     instruction_count(4);
6117     dst    : S4(write);
6118     src1   : S3(read);
6119     src2   : S3(read);
6120     src3   : S3(read);
6121     DECODE : S0(4);     // any decoders, count of 4 (one per instruction)
6122     FPU    : S3(2);
6123 %}
6124
6125 // Float reg-mem-reg-reg operation
6126 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
6127     instruction_count(4);
6128     dst    : S4(write);
6129     src1   : S3(read);
6130     src2   : S3(read);
6131     src3   : S3(read);
6132     DECODE : S1(3);     // any 3 decoders
6133     D0     : S0;        // Big decoder only
6134     FPU    : S3(2);
6135     MEM    : S3;
6136 %}
6137
6138 // Float reg-mem operation
6139 pipe_class fpu_reg_mem(regD dst, memory mem) %{
6140     instruction_count(2);
6141     dst    : S5(write);
6142     mem    : S3(read);
6143     D0     : S0;        // big decoder only
6144     DECODE : S1;        // any decoder for FPU POP
6145     FPU    : S4;
6146     MEM    : S3;        // any mem
6147 %}
6148
6149 // Float reg-reg-mem operation
6150 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
6151     instruction_count(3);
6152     dst    : S5(write);
6153     src1   : S3(read);
6154     mem    : S3(read);
6155     D0     : S0;        // big decoder only
6156     DECODE : S1(2);     // any decoder for FPU POP
6157     FPU    : S4;
6158     MEM    : S3;        // any mem
6159 %}
6160
6161 // Float mem-reg operation
6162 pipe_class fpu_mem_reg(memory mem, regD src) %{
6163     instruction_count(2);
6164     src    : S5(read);
6165     mem    : S3(read);
6166     DECODE : S0;        // any decoder for FPU PUSH
6167     D0     : S1;        // big decoder only
6168     FPU    : S4;
6169     MEM    : S3;        // any mem
6170 %}
6171
     // Float mem-reg-reg operation
6172 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
6173     instruction_count(3);
6174     src1   : S3(read);
6175     src2   : S3(read);
6176     mem    : S3(read);
6177     DECODE : S0(2);     // any decoder for FPU PUSH
6178     D0     : S1;        // big decoder only
6179     FPU    : S4;
6180     MEM    : S3;        // any mem
6181 %}
6182
     // Float mem-reg-mem operation
6183 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
6184     instruction_count(3);
6185     src1   : S3(read);
6186     src2   : S3(read);
6187     mem    : S4(read);
6188     DECODE : S0;        // any decoder for FPU PUSH
6189     D0     : S0(2);     // big decoder only
6190     FPU    : S4;
6191     MEM    : S3(2);     // any mem
6192 %}
6193
     // Float mem-mem operation (no FPU resource is claimed)
6194 pipe_class fpu_mem_mem(memory dst, memory src1) %{
6195     instruction_count(2);
6196     src1   : S3(read);
6197     dst    : S4(read);
6198     D0     : S0(2);     // big decoder only
6199     MEM    : S3(2);     // any mem
6200 %}
6201
     // Float mem-mem-mem operation
6202 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
6203     instruction_count(3);
6204     src1   : S3(read);
6205     src2   : S3(read);
6206     dst    : S4(read);
6207     D0     : S0(3);     // big decoder only
6208     FPU    : S4;
6209     MEM    : S3(3);     // any mem
6210 %}
6211
     // Float mem-reg operation with a constant (the constant is not a tracked operand)
6212 pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
6213     instruction_count(3);
6214     src1   : S4(read);
6215     mem    : S4(read);
6216     DECODE : S0;        // any decoder for FPU PUSH
6217     D0     : S0(2);     // big decoder only
6218     FPU    : S4;
6219     MEM    : S3(2);     // any mem
6220 %}
6221
6222 // Float load constant
6223 pipe_class fpu_reg_con(regD dst) %{
6224     instruction_count(2);
6225     dst    : S5(write);
6226     D0     : S0;        // big decoder only for the load
6227     DECODE : S1;        // any decoder for FPU POP
6228     FPU    : S4;
6229     MEM    : S3;        // any mem
6230 %}
6231
6232 // Float reg-reg operation with a loaded constant
6233 pipe_class fpu_reg_reg_con(regD dst, regD src) %{
6234     instruction_count(3);
6235     dst    : S5(write);
6236     src    : S3(read);
6237     D0     : S0;        // big decoder only for the load
6238     DECODE : S1(2);     // any decoder for FPU POP
6239     FPU    : S4;
6240     MEM    : S3;        // any mem
6241 %}
6242
6243 // Unconditional branch
6244 pipe_class pipe_jmp( label labl ) %{
6245     single_instruction;
6246     BR   : S3;
6247 %}
6248
6249 // Conditional branch
6250 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
6251     single_instruction;
6252     cr    : S1(read);
6253     BR    : S3;
6254 %}
6255
6256 // Allocation idiom
6257 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
6258     instruction_count(1); force_serialization;
6259     fixed_latency(6);
6260     heap_ptr : S3(read);
6261     DECODE   : S0(3);
6262     D0       : S2;
6263     MEM      : S3;
6264     ALU      : S3(2);
6265     dst      : S5(write);
6266     BR       : S5;
6267 %}
6268
6269 // Generic big/slow expanded idiom
6270 pipe_class pipe_slow(  ) %{
6271     instruction_count(10); multiple_bundles; force_serialization;
6272     fixed_latency(100);
6273     D0  : S0(2);
6274     MEM : S3(2);
6275 %}
6276
6277 // The real do-nothing guy
6278 pipe_class empty( ) %{
6279     instruction_count(0);
6280 %}
6281
6282 // Define the class for the Nop node
6283 define %{
6284    MachNop = empty;
6285 %}
6286
6287 %}
6288 
6289 //----------INSTRUCTIONS-------------------------------------------------------
6290 //
6291 // match      -- States which machine-independent subtree may be replaced
6292 //               by this instruction.
6293 // ins_cost   -- The estimated cost of this instruction is used by instruction
6294 //               selection to identify a minimum cost tree of machine
6295 //               instructions that matches a tree of machine-independent
6296 //               instructions.
6297 // format     -- A string providing the disassembly for this instruction.
6298 //               The value of an instruction's operand may be inserted
6299 //               by referring to it with a '$' prefix.
6300 // opcode     -- Three instruction opcodes may be provided.  These are referred
6301 //               to within an encode class as $primary, $secondary, and $tertiary
6302 //               respectively.  The primary opcode is commonly used to
6303 //               indicate the type of machine instruction, while secondary
6304 //               and tertiary are often used for prefix options or addressing
6305 //               modes.
6306 // ins_encode -- A list of encode classes with parameters. The encode class
6307 //               name must have been defined in an 'enc_class' specification
6308 //               in the encode section of the architecture description.
6309 
6310 //----------BSWAP-Instruction--------------------------------------------------
6311 instruct bytes_reverse_int(eRegI dst) %{
     // Reverses the byte order of an int in place (ReverseBytesI) with one BSWAP.
6312   match(Set dst (ReverseBytesI dst));
6313
6314   format %{ "BSWAP  $dst" %}
6315   opcode(0x0F, 0xC8);  // primary and secondary opcode
6316   ins_encode( OpcP, OpcSReg(dst) );
6317   ins_pipe( ialu_reg );
6318 %}
6319 
6320 instruct bytes_reverse_long(eRegL dst) %{
     // Reverses the byte order of a long held in a register pair (ReverseBytesL).
     // Per the format: byte-swap each half, then exchange the halves.
6321   match(Set dst (ReverseBytesL dst));
6322
6323   format %{ "BSWAP  $dst.lo\n\t"
6324             "BSWAP  $dst.hi\n\t"
6325             "XCHG   $dst.lo $dst.hi" %}
6326
6327   ins_cost(125);
6328   ins_encode( bswap_long_bytes(dst) );
6329   ins_pipe( ialu_reg_reg);
6330 %}
6331 
6332 instruct bytes_reverse_unsigned_short(eRegI dst, eFlagsReg cr) %{
     // Reverses the two bytes of an unsigned short (ReverseBytesUS): swap all
     // four bytes of the register, then shift the result down 16 bits with a
     // logical shift so the upper half is zero.
6333   match(Set dst (ReverseBytesUS dst));
     // SHR modifies EFLAGS, so the flags register must be declared killed.
       effect(KILL cr);
6334
6335   format %{ "BSWAP  $dst\n\t"
6336             "SHR    $dst,16\n\t" %}
6337   ins_encode %{
6338     __ bswapl($dst$$Register);
6339     __ shrl($dst$$Register, 16);  // zero-fill the upper 16 bits
6340   %}
6341   ins_pipe( ialu_reg );
6342 %}
6343 
6344 instruct bytes_reverse_short(eRegI dst, eFlagsReg cr) %{
     // Reverses the two bytes of a signed short (ReverseBytesS): swap all four
     // bytes of the register, then shift the result down 16 bits with an
     // arithmetic shift so the value is sign-extended.
6345   match(Set dst (ReverseBytesS dst));
     // SAR modifies EFLAGS, so the flags register must be declared killed.
       effect(KILL cr);
6346
6347   format %{ "BSWAP  $dst\n\t"
6348             "SAR    $dst,16\n\t" %}
6349   ins_encode %{
6350     __ bswapl($dst$$Register);
6351     __ sarl($dst$$Register, 16);  // sign-fill the upper 16 bits
6352   %}
6353   ins_pipe( ialu_reg );
6354 %}
6355 
6356 
6357 //---------- Zeros Count Instructions ------------------------------------------
6358 
6359 instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
     // Count leading zeros of an int with a single LZCNT; selected only when
     // UseCountLeadingZerosInstruction is set.  Flags are killed.
6360   predicate(UseCountLeadingZerosInstruction);
6361   match(Set dst (CountLeadingZerosI src));
6362   effect(KILL cr);
6363
6364   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
6365   ins_encode %{
6366     __ lzcntl($dst$$Register, $src$$Register);
6367   %}
6368   ins_pipe(ialu_reg);
6369 %}
6370 
6371 instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
     // Count leading zeros of an int using BSR; selected when
     // UseCountLeadingZerosInstruction is off.  Computes 31 - (bit index),
     // substituting -1 for the index when the source is zero (result 32).
6372   predicate(!UseCountLeadingZerosInstruction);
6373   match(Set dst (CountLeadingZerosI src));
6374   effect(KILL cr);
6375
6376   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
6377             "JNZ    skip\n\t"
6378             "MOV    $dst, -1\n"
6379       "skip:\n\t"
6380             "NEG    $dst\n\t"
6381             "ADD    $dst, 31" %}
6382   ins_encode %{
6383     Register Rdst = $dst$$Register;
6384     Register Rsrc = $src$$Register;
6385     Label skip;
6386     __ bsrl(Rdst, Rsrc);                 // index of highest set bit; ZF set if Rsrc == 0
6387     __ jccb(Assembler::notZero, skip);
6388     __ movl(Rdst, -1);                   // source was zero: use -1 so the result becomes 32
6389     __ bind(skip);
6390     __ negl(Rdst);
6391     __ addl(Rdst, BitsPerInt - 1);       // Rdst = (BitsPerInt - 1) - index
6392   %}
6393   ins_pipe(ialu_reg);
6394 %}
6395 
6396 instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
     // Count leading zeros of a long (register pair) using LZCNT on the high
     // word and, only if the high word is zero, on the low word plus 32.
     // dst is TEMP because it is written before the low word of src is read.
6397   predicate(UseCountLeadingZerosInstruction);
6398   match(Set dst (CountLeadingZerosL src));
6399   effect(TEMP dst, KILL cr);
6400
6401   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
6402             "JNC    done\n\t"
6403             "LZCNT  $dst, $src.lo\n\t"
6404             "ADD    $dst, 32\n"
6405       "done:" %}
6406   ins_encode %{
6407     Register Rdst = $dst$$Register;
6408     Register Rsrc = $src$$Register;
6409     Label done;
6410     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
6411     __ jccb(Assembler::carryClear, done);  // carry clear: high word was non-zero
6412     __ lzcntl(Rdst, Rsrc);
6413     __ addl(Rdst, BitsPerInt);             // account for the all-zero high word
6414     __ bind(done);
6415   %}
6416   ins_pipe(ialu_reg);
6417 %}
6418 
6419 instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
     // Count leading zeros of a long (register pair) using BSR; selected when
     // UseCountLeadingZerosInstruction is off.  Computes 63 - (bit index over
     // the 64-bit value), substituting -1 when both words are zero (result 64).
6420   predicate(!UseCountLeadingZerosInstruction);
6421   match(Set dst (CountLeadingZerosL src));
6422   effect(TEMP dst, KILL cr);
6423
6424   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
6425             "JZ     msw_is_zero\n\t"
6426             "ADD    $dst, 32\n\t"
6427             "JMP    not_zero\n"
6428       "msw_is_zero:\n\t"
6429             "BSR    $dst, $src.lo\n\t"
6430             "JNZ    not_zero\n\t"
6431             "MOV    $dst, -1\n"
6432       "not_zero:\n\t"
6433             "NEG    $dst\n\t"
6434             "ADD    $dst, 63\n" %}
6435  ins_encode %{
6436     Register Rdst = $dst$$Register;
6437     Register Rsrc = $src$$Register;
6438     Label msw_is_zero;
6439     Label not_zero;
6440     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
6441     __ jccb(Assembler::zero, msw_is_zero);
6442     __ addl(Rdst, BitsPerInt);             // bit index within the full 64-bit value
6443     __ jmpb(not_zero);
6444     __ bind(msw_is_zero);
6445     __ bsrl(Rdst, Rsrc);
6446     __ jccb(Assembler::notZero, not_zero);
6447     __ movl(Rdst, -1);                     // both words zero: use -1 so the result becomes 64
6448     __ bind(not_zero);
6449     __ negl(Rdst);
6450     __ addl(Rdst, BitsPerLong - 1);        // Rdst = (BitsPerLong - 1) - index
6451   %}
6452   ins_pipe(ialu_reg);
6453 %}
6454 
6455 instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
     // Count trailing zeros of an int using BSF; a zero source yields 32.
6456   match(Set dst (CountTrailingZerosI src));
6457   effect(KILL cr);
6458
6459   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
6460             "JNZ    done\n\t"
6461             "MOV    $dst, 32\n"
6462       "done:" %}
6463   ins_encode %{
6464     Register Rdst = $dst$$Register;
6465     Label done;
6466     __ bsfl(Rdst, $src$$Register);       // index of lowest set bit; ZF set if source == 0
6467     __ jccb(Assembler::notZero, done);
6468     __ movl(Rdst, BitsPerInt);           // source was zero
6469     __ bind(done);
6470   %}
6471   ins_pipe(ialu_reg);
6472 %}
6473 
6474 instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
     // Count trailing zeros of a long (register pair) using BSF on the low
     // word, falling back to the high word plus 32; both words zero yields 64.
     // dst is TEMP because it is written before the high word of src is read.
6475   match(Set dst (CountTrailingZerosL src));
6476   effect(TEMP dst, KILL cr);
6477
6478   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
6479             "JNZ    done\n\t"
6480             "BSF    $dst, $src.hi\n\t"
6481             "JNZ    msw_not_zero\n\t"
6482             "MOV    $dst, 32\n"
6483       "msw_not_zero:\n\t"
6484             "ADD    $dst, 32\n"
6485       "done:" %}
6486   ins_encode %{
6487     Register Rdst = $dst$$Register;
6488     Register Rsrc = $src$$Register;
6489     Label msw_not_zero;
6490     Label done;
6491     __ bsfl(Rdst, Rsrc);
6492     __ jccb(Assembler::notZero, done);          // low word had a set bit
6493     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
6494     __ jccb(Assembler::notZero, msw_not_zero);
6495     __ movl(Rdst, BitsPerInt);                  // both words zero: 32 + 32 = 64
6496     __ bind(msw_not_zero);
6497     __ addl(Rdst, BitsPerInt);                  // account for the all-zero low word
6498     __ bind(done);
6499   %}
6500   ins_pipe(ialu_reg);
6501 %}
6502 
6503 
6504 //---------- Population Count Instructions -------------------------------------
6505 
6506 instruct popCountI(eRegI dst, eRegI src, eFlagsReg cr) %{
     // Population count of an int register using POPCNT; selected only when
     // UsePopCountInstruction is set.
6507   predicate(UsePopCountInstruction);
6508   match(Set dst (PopCountI src));
     // POPCNT rewrites EFLAGS, so the flags register must be declared killed
     // (as the popCountL rules already do).
       effect(KILL cr);
6509
6510   format %{ "POPCNT $dst, $src" %}
6511   ins_encode %{
6512     __ popcntl($dst$$Register, $src$$Register);
6513   %}
6514   ins_pipe(ialu_reg);
6515 %}
6516 
6517 instruct popCountI_mem(eRegI dst, memory mem, eFlagsReg cr) %{
     // Population count of an int loaded from memory, folding the LoadI into
     // the POPCNT memory operand; selected only when UsePopCountInstruction
     // is set.
6518   predicate(UsePopCountInstruction);
6519   match(Set dst (PopCountI (LoadI mem)));
     // POPCNT rewrites EFLAGS, so the flags register must be declared killed
     // (as the popCountL rules already do).
       effect(KILL cr);
6520
6521   format %{ "POPCNT $dst, $mem" %}
6522   ins_encode %{
6523     __ popcntl($dst$$Register, $mem$$Address);
6524   %}
6525   ins_pipe(ialu_reg);
6526 %}
6527 
6528 // Note: Long.bitCount(long) returns an int.
     // Population count of a long (register pair): POPCNT each half and add.
     // tmp holds the high-word count; dst is TEMP because it is written before
     // the high word of src is read.
6529 instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
6530   predicate(UsePopCountInstruction);
6531   match(Set dst (PopCountL src));
6532   effect(KILL cr, TEMP tmp, TEMP dst);
6533
6534   format %{ "POPCNT $dst, $src.lo\n\t"
6535             "POPCNT $tmp, $src.hi\n\t"
6536             "ADD    $dst, $tmp" %}
6537   ins_encode %{
6538     __ popcntl($dst$$Register, $src$$Register);
6539     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
6540     __ addl($dst$$Register, $tmp$$Register);
6541   %}
6542   ins_pipe(ialu_reg);
6543 %}
6544 
6545 // Note: Long.bitCount(long) returns an int.
     // Population count of a long loaded from memory: POPCNT the word at the
     // operand's address and the word at displacement + 4, then add.
6546 instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
6547   predicate(UsePopCountInstruction);
6548   match(Set dst (PopCountL (LoadL mem)));
6549   effect(KILL cr, TEMP tmp, TEMP dst);
6550
6551   format %{ "POPCNT $dst, $mem\n\t"
6552             "POPCNT $tmp, $mem+4\n\t"
6553             "ADD    $dst, $tmp" %}
6554   ins_encode %{
6555     //__ popcntl($dst$$Register, $mem$$Address$$first);
6556     //__ popcntl($tmp$$Register, $mem$$Address$$second);
       // The two addresses are built by hand from the operand's components so
       // that the second one can carry the +4 displacement.
6557     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
6558     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
6559     __ addl($dst$$Register, $tmp$$Register);
6560   %}
6561   ins_pipe(ialu_reg);
6562 %}
6563 
6564 
6565 //----------Load/Store/Move Instructions---------------------------------------
6566 //----------Load Instructions--------------------------------------------------
6567 // Load Byte (8bit signed), sign-extended into an int register
6568 instruct loadB(xRegI dst, memory mem) %{
6569   match(Set dst (LoadB mem));
6570
6571   ins_cost(125);
6572   format %{ "MOVSX8 $dst,$mem\t# byte" %}
6573
6574   ins_encode %{
6575     __ movsbl($dst$$Register, $mem$$Address);
6576   %}
6577
6578   ins_pipe(ialu_reg_mem);
6579 %}
6580 
6581 // Load Byte (8bit signed) into Long Register: sign-extend into the low
     // word, then fill the high word with copies of the sign bit.
6582 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
6583   match(Set dst (ConvI2L (LoadB mem)));
6584   effect(KILL cr);
6585
6586   ins_cost(375);
6587   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
6588             "MOV    $dst.hi,$dst.lo\n\t"
6589             "SAR    $dst.hi,7" %}
6590
6591   ins_encode %{
6592     __ movsbl($dst$$Register, $mem$$Address);
6593     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6594     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
6595   %}
6596
6597   ins_pipe(ialu_reg_mem);
6598 %}
6599 
6600 // Load Unsigned Byte (8bit UNsigned): matches LoadUB and loads the byte at $mem zero-extended into $dst.
6601 instruct loadUB(xRegI dst, memory mem) %{
6602   match(Set dst (LoadUB mem));
6603 
6604   ins_cost(125);
6605   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
6606 
6607   ins_encode %{
6608     __ movzbl($dst$$Register, $mem$$Address); // byte -> 32-bit, zero-extending
6609   %}
6610 
6611   ins_pipe(ialu_reg_mem);
6612 %}
6613 
6614 // Load Unsigned Byte (8 bit UNsigned) into Long Register: matches ConvI2L of a LoadUB; the high word is cleared with XOR, so flags are killed.
6615 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
6616   match(Set dst (ConvI2L (LoadUB mem)));
6617   effect(KILL cr);
6618 
6619   ins_cost(250);
6620   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
6621             "XOR    $dst.hi,$dst.hi" %}
6622 
6623   ins_encode %{
6624     Register Rdst = $dst$$Register;
6625     __ movzbl(Rdst, $mem$$Address); // low word = zero-extended byte
6626     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); // high word = 0 (value is never negative)
6627   %}
6628 
6629   ins_pipe(ialu_reg_mem);
6630 %}
6631 
6632 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register: matches ConvI2L of (LoadUB & mask) and folds the AND into the load sequence.
6633 instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
6634   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6635   effect(KILL cr);
6636 
6637   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
6638             "XOR    $dst.hi,$dst.hi\n\t"
6639             "AND    $dst.lo,$mask" %}
6640   ins_encode %{
6641     Register Rdst = $dst$$Register;
6642     __ movzbl(Rdst, $mem$$Address); // low word is in 0..255 after the zero-extending load
6643     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); // masking a 0..255 value cannot make it negative, so the high word is 0
6644     __ andl(Rdst, $mask$$constant); // apply the mask to the low word only
6645   %}
6646   ins_pipe(ialu_reg_mem);
6647 %}
6648 
6649 // Load Short (16bit signed): matches LoadS and loads the 16-bit value at $mem sign-extended into $dst.
6650 instruct loadS(eRegI dst, memory mem) %{
6651   match(Set dst (LoadS mem));
6652 
6653   ins_cost(125);
6654   format %{ "MOVSX  $dst,$mem\t# short" %}
6655 
6656   ins_encode %{
6657     __ movswl($dst$$Register, $mem$$Address); // word -> 32-bit, sign-extending
6658   %}
6659 
6660   ins_pipe(ialu_reg_mem);
6661 %}
6662 
6663 // Load Short (16 bit signed) to Byte (8 bit signed): matches (LoadS << 24) >> 24 and replaces it with a single sign-extending byte load.
6664 instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
6665   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
6666 
6667   ins_cost(125);
6668   format %{ "MOVSX8 $dst, $mem\t# short -> byte" %}
6669   ins_encode %{
6670     __ movsbl($dst$$Register, $mem$$Address); // byte form: only the low byte at $mem survives the shift pair
6671   %}
6672   ins_pipe(ialu_reg_mem);
6673 %}
6674 
6675 // Load Short (16bit signed) into Long Register: matches ConvI2L of a LoadS and fills a register pair; flags are declared killed.
6676 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
6677   match(Set dst (ConvI2L (LoadS mem)));
6678   effect(KILL cr);
6679 
6680   ins_cost(375);
6681   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
6682             "MOV    $dst.hi,$dst.lo\n\t"
6683             "SAR    $dst.hi,15" %}
6684 
6685   ins_encode %{
6686     __ movswl($dst$$Register, $mem$$Address); // low word = sign-extended short
6687     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6688     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign extended, so a shift by 15 leaves only sign bits in the high word.
6689   %}
6690 
6691   ins_pipe(ialu_reg_mem);
6692 %}
6693 
6694 // Load Unsigned Short/Char (16bit unsigned): matches LoadUS and loads the 16-bit value at $mem zero-extended into $dst.
6695 instruct loadUS(eRegI dst, memory mem) %{
6696   match(Set dst (LoadUS mem));
6697 
6698   ins_cost(125);
6699   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
6700 
6701   ins_encode %{
6702     __ movzwl($dst$$Register, $mem$$Address); // word -> 32-bit, zero-extending
6703   %}
6704 
6705   ins_pipe(ialu_reg_mem);
6706 %}
6707 
6708 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed): matches (LoadUS << 24) >> 24 and replaces it with a single sign-extending byte load.
6709 instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
6710   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
6711 
6712   ins_cost(125);
6713   format %{ "MOVSX8 $dst, $mem\t# ushort -> byte" %}
6714   ins_encode %{
6715     __ movsbl($dst$$Register, $mem$$Address); // byte form: only the low byte at $mem survives the shift pair
6716   %}
6717   ins_pipe(ialu_reg_mem);
6718 %}
6719 
6720 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register: matches ConvI2L of a LoadUS; the high word is cleared with XOR, so flags are killed.
6721 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
6722   match(Set dst (ConvI2L (LoadUS mem)));
6723   effect(KILL cr);
6724 
6725   ins_cost(250);
6726   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
6727             "XOR    $dst.hi,$dst.hi" %}
6728 
6729   ins_encode %{
6730     __ movzwl($dst$$Register, $mem$$Address); // low word = zero-extended 16-bit value
6731     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); // high word = 0 (value is never negative)
6732   %}
6733 
6734   ins_pipe(ialu_reg_mem);
6735 %}
6736 
6737 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register: the mask keeps only the low byte, so a zero-extending byte load replaces load + AND.
6738 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
6739   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6740   effect(KILL cr);
6741 
6742   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
6743             "XOR    $dst.hi,$dst.hi" %}
6744   ins_encode %{
6745     Register Rdst = $dst$$Register;
6746     __ movzbl(Rdst, $mem$$Address); // byte load performs the & 0xFF implicitly; $mask is not emitted
6747     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); // high word = 0
6748   %}
6749   ins_pipe(ialu_reg_mem);
6750 %}
6751 
6752 // Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register: matches ConvI2L of (LoadUS & mask) and folds the AND into the load sequence.
6753 instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{
6754   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6755   effect(KILL cr);
6756 
6757   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6758             "XOR    $dst.hi,$dst.hi\n\t"
6759             "AND    $dst.lo,$mask" %}
6760   ins_encode %{
6761     Register Rdst = $dst$$Register;
6762     __ movzwl(Rdst, $mem$$Address); // low word is in 0..65535 after the zero-extending load
6763     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); // masking a 0..65535 value cannot make it negative, so the high word is 0
6764     __ andl(Rdst, $mask$$constant); // apply the mask to the low word only
6765   %}
6766   ins_pipe(ialu_reg_mem);
6767 %}
6768 
6769 // Load Integer: matches LoadI and moves the 32-bit value at $mem into $dst.
6770 instruct loadI(eRegI dst, memory mem) %{
6771   match(Set dst (LoadI mem));
6772 
6773   ins_cost(125);
6774   format %{ "MOV    $dst,$mem\t# int" %}
6775 
6776   ins_encode %{
6777     __ movl($dst$$Register, $mem$$Address); // plain 32-bit load, no extension needed
6778   %}
6779 
6780   ins_pipe(ialu_reg_mem);
6781 %}
6782 
6783 // Load Integer (32 bit signed) to Byte (8 bit signed): matches (LoadI << 24) >> 24 and replaces it with a single sign-extending byte load.
6784 instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{
6785   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6786 
6787   ins_cost(125);
6788   format %{ "MOVSX8 $dst, $mem\t# int -> byte" %}
6789   ins_encode %{
6790     __ movsbl($dst$$Register, $mem$$Address); // byte form: only the low byte at $mem survives the shift pair
6791   %}
6792   ins_pipe(ialu_reg_mem);
6793 %}
6794 
6795 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned): matches LoadI & 0xFF and replaces it with a single zero-extending byte load.
6796 instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{
6797   match(Set dst (AndI (LoadI mem) mask));
6798 
6799   ins_cost(125);
6800   format %{ "MOVZX8 $dst, $mem\t# int -> ubyte" %}
6801   ins_encode %{
6802     __ movzbl($dst$$Register, $mem$$Address); // byte form performs the & 0xFF implicitly; $mask is not emitted
6803   %}
6804   ins_pipe(ialu_reg_mem);
6805 %}
6806 
6807 // Load Integer (32 bit signed) to Short (16 bit signed): matches (LoadI << 16) >> 16 and replaces it with a single sign-extending 16-bit load.
6808 instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{
6809   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6810 
6811   ins_cost(125);
6812   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
6813   ins_encode %{
6814     __ movswl($dst$$Register, $mem$$Address); // only the low 16 bits at $mem survive the shift pair
6815   %}
6816   ins_pipe(ialu_reg_mem);
6817 %}
6818 
6819 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned): matches LoadI & 0xFFFF and replaces it with a single zero-extending 16-bit load.
6820 instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{
6821   match(Set dst (AndI (LoadI mem) mask));
6822 
6823   ins_cost(125);
6824   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
6825   ins_encode %{
6826     __ movzwl($dst$$Register, $mem$$Address); // 16-bit load performs the & 0xFFFF implicitly; $mask is not emitted
6827   %}
6828   ins_pipe(ialu_reg_mem);
6829 %}
6830 
6831 // Load Integer into Long Register: matches ConvI2L of a LoadI and sign-extends into a register pair; flags are declared killed.
6832 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
6833   match(Set dst (ConvI2L (LoadI mem)));
6834   effect(KILL cr);
6835 
6836   ins_cost(375);
6837   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
6838             "MOV    $dst.hi,$dst.lo\n\t"
6839             "SAR    $dst.hi,31" %}
6840 
6841   ins_encode %{
6842     __ movl($dst$$Register, $mem$$Address); // low word = the int itself
6843     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6844     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // replicate the sign bit across the high word
6845   %}
6846 
6847   ins_pipe(ialu_reg_mem);
6848 %}
6849 
6850 // Load Integer with mask 0xFF into Long Register: the mask keeps only the low byte, so a zero-extending byte load replaces load + AND.
6851 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
6852   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6853   effect(KILL cr);
6854 
6855   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
6856             "XOR    $dst.hi,$dst.hi" %}
6857   ins_encode %{
6858     Register Rdst = $dst$$Register;
6859     __ movzbl(Rdst, $mem$$Address); // byte load performs the & 0xFF implicitly; $mask is not emitted
6860     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); // high word = 0
6861   %}
6862   ins_pipe(ialu_reg_mem);
6863 %}
6864 
6865 // Load Integer with mask 0xFFFF into Long Register: the mask keeps only the low 16 bits, so a zero-extending 16-bit load replaces load + AND.
6866 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
6867   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6868   effect(KILL cr);
6869 
6870   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
6871             "XOR    $dst.hi,$dst.hi" %}
6872   ins_encode %{
6873     Register Rdst = $dst$$Register;
6874     __ movzwl(Rdst, $mem$$Address); // 16-bit load performs the & 0xFFFF implicitly; $mask is not emitted
6875     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); // high word = 0
6876   %}
6877   ins_pipe(ialu_reg_mem);
6878 %}
6879 
6880 // Load Integer with 32-bit mask into Long Register.  Restricted to non-negative masks: a mask with bit 31 set can leave a negative int, whose long value needs an all-ones high word rather than the zero written here.
6881 instruct loadI2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
6882   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6883   effect(KILL cr);
6884   predicate(n->in(1)->in(2)->get_int() >= 0); // n is the ConvI2L, in(1) its AndI, in(2) the constant mask; negative masks fall back to the generic rules
6885   format %{ "MOV    $dst.lo,$mem\t# int & 32-bit mask -> long\n\t"
6886             "XOR    $dst.hi,$dst.hi\n\t"
6887             "AND    $dst.lo,$mask" %}
6888   ins_encode %{
6889     Register Rdst = $dst$$Register;
6890     __ movl(Rdst, $mem$$Address); // low word = the int itself
6891     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); // high word = 0, correct because the masked result is non-negative
6892     __ andl(Rdst, $mask$$constant); // apply the mask to the low word only
6893   %}
6894   ins_pipe(ialu_reg_mem);
6895 %}
6896 
6897 // Load Unsigned Integer into Long Register: matches LoadUI2L; the low word is loaded and the high word cleared with XOR, so flags are killed.
6898 instruct loadUI2L(eRegL dst, memory mem, eFlagsReg cr) %{
6899   match(Set dst (LoadUI2L mem));
6900   effect(KILL cr);
6901 
6902   ins_cost(250);
6903   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
6904             "XOR    $dst.hi,$dst.hi" %}
6905 
6906   ins_encode %{
6907     __ movl($dst$$Register, $mem$$Address); // low word = the 32-bit value
6908     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); // zero extension: high word = 0
6909   %}
6910 
6911   ins_pipe(ialu_reg_mem);
6912 %}
6913 
6914 // Load Long (non-atomic form, two 32-bit moves).  Cannot clobber address while loading, so restrict address
6915 // register to ESI (via the load_long_memory operand class).
6916 instruct loadL(eRegL dst, load_long_memory mem) %{
6917   predicate(!((LoadLNode*)n)->require_atomic_access());
6918   match(Set dst (LoadL mem));
6919 
6920   ins_cost(250);
6921   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
6922             "MOV    $dst.hi,$mem+4" %}
6923 
6924   ins_encode %{
6925     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false); // low word at $mem
6926     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false); // high word at $mem+4
6927     __ movl($dst$$Register, Amemlo);
6928     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
6929   %}
6930 
6931   ins_pipe(ialu_reg_long_mem);
6932 %}
6933 
6934 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
6935 // then store it down to the stack and reload on the int
6936 // side.  Selected when UseSSE<=1 and the LoadL node requires atomic access; the result lands in a stack slot.
6937 instruct loadL_volatile(stackSlotL dst, memory mem) %{
6938   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
6939   match(Set dst (LoadL mem));
6940 
6941   ins_cost(200);
6942   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
6943             "FISTp  $dst" %}
6944   ins_encode(enc_loadL_volatile(mem,dst)); // encoding class is defined outside this section
6945   ins_pipe( fpu_reg_mem );
6946 %}
6947 
6948 instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
6949   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); // SSE2 variant of the atomic long load
6950   match(Set dst (LoadL mem)); // result is delivered to a stack slot
6951   effect(TEMP tmp); // XMM scratch register carries the 64-bit value
6952   ins_cost(180);
6953   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
6954             "MOVSD  $dst,$tmp" %}
6955   ins_encode(enc_loadLX_volatile(mem, dst, tmp)); // encoding class is defined outside this section
6956   ins_pipe( pipe_slow );
6957 %}
6958 
6959 instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
6960   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); // SSE2 variant of the atomic long load
6961   match(Set dst (LoadL mem)); // result is delivered to a register pair instead of a stack slot
6962   effect(TEMP tmp); // XMM scratch register carries the 64-bit value
6963   ins_cost(160); // lower cost than the stack-slot form (180)
6964   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
6965             "MOVD   $dst.lo,$tmp\n\t"
6966             "PSRLQ  $tmp,32\n\t"
6967             "MOVD   $dst.hi,$tmp" %}
6968   ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp)); // encoding class is defined outside this section
6969   ins_pipe( pipe_slow );
6970 %}
6971 
6972 // Load Range: matches LoadRange and emits a plain 32-bit MOV from memory into $dst.
6973 instruct loadRange(eRegI dst, memory mem) %{
6974   match(Set dst (LoadRange mem));
6975 
6976   ins_cost(125);
6977   format %{ "MOV    $dst,$mem" %}
6978   opcode(0x8B);               /* 8B /r, MOV r32,r/m32 */
6979   ins_encode( OpcP, RegMem(dst,mem));
6980   ins_pipe( ialu_reg_mem );
6981 %}
6982 
6983 
6984 // Load Pointer: matches LoadP and emits a plain 32-bit MOV from memory into a pointer register.
6985 instruct loadP(eRegP dst, memory mem) %{
6986   match(Set dst (LoadP mem));
6987 
6988   ins_cost(125);
6989   format %{ "MOV    $dst,$mem" %}
6990   opcode(0x8B);               /* 8B /r, MOV r32,r/m32 */
6991   ins_encode( OpcP, RegMem(dst,mem));
6992   ins_pipe( ialu_reg_mem );
6993 %}
6994 
6995 // Load Klass Pointer: matches LoadKlass; the encoding is the same 32-bit MOV used by loadP.
6996 instruct loadKlass(eRegP dst, memory mem) %{
6997   match(Set dst (LoadKlass mem));
6998 
6999   ins_cost(125);
7000   format %{ "MOV    $dst,$mem" %}
7001   opcode(0x8B);               /* 8B /r, MOV r32,r/m32 */
7002   ins_encode( OpcP, RegMem(dst,mem));
7003   ins_pipe( ialu_reg_mem );
7004 %}
7005 
7006 // Load Double (x87 form, used when UseSSE<=1): pushes the 64-bit value with FLD, then pops it into $dst.
7007 instruct loadD(regD dst, memory mem) %{
7008   predicate(UseSSE<=1);
7009   match(Set dst (LoadD mem));
7010 
7011   ins_cost(150);
7012   format %{ "FLD_D  ST,$mem\n\t"
7013             "FSTP   $dst" %}
7014   opcode(0xDD);               /* DD /0 */
7015   ins_encode( OpcP, RMopc_Mem(0x00,mem),
7016               Pop_Reg_D(dst) );
7017   ins_pipe( fpu_reg_mem );
7018 %}
7019 
7020 // Load Double to XMM: MOVSD form, selected when UseSSE>=2 and UseXmmLoadAndClearUpper is set.
7021 instruct loadXD(regXD dst, memory mem) %{
7022   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
7023   match(Set dst (LoadD mem));
7024   ins_cost(145);
7025   format %{ "MOVSD  $dst,$mem" %}
7026   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem)); // F2 0F 10 /r
7027   ins_pipe( pipe_slow );
7028 %}
7029 
7030 instruct loadXD_partial(regXD dst, memory mem) %{
7031   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); // complement of loadXD's predicate
7032   match(Set dst (LoadD mem)); // load a double into an XMM register using MOVLPD
7033   ins_cost(145);
7034   format %{ "MOVLPD $dst,$mem" %}
7035   ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem)); // 66 0F 12 /r
7036   ins_pipe( pipe_slow );
7037 %}
7038 
7039 // Load to XMM register (single-precision floating point)
7040 // MOVSS instruction; selected for LoadF when UseSSE>=1.
7041 instruct loadX(regX dst, memory mem) %{
7042   predicate(UseSSE>=1);
7043   match(Set dst (LoadF mem));
7044   ins_cost(145);
7045   format %{ "MOVSS  $dst,$mem" %}
7046   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem)); // F3 0F 10 /r
7047   ins_pipe( pipe_slow );
7048 %}
7049 
7050 // Load Float (x87 form, used only when UseSSE==0): pushes the 32-bit value with FLD, then pops it into $dst.
7051 instruct loadF(regF dst, memory mem) %{
7052   predicate(UseSSE==0);
7053   match(Set dst (LoadF mem));
7054 
7055   ins_cost(150);
7056   format %{ "FLD_S  ST,$mem\n\t"
7057             "FSTP   $dst" %}
7058   opcode(0xD9);               /* D9 /0 */
7059   ins_encode( OpcP, RMopc_Mem(0x00,mem),
7060               Pop_Reg_F(dst) );
7061   ins_pipe( fpu_reg_mem );
7062 %}
7063 
7064 // Load Aligned Packed Byte to XMM register: matches Load8B and moves the 64-bit group with MOVQ.
7065 instruct loadA8B(regXD dst, memory mem) %{
7066   predicate(UseSSE>=1);
7067   match(Set dst (Load8B mem));
7068   ins_cost(125);
7069   format %{ "MOVQ  $dst,$mem\t! packed8B" %}
7070   ins_encode( movq_ld(dst, mem)); // encoding class is defined outside this section
7071   ins_pipe( pipe_slow );
7072 %}
7073 
7074 // Load Aligned Packed Short to XMM register: matches Load4S and moves the 64-bit group with MOVQ.
7075 instruct loadA4S(regXD dst, memory mem) %{
7076   predicate(UseSSE>=1);
7077   match(Set dst (Load4S mem));
7078   ins_cost(125);
7079   format %{ "MOVQ  $dst,$mem\t! packed4S" %}
7080   ins_encode( movq_ld(dst, mem)); // encoding class is defined outside this section
7081   ins_pipe( pipe_slow );
7082 %}
7083 
7084 // Load Aligned Packed Char to XMM register: matches Load4C and moves the 64-bit group with MOVQ.
7085 instruct loadA4C(regXD dst, memory mem) %{
7086   predicate(UseSSE>=1);
7087   match(Set dst (Load4C mem));
7088   ins_cost(125);
7089   format %{ "MOVQ  $dst,$mem\t! packed4C" %}
7090   ins_encode( movq_ld(dst, mem)); // encoding class is defined outside this section
7091   ins_pipe( pipe_slow );
7092 %}
7093 
7094 // Load Aligned Packed Integer to XMM register: matches Load2I and moves the 64-bit group with MOVQ.
7095 instruct load2IU(regXD dst, memory mem) %{
7096   predicate(UseSSE>=1);
7097   match(Set dst (Load2I mem));
7098   ins_cost(125);
7099   format %{ "MOVQ  $dst,$mem\t! packed2I" %}
7100   ins_encode( movq_ld(dst, mem)); // encoding class is defined outside this section
7101   ins_pipe( pipe_slow );
7102 %}
7103 
7104 // Load Aligned Packed Single to XMM: matches Load2F and moves the 64-bit group with MOVQ.
7105 instruct loadA2F(regXD dst, memory mem) %{
7106   predicate(UseSSE>=1);
7107   match(Set dst (Load2F mem));
7108   ins_cost(145); // costed higher than the integer packed loads (125)
7109   format %{ "MOVQ  $dst,$mem\t! packed2F" %}
7110   ins_encode( movq_ld(dst, mem)); // encoding class is defined outside this section
7111   ins_pipe( pipe_slow );
7112 %}
7113 
7114 // Load Effective Address: materializes an indOffset8 address expression into a pointer register with LEA.
7115 instruct leaP8(eRegP dst, indOffset8 mem) %{
7116   match(Set dst mem);
7117 
7118   ins_cost(110);
7119   format %{ "LEA    $dst,$mem" %}
7120   opcode(0x8D);               /* 8D /r, LEA r32,m */
7121   ins_encode( OpcP, RegMem(dst,mem));
7122   ins_pipe( ialu_reg_reg_fat );
7123 %}
7124 
7125 instruct leaP32(eRegP dst, indOffset32 mem) %{
7126   match(Set dst mem); // LEA of an indOffset32 address expression; the operand itself is the matched tree
7127 
7128   ins_cost(110);
7129   format %{ "LEA    $dst,$mem" %}
7130   opcode(0x8D);               /* 8D /r, LEA r32,m */
7131   ins_encode( OpcP, RegMem(dst,mem));
7132   ins_pipe( ialu_reg_reg_fat );
7133 %}
7134 
7135 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
7136   match(Set dst mem); // LEA of an indIndexOffset address expression; the operand itself is the matched tree
7137 
7138   ins_cost(110);
7139   format %{ "LEA    $dst,$mem" %}
7140   opcode(0x8D);               /* 8D /r, LEA r32,m */
7141   ins_encode( OpcP, RegMem(dst,mem));
7142   ins_pipe( ialu_reg_reg_fat );
7143 %}
7144 
7145 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
7146   match(Set dst mem); // LEA of an indIndexScale address expression; the operand itself is the matched tree
7147 
7148   ins_cost(110);
7149   format %{ "LEA    $dst,$mem" %}
7150   opcode(0x8D);               /* 8D /r, LEA r32,m */
7151   ins_encode( OpcP, RegMem(dst,mem));
7152   ins_pipe( ialu_reg_reg_fat );
7153 %}
7154 
7155 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
7156   match(Set dst mem); // LEA of an indIndexScaleOffset address expression; the operand itself is the matched tree
7157 
7158   ins_cost(110);
7159   format %{ "LEA    $dst,$mem" %}
7160   opcode(0x8D);               /* 8D /r, LEA r32,m */
7161   ins_encode( OpcP, RegMem(dst,mem));
7162   ins_pipe( ialu_reg_reg_fat );
7163 %}
7164 
7165 // Load Constant: materializes an arbitrary int immediate into $dst with MOV (no ins_cost given, so the default cost applies).
7166 instruct loadConI(eRegI dst, immI src) %{
7167   match(Set dst src);
7168 
7169   format %{ "MOV    $dst,$src" %}
7170   ins_encode( LdImmI(dst, src) ); // encoding class is defined outside this section
7171   ins_pipe( ialu_reg_fat );
7172 %}
7173 
7174 // Load Constant zero: clears $dst with XOR instead of a MOV immediate; XOR writes the flags, hence KILL cr.
7175 instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
7176   match(Set dst src);
7177   effect(KILL cr);
7178 
7179   ins_cost(50);
7180   format %{ "XOR    $dst,$dst" %}
7181   opcode(0x33);  /* + rd */
7182   ins_encode( OpcP, RegReg( dst, dst ) );
7183   ins_pipe( ialu_reg );
7184 %}
7185 
7186 instruct loadConP(eRegP dst, immP src) %{
7187   match(Set dst src); // materialize a pointer constant into a pointer register
7188 
7189   format %{ "MOV    $dst,$src" %}
7190   opcode(0xB8);  /* + rd */
7191   ins_encode( LdImmP(dst, src) ); // encoding class is defined outside this section
7192   ins_pipe( ialu_reg_fat );
7193 %}
7194 
7195 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
7196   match(Set dst src); // materialize a long constant into a register pair, one 32-bit half at a time
7197   effect(KILL cr);
7198   ins_cost(200);
7199   format %{ "MOV    $dst.lo,$src.lo\n\t"
7200             "MOV    $dst.hi,$src.hi" %}
7201   opcode(0xB8);
7202   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); // low half first, then high half
7203   ins_pipe( ialu_reg_long_fat );
7204 %}
7205 
7206 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
7207   match(Set dst src); // long zero: clear both halves of the register pair with XOR
7208   effect(KILL cr); // XOR writes the flags
7209   ins_cost(150); // cheaper than the general loadConL (200)
7210   format %{ "XOR    $dst.lo,$dst.lo\n\t"
7211             "XOR    $dst.hi,$dst.hi" %}
7212   opcode(0x33,0x33);
7213   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
7214   ins_pipe( ialu_reg_long );
7215 %}
7216 
7217 // Load float constant on the x87 stack (FLD, then pop into $dst).  The instruction usage is guarded by predicate in operand immF().
7218 instruct loadConF(regF dst, immF src) %{
7219   match(Set dst src);
7220   ins_cost(125);
7221 
7222   format %{ "FLD_S  ST,$src\n\t"
7223             "FSTP   $dst" %}
7224   opcode(0xD9, 0x00);       /* D9 /0 */
7225   ins_encode(LdImmF(src), Pop_Reg_F(dst) );
7226   ins_pipe( fpu_reg_con );
7227 %}
7228 
7229 // Load float constant into an XMM register with MOVSS.  The instruction usage is guarded by predicate in operand immXF().
7230 instruct loadConX(regX dst, immXF con) %{
7231   match(Set dst con);
7232   ins_cost(125);
7233   format %{ "MOVSS  $dst,[$con]" %}
7234   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), LdImmX(dst, con)); // F3 0F 10 prefix/opcode bytes, then the LdImmX operand encoding
7235   ins_pipe( pipe_slow );
7236 %}
7237 
7238 // Load float 0.0 into an XMM register by XORing it with itself.  The instruction usage is guarded by predicate in operand immXF0().
7239 instruct loadConX0(regX dst, immXF0 src) %{
7240   match(Set dst src);
7241   ins_cost(100); // cheaper than the general loadConX (125)
7242   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
7243   ins_encode( Opcode(0x0F), Opcode(0x57), RegReg(dst,dst)); // 0F 57 /r
7244   ins_pipe( pipe_slow );
7245 %}
7246 
7247 // Load double constant on the x87 stack (FLD, then pop into $dst).  The instruction usage is guarded by predicate in operand immD().
7248 instruct loadConD(regD dst, immD src) %{
7249   match(Set dst src);
7250   ins_cost(125);
7251 
7252   format %{ "FLD_D  ST,$src\n\t"
7253             "FSTP   $dst" %}
7254   ins_encode(LdImmD(src), Pop_Reg_D(dst) ); // encoding classes are defined outside this section
7255   ins_pipe( fpu_reg_con );
7256 %}
7257 
7258 // Load double constant into an XMM register with MOVSD.  The instruction usage is guarded by predicate in operand immXD().
7259 instruct loadConXD(regXD dst, immXD con) %{
7260   match(Set dst con);
7261   ins_cost(125);
7262   format %{ "MOVSD  $dst,[$con]" %}
7263   ins_encode(load_conXD(dst, con)); // encoding class is defined outside this section
7264   ins_pipe( pipe_slow );
7265 %}
7266 
7267 // Load double 0.0 into an XMM register by XORing it with itself.  The instruction usage is guarded by predicate in operand immXD0().
7268 instruct loadConXD0(regXD dst, immXD0 src) %{
7269   match(Set dst src);
7270   ins_cost(100); // cheaper than the general loadConXD (125)
7271   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
7272   ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst)); // 66 0F 57 /r
7273   ins_pipe( pipe_slow );
7274 %}
7275 
7276 // Load Stack Slot: moves an int from a stack slot into a register with a 32-bit MOV.
7277 instruct loadSSI(eRegI dst, stackSlotI src) %{
7278   match(Set dst src);
7279   ins_cost(125);
7280 
7281   format %{ "MOV    $dst,$src" %}
7282   opcode(0x8B);               /* 8B /r, MOV r32,r/m32 */
7283   ins_encode( OpcP, RegMem(dst,src));
7284   ins_pipe( ialu_reg_mem );
7285 %}
7286 
7287 instruct loadSSL(eRegL dst, stackSlotL src) %{
7288   match(Set dst src); // load a long from a stack slot into a register pair with two 32-bit MOVs
7289 
7290   ins_cost(200);
7291   format %{ "MOV    $dst.lo,$src\n\t"
7292             "MOV    $dst.hi,$src+4" %}
7293   opcode(0x8B, 0x8B);
7294   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) ); // low half, then high half
7295   ins_pipe( ialu_mem_long_reg ); // NOTE(review): store-style pipe class on a load (loadL uses ialu_reg_long_mem) - confirm
7296 %}
7297 
7298 // Load Stack Slot: moves a pointer from a stack slot into a pointer register with a 32-bit MOV.
7299 instruct loadSSP(eRegP dst, stackSlotP src) %{
7300   match(Set dst src);
7301   ins_cost(125);
7302 
7303   format %{ "MOV    $dst,$src" %}
7304   opcode(0x8B);               /* 8B /r, MOV r32,r/m32 */
7305   ins_encode( OpcP, RegMem(dst,src));
7306   ins_pipe( ialu_reg_mem );
7307 %}
7308 
7309 // Load Stack Slot: moves a float from a stack slot into an x87 register (FLD from the slot, then pop into $dst).
7310 instruct loadSSF(regF dst, stackSlotF src) %{
7311   match(Set dst src);
7312   ins_cost(125);
7313 
7314   format %{ "FLD_S  $src\n\t"
7315             "FSTP   $dst" %}
7316   opcode(0xD9);               /* D9 /0, FLD m32real */
7317   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
7318               Pop_Reg_F(dst) );
7319   ins_pipe( fpu_reg_mem );
7320 %}
7321 
7322 // Load Stack Slot: moves a double from a stack slot into an x87 register (FLD from the slot, then pop into $dst).
7323 instruct loadSSD(regD dst, stackSlotD src) %{
7324   match(Set dst src);
7325   ins_cost(125);
7326 
7327   format %{ "FLD_D  $src\n\t"
7328             "FSTP   $dst" %}
7329   opcode(0xDD);               /* DD /0, FLD m64real */
7330   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
7331               Pop_Reg_D(dst) );
7332   ins_pipe( fpu_reg_mem );
7333 %}
7334 
7335 // Prefetch instructions.
7336 // Must be safe to execute with invalid address (cannot fault).
7337 
7338 instruct prefetchr0( memory mem ) %{
7339   predicate(UseSSE==0 && !VM_Version::supports_3dnow()); // neither SSE nor 3DNow! prefetch is available
7340   match(PrefetchRead mem);
7341   ins_cost(0);
7342   size(0); // emits no bytes: the read prefetch is dropped entirely
7343   format %{ "PREFETCHR (non-SSE is empty encoding)" %}
7344   ins_encode();
7345   ins_pipe(empty);
7346 %}
7347 
7348 instruct prefetchr( memory mem ) %{
7349   predicate(UseSSE==0 && VM_Version::supports_3dnow() || ReadPrefetchInstr==3); // parses as (no SSE && 3DNow!) || ReadPrefetchInstr==3
7350   match(PrefetchRead mem);
7351   ins_cost(100);
7352 
7353   format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
7354   opcode(0x0F, 0x0d);     /* Opcode 0F 0d /0 */
7355   ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
7356   ins_pipe(ialu_mem);
7357 %}
7358 
7359 instruct prefetchrNTA( memory mem ) %{
7360   predicate(UseSSE>=1 && ReadPrefetchInstr==0); // ReadPrefetchInstr value 0 selects the NTA form
7361   match(PrefetchRead mem);
7362   ins_cost(100);
7363 
7364   format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
7365   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
7366   ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
7367   ins_pipe(ialu_mem);
7368 %}
7369 
7370 instruct prefetchrT0( memory mem ) %{
7371   predicate(UseSSE>=1 && ReadPrefetchInstr==1); // ReadPrefetchInstr value 1 selects the T0 form
7372   match(PrefetchRead mem);
7373   ins_cost(100);
7374 
7375   format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
7376   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
7377   ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
7378   ins_pipe(ialu_mem);
7379 %}
7380 
7381 instruct prefetchrT2( memory mem ) %{
7382   predicate(UseSSE>=1 && ReadPrefetchInstr==2); // ReadPrefetchInstr value 2 selects the T2 form
7383   match(PrefetchRead mem);
7384   ins_cost(100);
7385 
7386   format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
7387   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
7388   ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
7389   ins_pipe(ialu_mem);
7390 %}
7391 
7392 instruct prefetchw0( memory mem ) %{
7393   predicate(UseSSE==0 && !VM_Version::supports_3dnow()); // neither SSE nor 3DNow! prefetch is available
7394   match(PrefetchWrite mem);
7395   ins_cost(0);
7396   size(0); // emits no bytes: the write prefetch is dropped entirely
7397   format %{ "Prefetch (non-SSE is empty encoding)" %}
7398   ins_encode();
7399   ins_pipe(empty);
7400 %}
7401 
7402 instruct prefetchw( memory mem ) %{
7403   predicate(UseSSE==0 && VM_Version::supports_3dnow() || AllocatePrefetchInstr==3); // parses as (no SSE && 3DNow!) || AllocatePrefetchInstr==3
7404   match( PrefetchWrite mem );
7405   ins_cost(100);
7406 
7407   format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
7408   opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
7409   ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
7410   ins_pipe(ialu_mem);
7411 %}
7412 
7413 instruct prefetchwNTA( memory mem ) %{
7414   predicate(UseSSE>=1 && AllocatePrefetchInstr==0); // AllocatePrefetchInstr value 0 selects the NTA form
7415   match(PrefetchWrite mem);
7416   ins_cost(100);
7417 
7418   format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
7419   opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
7420   ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
7421   ins_pipe(ialu_mem);
7422 %}
7423 
7424 instruct prefetchwT0( memory mem ) %{
7425   predicate(UseSSE>=1 && AllocatePrefetchInstr==1); // AllocatePrefetchInstr value 1 selects the T0 form
7426   match(PrefetchWrite mem);
7427   ins_cost(100);
7428 
7429   format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for write" %}
7430   opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
7431   ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
7432   ins_pipe(ialu_mem);
7433 %}
7434 
7435 instruct prefetchwT2( memory mem ) %{
7436   predicate(UseSSE>=1 && AllocatePrefetchInstr==2); // AllocatePrefetchInstr value 2 selects the T2 form
7437   match(PrefetchWrite mem);
7438   ins_cost(100);
7439 
7440   format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for write" %}
7441   opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
7442   ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
7443   ins_pipe(ialu_mem);
7444 %}
7445 
7446 //----------Store Instructions-------------------------------------------------
7447 
7448 // Store Byte: matches StoreB and writes the low byte of $src to $mem with an 8-bit MOV.
7449 instruct storeB(memory mem, xRegI src) %{
7450   match(Set mem (StoreB mem src));
7451 
7452   ins_cost(125);
7453   format %{ "MOV8   $mem,$src" %}
7454   opcode(0x88);               /* 88 /r, MOV r/m8,r8 */
7455   ins_encode( OpcP, RegMem( src, mem ) );
7456   ins_pipe( ialu_mem_reg );
7457 %}
7458 
7459 // Store Char/Short: matches StoreC and writes 16 bits of $src to $mem (32-bit MOV opcode behind a 0x66 operand-size prefix).
7460 instruct storeC(memory mem, eRegI src) %{
7461   match(Set mem (StoreC mem src));
7462 
7463   ins_cost(125);
7464   format %{ "MOV16  $mem,$src" %}
7465   opcode(0x89, 0x66);
7466   ins_encode( OpcS, OpcP, RegMem( src, mem ) ); // secondary opcode (0x66 prefix) is emitted before the primary (0x89)
7467   ins_pipe( ialu_mem_reg );
7468 %}
7469 
7470 // Store Integer: matches StoreI and writes $src to $mem with a 32-bit MOV.
7471 instruct storeI(memory mem, eRegI src) %{
7472   match(Set mem (StoreI mem src));
7473 
7474   ins_cost(125);
7475   format %{ "MOV    $mem,$src" %}
7476   opcode(0x89);               /* 89 /r, MOV r/m32,r32 */
7477   ins_encode( OpcP, RegMem( src, mem ) );
7478   ins_pipe( ialu_mem_reg );
7479 %}
7480 
7481 // Store Long (non-atomic form): used when the StoreL node does not require atomic access; writes the two halves with separate 32-bit MOVs.
7482 instruct storeL(long_memory mem, eRegL src) %{
7483   predicate(!((StoreLNode*)n)->require_atomic_access());
7484   match(Set mem (StoreL mem src));
7485 
7486   ins_cost(200);
7487   format %{ "MOV    $mem,$src.lo\n\t"
7488             "MOV    $mem+4,$src.hi" %}
7489   opcode(0x89, 0x89);
7490   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) ); // low half to $mem, then high half to $mem+4
7491   ins_pipe( ialu_mem_long_reg );
7492 %}
7493 
7494 // Store Long to Integer: matches StoreI of ConvL2I and stores only the low register of the long pair, so no separate conversion is emitted.
7495 instruct storeL2I(memory mem, eRegL src) %{
7496   match(Set mem (StoreI mem (ConvL2I src)));
7497 
7498   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
7499   ins_encode %{
7500     __ movl($mem$$Address, $src$$Register); // $src$$Register is the low half of the pair (see format)
7501   %}
7502   ins_pipe(ialu_mem_reg);
7503 %}
7504 
7505 // Volatile Store Long.  Must be atomic, so move it into
7506 // the FP TOS and then do a 64-bit FIST.  Has to probe the
7507 // target address before the store (for null-ptr checks)
7508 // so the memory operand is used twice in the encoding.  Selected when UseSSE<=1; the source is a stack slot.
7509 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
7510   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
7511   match(Set mem (StoreL mem src));
7512   effect( KILL cr ); // the CMP probe writes the flags
7513   ins_cost(400);
7514   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
7515             "FILD   $src\n\t"
7516             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
7517   opcode(0x3B);
7518   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src)); // probe first, then the store encoding (defined outside this section)
7519   ins_pipe( fpu_reg_mem );
7520 %}
7521 
7522 instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
7523   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); // SSE2 variant of the atomic long store
7524   match(Set mem (StoreL mem src)); // source long lives in a stack slot
7525   effect( TEMP tmp, KILL cr ); // XMM scratch carries the value; the CMP probe writes the flags
7526   ins_cost(380);
7527   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
7528             "MOVSD  $tmp,$src\n\t"
7529             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
7530   opcode(0x3B);
7531   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp)); // probe first, then the store encoding (defined outside this section)
7532   ins_pipe( pipe_slow );
7533 %}
7534 
7535 instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
7536   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); // SSE2 variant of the atomic long store
7537   match(Set mem (StoreL mem src)); // source long lives in a register pair
7538   effect( TEMP tmp2 , TEMP tmp, KILL cr ); // two XMM scratch registers combine the halves; the CMP probe writes the flags
7539   ins_cost(360);
7540   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
7541             "MOVD   $tmp,$src.lo\n\t"
7542             "MOVD   $tmp2,$src.hi\n\t"
7543             "PUNPCKLDQ $tmp,$tmp2\n\t"
7544             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
7545   opcode(0x3B);
7546   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2)); // probe first, then the store encoding (defined outside this section)
7547   ins_pipe( pipe_slow );
7548 %}
7549 
7550 // Store Pointer; for storing unknown oops and raw pointers.  Matches StoreP with a register source and emits a 32-bit MOV.
7551 instruct storeP(memory mem, anyRegP src) %{
7552   match(Set mem (StoreP mem src));
7553 
7554   ins_cost(125);
7555   format %{ "MOV    $mem,$src" %}
7556   opcode(0x89);               /* 89 /r, MOV r/m32,r32 */
7557   ins_encode( OpcP, RegMem( src, mem ) );
7558   ins_pipe( ialu_mem_reg );
7559 %}
7560 
7561 // Store Integer Immediate: matches StoreI of an int constant and writes it to $mem as a 32-bit immediate.
7562 instruct storeImmI(memory mem, immI src) %{
7563   match(Set mem (StoreI mem src));
7564 
7565   ins_cost(150);
7566   format %{ "MOV    $mem,$src" %}
7567   opcode(0xC7);               /* C7 /0 */
7568   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
7569   ins_pipe( ialu_mem_imm );
7570 %}
7571 
7572 // Store Short/Char Immediate
7573 instruct storeImmI16(memory mem, immI16 src) %{
       // Matches a StoreC (short/char) of a 16-bit constant; only enabled
       // when the UseStoreImmI16 flag is set.
7574   predicate(UseStoreImmI16);
7575   match(Set mem (StoreC mem src));
7576 
7577   ins_cost(150);
7578   format %{ "MOV16  $mem,$src" %}
7579   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
       // SizePrefix is emitted first, then the C7 /0 form with a 16-bit
       // immediate (Con16).
7580   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
7581   ins_pipe( ialu_mem_imm );
7582 %}
7583 
7584 // Store Pointer Immediate; null pointers or constant oops that do not
7585 // need card-mark barriers.
7586 instruct storeImmP(memory mem, immP src) %{
       // Matches a StoreP of a pointer constant; same C7 /0 + 32-bit
       // immediate encoding as storeImmI.
7587   match(Set mem (StoreP mem src));
7588 
7589   ins_cost(150);
7590   format %{ "MOV    $mem,$src" %}
7591   opcode(0xC7);               /* C7 /0 */
7592   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
7593   ins_pipe( ialu_mem_imm );
7594 %}
7595 
7596 // Store Byte Immediate
7597 instruct storeImmB(memory mem, immI8 src) %{
       // Matches a StoreB of an 8-bit constant: opcode C6 /0 with the
       // immediate emitted by Con8or32.
7598   match(Set mem (StoreB mem src));
7599 
7600   ins_cost(150);
7601   format %{ "MOV8   $mem,$src" %}
7602   opcode(0xC6);               /* C6 /0 */
7603   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
7604   ins_pipe( ialu_mem_imm );
7605 %}
7606 
7607 // Store Aligned Packed Byte XMM register to memory
7608 instruct storeA8B(memory mem, regXD src) %{
       // Matches Store8B (eight packed bytes) from an XMM register and
       // stores it with MOVQ via the movq_st encoding class.
7609   predicate(UseSSE>=1);
7610   match(Set mem (Store8B mem src));
7611   ins_cost(145);
7612   format %{ "MOVQ  $mem,$src\t! packed8B" %}
7613   ins_encode( movq_st(mem, src));
7614   ins_pipe( pipe_slow );
7615 %}
7616 
7617 // Store Aligned Packed Char/Short XMM register to memory
7618 instruct storeA4C(memory mem, regXD src) %{
       // Matches Store4C (four packed chars/shorts) from an XMM register;
       // same MOVQ encoding (movq_st) as the other packed stores.
7619   predicate(UseSSE>=1);
7620   match(Set mem (Store4C mem src));
7621   ins_cost(145);
7622   format %{ "MOVQ  $mem,$src\t! packed4C" %}
7623   ins_encode( movq_st(mem, src));
7624   ins_pipe( pipe_slow );
7625 %}
7626 
7627 // Store Aligned Packed Integer XMM register to memory
7628 instruct storeA2I(memory mem, regXD src) %{
       // Matches Store2I (two packed ints) from an XMM register; same MOVQ
       // encoding (movq_st) as the other packed stores.
7629   predicate(UseSSE>=1);
7630   match(Set mem (Store2I mem src));
7631   ins_cost(145);
7632   format %{ "MOVQ  $mem,$src\t! packed2I" %}
7633   ins_encode( movq_st(mem, src));
7634   ins_pipe( pipe_slow );
7635 %}
7636 
7637 // Store CMS card-mark Immediate
7638 instruct storeImmCM(memory mem, immI8 src) %{
       // Matches a StoreCM (card-mark store) of an 8-bit constant.  The
       // encoding is identical to storeImmB (C6 /0 + immediate).
       // NOTE(review): the "imm0" in the format suggests the value is
       // expected to be zero, but the operand class is plain immI8 -- confirm.
7639   match(Set mem (StoreCM mem src));
7640 
7641   ins_cost(150);
7642   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
7643   opcode(0xC6);               /* C6 /0 */
7644   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
7645   ins_pipe( ialu_mem_imm );
7646 %}
7647 
7648 // Store Double
7649 instruct storeD( memory mem, regDPR1 src) %{
       // x87 double store, used when UseSSE<=1.  The source operand class is
       // regDPR1; the store is emitted by enc_FP_store with primary opcode 0xDD.
7650   predicate(UseSSE<=1);
7651   match(Set mem (StoreD mem src));
7652 
7653   ins_cost(100);
7654   format %{ "FST_D  $mem,$src" %}
7655   opcode(0xDD);       /* DD /2 */
7656   ins_encode( enc_FP_store(mem,src) );
7657   ins_pipe( fpu_mem_reg );
7658 %}
7659 
7660 // Store double does rounding on x86
7661 instruct storeD_rounded( memory mem, regDPR1 src) %{
       // Folds a RoundDouble into the store: the match consumes
       // (StoreD mem (RoundDouble src)) and emits the same FST_D encoding as
       // storeD, with no separate rounding instruction.
7662   predicate(UseSSE<=1);
7663   match(Set mem (StoreD mem (RoundDouble src)));
7664 
7665   ins_cost(100);
7666   format %{ "FST_D  $mem,$src\t# round" %}
7667   opcode(0xDD);       /* DD /2 */
7668   ins_encode( enc_FP_store(mem,src) );
7669   ins_pipe( fpu_mem_reg );
7670 %}
7671 
7672 // Store XMM register to memory (double-precision floating points)
7673 // MOVSD instruction
7674 instruct storeXD(memory mem, regXD src) %{
       // SSE2 double store from an XMM register (UseSSE>=2).  The opcode
       // bytes F2 0F 11 are emitted explicitly, followed by the reg/mem operands.
7675   predicate(UseSSE>=2);
7676   match(Set mem (StoreD mem src));
7677   ins_cost(95);
7678   format %{ "MOVSD  $mem,$src" %}
7679   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
7680   ins_pipe( pipe_slow );
7681 %}
7682 
7683 // Store XMM register to memory (single-precision floating point)
7684 // MOVSS instruction
7685 instruct storeX(memory mem, regX src) %{
       // SSE single-precision store from an XMM register (UseSSE>=1).  The
       // opcode bytes F3 0F 11 are emitted explicitly, followed by the
       // reg/mem operands.
7686   predicate(UseSSE>=1);
7687   match(Set mem (StoreF mem src));
7688   ins_cost(95);
7689   format %{ "MOVSS  $mem,$src" %}
7690   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
7691   ins_pipe( pipe_slow );
7692 %}
7693 
7694 // Store Aligned Packed Single Float XMM register to memory
7695 instruct storeA2F(memory mem, regXD src) %{
       // Matches Store2F (two packed floats) from an XMM register; same MOVQ
       // encoding (movq_st) as the packed integer stores.
7696   predicate(UseSSE>=1);
7697   match(Set mem (Store2F mem src));
7698   ins_cost(145);
7699   format %{ "MOVQ  $mem,$src\t! packed2F" %}
7700   ins_encode( movq_st(mem, src));
7701   ins_pipe( pipe_slow );
7702 %}
7703 
7704 // Store Float
7705 instruct storeF( memory mem, regFPR1 src) %{
       // x87 float store, used only when SSE is off (UseSSE==0).  The source
       // operand class is regFPR1; emitted by enc_FP_store with primary opcode 0xD9.
7706   predicate(UseSSE==0);
7707   match(Set mem (StoreF mem src));
7708 
7709   ins_cost(100);
7710   format %{ "FST_S  $mem,$src" %}
7711   opcode(0xD9);       /* D9 /2 */
7712   ins_encode( enc_FP_store(mem,src) );
7713   ins_pipe( fpu_mem_reg );
7714 %}
7715 
7716 // Store Float does rounding on x86
7717 instruct storeF_rounded( memory mem, regFPR1 src) %{
       // Folds a RoundFloat into the store: the match consumes
       // (StoreF mem (RoundFloat src)) and emits the same FST_S encoding as
       // storeF, with no separate rounding instruction.
7718   predicate(UseSSE==0);
7719   match(Set mem (StoreF mem (RoundFloat src)));
7720 
7721   ins_cost(100);
7722   format %{ "FST_S  $mem,$src\t# round" %}
7723   opcode(0xD9);       /* D9 /2 */
7724   ins_encode( enc_FP_store(mem,src) );
7725   ins_pipe( fpu_mem_reg );
7726 %}
7727 
7728 // Store Float converted from a double (ConvD2F); the FST_S store does the rounding on x86
7729 instruct storeF_Drounded( memory mem, regDPR1 src) %{
       // Folds a double-to-float conversion into the store: the match
       // consumes (StoreF mem (ConvD2F src)) with src in the double class
       // regDPR1 and emits a single FST_S.  Enabled for UseSSE<=1, unlike the
       // other x87 float stores which require UseSSE==0.
7730   predicate(UseSSE<=1);
7731   match(Set mem (StoreF mem (ConvD2F src)));
7732 
7733   ins_cost(100);
7734   format %{ "FST_S  $mem,$src\t# D-round" %}
7735   opcode(0xD9);       /* D9 /2 */
7736   ins_encode( enc_FP_store(mem,src) );
7737   ins_pipe( fpu_mem_reg );
7738 %}
7739 
7740 // Store immediate Float value (it is faster than store from FPU register)
7741 // The instruction usage is guarded by predicate in operand immF().
7742 instruct storeF_imm( memory mem, immF src) %{
       // Stores a float constant as its raw 32 bits with an integer MOV
       // (C7 /0 + Con32F_as_bits).  Cost 50 is half the cost of the
       // FPU-register store forms (100).
7743   match(Set mem (StoreF mem src));
7744 
7745   ins_cost(50);
7746   format %{ "MOV    $mem,$src\t# store float" %}
7747   opcode(0xC7);               /* C7 /0 */
7748   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
7749   ins_pipe( ialu_mem_imm );
7750 %}
7751 
7752 // Store immediate Float value (it is faster than store from XMM register)
7753 // The instruction usage is guarded by predicate in operand immXF().
7754 instruct storeX_imm( memory mem, immXF src) %{
       // XMM-configuration twin of storeF_imm: same C7 /0 integer MOV, but
       // the constant operand class is immXF and the bits are emitted by
       // Con32XF_as_bits.
7755   match(Set mem (StoreF mem src));
7756 
7757   ins_cost(50);
7758   format %{ "MOV    $mem,$src\t# store float" %}
7759   opcode(0xC7);               /* C7 /0 */
7760   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32XF_as_bits( src ));
7761   ins_pipe( ialu_mem_imm );
7762 %}
7763 
7764 // Store Integer to stack slot
7765 instruct storeSSI(stackSlotI dst, eRegI src) %{
       // Plain copy (Set dst src) of an int register into an int stack slot,
       // emitted as MOV (0x89) through the OpcPRegSS encoding class.
7766   match(Set dst src);
7767 
7768   ins_cost(100);
7769   format %{ "MOV    $dst,$src" %}
7770   opcode(0x89);
7771   ins_encode( OpcPRegSS( dst, src ) );
7772   ins_pipe( ialu_mem_reg );
7773 %}
7774 
7775 // Store Pointer to stack slot
7776 instruct storeSSP(stackSlotP dst, eRegP src) %{
       // Plain copy (Set dst src) of a pointer register into a pointer stack
       // slot; same MOV (0x89) / OpcPRegSS encoding as storeSSI.
7777   match(Set dst src);
7778 
7779   ins_cost(100);
7780   format %{ "MOV    $dst,$src" %}
7781   opcode(0x89);
7782   ins_encode( OpcPRegSS( dst, src ) );
7783   ins_pipe( ialu_mem_reg );
7784 %}
7785 
7786 // Store Long to stack slot
7787 instruct storeSSL(stackSlotL dst, eRegL src) %{
       // Copies a long register pair into a long stack slot using two MOVs:
       // the low half to $dst and the high half to $dst+4 (per the format).
7788   match(Set dst src);
7789 
7790   ins_cost(200);
7791   format %{ "MOV    $dst,$src.lo\n\t"
7792             "MOV    $dst+4,$src.hi" %}
       // Primary and secondary opcodes are both 0x89; OpcP/RegMem emits the
       // first MOV and OpcS/RegMem_Hi the second.
7793   opcode(0x89, 0x89);
7794   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
7795   ins_pipe( ialu_mem_long_reg );
7796 %}
7797 
7798 //----------MemBar Instructions-----------------------------------------------
7799 // Memory barrier flavors
7800 
7801 instruct membar_acquire() %{
       // Generic MemBarAcquire rule: emits no code (size 0, empty encoding).
       // Its cost of 400 lets the zero-cost membar_acquire_lock win whenever
       // that rule's predicate holds.
7802   match(MemBarAcquire);
7803   ins_cost(400);
7804 
7805   size(0);
7806   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7807   ins_encode();
7808   ins_pipe(empty);
7809 %}
7810 
7811 instruct membar_acquire_lock() %{
       // MemBarAcquire that follows a FastLock (Matcher::prior_fast_lock(n)).
       // Emits nothing; per the format, the CMPXCHG in FastLock already
       // provides the ordering.  Cost 0.
7812   match(MemBarAcquire);
7813   predicate(Matcher::prior_fast_lock(n));
7814   ins_cost(0);
7815 
7816   size(0);
7817   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7818   ins_encode( );
7819   ins_pipe(empty);
7820 %}
7821 
7822 instruct membar_release() %{
       // Generic MemBarRelease rule: emits no code (size 0, empty encoding).
       // Its cost of 400 lets the zero-cost membar_release_lock win whenever
       // that rule's predicate holds.
7823   match(MemBarRelease);
7824   ins_cost(400);
7825 
7826   size(0);
7827   format %{ "MEMBAR-release ! (empty encoding)" %}
7828   ins_encode( );
7829   ins_pipe(empty);
7830 %}
7831 
7832 instruct membar_release_lock() %{
       // MemBarRelease that is followed by a FastUnlock
       // (Matcher::post_fast_unlock(n)).  Emits nothing; cost 0.
7833   match(MemBarRelease);
7834   predicate(Matcher::post_fast_unlock(n));
7835   ins_cost(0);
7836 
7837   size(0);
7838   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7839   ins_encode( );
7840   ins_pipe(empty);
7841 %}
7842 
7843 instruct membar_volatile(eFlagsReg cr) %{
       // Generic MemBarVolatile rule.  The encoding delegates to
       // MacroAssembler membar(StoreLoad); flags are declared killed.
7844   match(MemBarVolatile);
7845   effect(KILL cr);
7846   ins_cost(400);
7847 
       // The format is a $$template so the printed text depends on
       // os::is_MP(): a LOCK ADDL on multiprocessors, nothing otherwise.
7848   format %{ 
7849     $$template
7850     if (os::is_MP()) {
7851       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
7852     } else {
7853       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7854     }
7855   %}
7856   ins_encode %{
7857     __ membar(Assembler::StoreLoad);
7858   %}
7859   ins_pipe(pipe_slow);
7860 %}
7861 
7862 instruct unnecessary_membar_volatile() %{
       // MemBarVolatile for which Matcher::post_store_load_barrier(n) holds,
       // i.e. the barrier is redundant.  Emits nothing; cost 0 so it beats
       // membar_volatile (cost 400) whenever the predicate is true.
7863   match(MemBarVolatile);
7864   predicate(Matcher::post_store_load_barrier(n));
7865   ins_cost(0);
7866 
7867   size(0);
7868   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7869   ins_encode( );
7870   ins_pipe(empty);
7871 %}
7872 
7873 //----------Move Instructions--------------------------------------------------
7874 instruct castX2P(eAXRegP dst, eAXRegI src) %{
       // Integer-to-pointer cast with an empty encoding and zero cost.
       // Both operands use EAX-specific classes (eAXRegP / eAXRegI), which
       // presumably pins them to the same register so no move is needed.
7875   match(Set dst (CastX2P src));
7876   format %{ "# X2P  $dst, $src" %}
7877   ins_encode( /*empty encoding*/ );
7878   ins_cost(0);
7879   ins_pipe(empty);
7880 %}
7881 
7882 instruct castP2X(eRegI dst, eRegP src ) %{
       // Pointer-to-integer cast implemented as a register copy (enc_Copy).
7883   match(Set dst (CastP2X src));
7884   ins_cost(50);
7885   format %{ "MOV    $dst, $src\t# CastP2X" %}
7886   ins_encode( enc_Copy( dst, src) );
7887   ins_pipe( ialu_reg_reg );
7888 %}
7889 
7890 //----------Conditional Move---------------------------------------------------
7891 // Conditional move
7892 instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
       // Integer conditional move, register source, signed flags/condition
       // classes (eFlagsReg / cmpOp).  Requires hardware CMOV support.
7893   predicate(VM_Version::supports_cmov() );
7894   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7895   ins_cost(200);
7896   format %{ "CMOV$cop $dst,$src" %}
       // Base opcode 0F 40; enc_cmov(cop) emits it for the matched condition.
7897   opcode(0x0F,0x40);
7898   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7899   ins_pipe( pipe_cmov_reg );
7900 %}
7901 
7902 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
       // Unsigned-compare twin of cmovI_reg: identical encoding, but matches
       // the unsigned flags/condition classes (eFlagsRegU / cmpOpU).
7903   predicate(VM_Version::supports_cmov() );
7904   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7905   ins_cost(200);
7906   format %{ "CMOV$cop $dst,$src" %}
7907   opcode(0x0F,0x40);
7908   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7909   ins_pipe( pipe_cmov_reg );
7910 %}
7911 
7912 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
       // Variant for the eFlagsRegUCF / cmpOpUCF classes; it has no encoding
       // of its own and simply expands to cmovI_regU.
7913   predicate(VM_Version::supports_cmov() );
7914   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7915   ins_cost(200);
7916   expand %{
7917     cmovI_regU(cop, cr, dst, src);
7918   %}
7919 %}
7920 
7921 // Conditional move
7922 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
       // Integer conditional move with the LoadI folded in as a memory
       // source operand.  Costs 250 versus 200 for the register form.
7923   predicate(VM_Version::supports_cmov() );
7924   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7925   ins_cost(250);
7926   format %{ "CMOV$cop $dst,$src" %}
7927   opcode(0x0F,0x40);
7928   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7929   ins_pipe( pipe_cmov_mem );
7930 %}
7931 
7932 // Conditional move
7933 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
       // Unsigned-compare twin of cmovI_mem: identical encoding, but matches
       // the unsigned flags/condition classes (eFlagsRegU / cmpOpU).
7934   predicate(VM_Version::supports_cmov() );
7935   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7936   ins_cost(250);
7937   format %{ "CMOV$cop $dst,$src" %}
7938   opcode(0x0F,0x40);
7939   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7940   ins_pipe( pipe_cmov_mem );
7941 %}
7942 
7943 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{
       // Variant for the eFlagsRegUCF / cmpOpUCF classes; it has no encoding
       // of its own and simply expands to cmovI_memU.
7944   predicate(VM_Version::supports_cmov() );
7945   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7946   ins_cost(250);
7947   expand %{
7948     cmovI_memU(cop, cr, dst, src);
7949   %}
7950 %}
7951 
7952 // Conditional move
7953 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
       // Pointer conditional move, register source, signed flags/condition
       // classes.  Requires hardware CMOV; same 0F 40 encoding as cmovI_reg.
7954   predicate(VM_Version::supports_cmov() );
7955   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7956   ins_cost(200);
7957   format %{ "CMOV$cop $dst,$src\t# ptr" %}
7958   opcode(0x0F,0x40);
7959   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7960   ins_pipe( pipe_cmov_reg );
7961 %}
7962 
7963 // Conditional move (non-P6 version)
7964 // Note:  a CMoveP is generated for  stubs and native wrappers
7965 //        regardless of whether we are on a P6, so we
7966 //        emulate a cmov here
7967 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
       // Fallback with no predicate, so it is always available.  Its cost of
       // 300 makes the real CMOV rule (cmovP_reg, 200) win when that rule's
       // predicate holds.  Emulates CMOV with a branch over a MOV.
7968   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7969   ins_cost(300);
7970   format %{ "Jn$cop   skip\n\t"
7971           "MOV    $dst,$src\t# pointer\n"
7972       "skip:" %}
       // 0x8b is the MOV emitted after the branch; the 0x2 passed to
       // enc_cmov_branch is presumably the branch displacement, i.e. the
       // size of that 2-byte MOV -- TODO confirm against the encoding class.
7973   opcode(0x8b);
7974   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
7975   ins_pipe( pipe_cmov_reg );
7976 %}
7977 
7978 // Conditional move
7979 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
       // Unsigned-compare twin of cmovP_reg: identical encoding, but matches
       // the unsigned flags/condition classes (eFlagsRegU / cmpOpU).
7980   predicate(VM_Version::supports_cmov() );
7981   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7982   ins_cost(200);
7983   format %{ "CMOV$cop $dst,$src\t# ptr" %}
7984   opcode(0x0F,0x40);
7985   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7986   ins_pipe( pipe_cmov_reg );
7987 %}
7988 
7989 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
       // Variant for the eFlagsRegUCF / cmpOpUCF classes; it has no encoding
       // of its own and simply expands to cmovP_regU.
7990   predicate(VM_Version::supports_cmov() );
7991   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7992   ins_cost(200);
7993   expand %{
7994     cmovP_regU(cop, cr, dst, src);
7995   %}
7996 %}
7997 
7998 // DISABLED: Requires the ADLC to emit a bottom_type call that
7999 // correctly meets the two pointer arguments; one is an incoming
8000 // register but the other is a memory operand.  ALSO appears to
8001 // be buggy with implicit null checks.
8002 //
8003 //// Conditional move
8004 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
8005 //  predicate(VM_Version::supports_cmov() );
8006 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8007 //  ins_cost(250);
8008 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
8009 //  opcode(0x0F,0x40);
8010 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
8011 //  ins_pipe( pipe_cmov_mem );
8012 //%}
8013 //
8014 //// Conditional move
8015 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
8016 //  predicate(VM_Version::supports_cmov() );
8017 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8018 //  ins_cost(250);
8019 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
8020 //  opcode(0x0F,0x40);
8021 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
8022 //  ins_pipe( pipe_cmov_mem );
8023 //%}
8024 
8025 // Conditional move
8026 instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
       // x87 conditional move of a double (UseSSE<=1) for unsigned flags.
       // The destination operand class is regDPR1; the condition uses the
       // dedicated cmpOp_fcmov operand class.
8027   predicate(UseSSE<=1);
8028   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8029   ins_cost(200);
8030   format %{ "FCMOV$cop $dst,$src\t# double" %}
8031   opcode(0xDA);
8032   ins_encode( enc_cmov_d(cop,src) );
8033   ins_pipe( pipe_cmovD_reg );
8034 %}
8035 
8036 // Conditional move
8037 instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
       // x87 conditional move of a float (UseSSE==0) for unsigned flags.
       // Reuses the double rule's encoding class (enc_cmov_d) and pipeline
       // class (pipe_cmovD_reg).
8038   predicate(UseSSE==0);
8039   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8040   ins_cost(200);
8041   format %{ "FCMOV$cop $dst,$src\t# float" %}
8042   opcode(0xDA);
8043   ins_encode( enc_cmov_d(cop,src) );
8044   ins_pipe( pipe_cmovD_reg );
8045 %}
8046 
8047 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
8048 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
       // Signed-compare conditional move of an x87 double (UseSSE<=1).  There
       // is no FCMOV for signed conditions (see the comment above this rule),
       // so it is emulated with a branch over a move.
8049   predicate(UseSSE<=1);
8050   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8051   ins_cost(200);
8052   format %{ "Jn$cop   skip\n\t"
8053             "MOV    $dst,$src\t# double\n"
8054       "skip:" %}
8055   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
       // The move is a push of src (Push_Reg_D) followed by the DD-opcode
       // store into dst.  The 0x4 given to enc_cmov_branch is presumably the
       // byte length of that skipped sequence -- TODO confirm.
8056   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
8057   ins_pipe( pipe_cmovD_reg );
8058 %}
8059 
8060 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
8061 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
       // Signed-compare conditional move of an x87 float (UseSSE==0),
       // emulated with a branch over a move like fcmovD_regS.
8062   predicate(UseSSE==0);
8063   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8064   ins_cost(200);
8065   format %{ "Jn$cop    skip\n\t"
8066             "MOV    $dst,$src\t# float\n"
8067       "skip:" %}
8068   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
       // Same shape as the double rule, but the push uses Push_Reg_F.
8069   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
8070   ins_pipe( pipe_cmovD_reg );
8071 %}
8072 
8073 // No CMOVE with SSE/SSE2
8074 instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
       // Conditional move of an XMM float (UseSSE>=1), signed flags.  There
       // is no conditional move for XMM registers, so it is emulated with a
       // short branch around a movflt.
8075   predicate (UseSSE>=1);
8076   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8077   ins_cost(200);
8078   format %{ "Jn$cop   skip\n\t"
8079             "MOVSS  $dst,$src\t# float\n"
8080       "skip:" %}
8081   ins_encode %{
8082     Label skip;
8083     // Invert sense of branch from sense of CMOV
         // (XOR with 1 flips the low bit of the condition code.)
8084     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8085     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
8086     __ bind(skip);
8087   %}
8088   ins_pipe( pipe_slow );
8089 %}
8090 
8091 // No CMOVE with SSE/SSE2
8092 instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
       // Conditional move of an XMM double (UseSSE>=2), signed flags.  There
       // is no conditional move for XMM registers, so it is emulated with a
       // short branch around a movdbl.
8093   predicate (UseSSE>=2);
8094   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8095   ins_cost(200);
       // The format text labels the move as "double": this rule matches
       // CMoveD and emits movdbl.  Only the disassembly text is affected.
8096   format %{ "Jn$cop   skip\n\t"
8097             "MOVSD  $dst,$src\t# double\n"
8098       "skip:" %}
8099   ins_encode %{
8100     Label skip;
8101     // Invert sense of branch from sense of CMOV
         // (XOR with 1 flips the low bit of the condition code.)
8102     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8103     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8104     __ bind(skip);
8105   %}
8106   ins_pipe( pipe_slow );
8107 %}
8108 
8109 // unsigned version
8110 instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
       // Unsigned-flags twin of fcmovX_regS: same branch-around-movflt
       // emulation, but matches eFlagsRegU / cmpOpU.
8111   predicate (UseSSE>=1);
8112   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8113   ins_cost(200);
8114   format %{ "Jn$cop   skip\n\t"
8115             "MOVSS  $dst,$src\t# float\n"
8116       "skip:" %}
8117   ins_encode %{
8118     Label skip;
8119     // Invert sense of branch from sense of CMOV
         // (XOR with 1 flips the low bit of the condition code.)
8120     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8121     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
8122     __ bind(skip);
8123   %}
8124   ins_pipe( pipe_slow );
8125 %}
8126 
8127 instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
       // Variant for the eFlagsRegUCF / cmpOpUCF classes; it has no encoding
       // of its own and simply expands to fcmovX_regU.
8128   predicate (UseSSE>=1);
8129   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8130   ins_cost(200);
8131   expand %{
8132     fcmovX_regU(cop, cr, dst, src);
8133   %}
8134 %}
8135 
8136 // unsigned version
8137 instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
       // Conditional move of an XMM double (UseSSE>=2), unsigned flags
       // (eFlagsRegU / cmpOpU).  Emulated with a short branch around a movdbl.
8138   predicate (UseSSE>=2);
8139   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8140   ins_cost(200);
       // The format text labels the move as "double": this rule matches
       // CMoveD and emits movdbl.  Only the disassembly text is affected.
8141   format %{ "Jn$cop   skip\n\t"
8142             "MOVSD  $dst,$src\t# double\n"
8143       "skip:" %}
8144   ins_encode %{
8145     Label skip;
8146     // Invert sense of branch from sense of CMOV
         // (XOR with 1 flips the low bit of the condition code.)
8147     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8148     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8149     __ bind(skip);
8150   %}
8151   ins_pipe( pipe_slow );
8152 %}
8153 
8154 instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
       // Variant for the eFlagsRegUCF / cmpOpUCF classes; it has no encoding
       // of its own and simply expands to fcmovXD_regU.
8155   predicate (UseSSE>=2);
8156   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8157   ins_cost(200);
8158   expand %{
8159     fcmovXD_regU(cop, cr, dst, src);
8160   %}
8161 %}
8162 
8163 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
       // Conditional move of a 64-bit long held in a register pair, signed
       // flags.  Emits two CMOVs on the same condition, one for the low
       // halves and one for the high halves.
8164   predicate(VM_Version::supports_cmov() );
8165   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8166   ins_cost(200);
8167   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
8168             "CMOV$cop $dst.hi,$src.hi" %}
8169   opcode(0x0F,0x40);
8170   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
8171   ins_pipe( pipe_cmov_reg_long );
8172 %}
8173 
8174 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
       // Unsigned-compare twin of cmovL_reg: identical two-CMOV encoding,
       // but matches the unsigned flags/condition classes.
8175   predicate(VM_Version::supports_cmov() );
8176   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8177   ins_cost(200);
8178   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
8179             "CMOV$cop $dst.hi,$src.hi" %}
8180   opcode(0x0F,0x40);
8181   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
8182   ins_pipe( pipe_cmov_reg_long );
8183 %}
8184 
8185 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
       // Variant for the eFlagsRegUCF / cmpOpUCF classes; it has no encoding
       // of its own and simply expands to cmovL_regU.
8186   predicate(VM_Version::supports_cmov() );
8187   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8188   ins_cost(200);
8189   expand %{
8190     cmovL_regU(cop, cr, dst, src);
8191   %}
8192 %}
8193 
8194 //----------Arithmetic Instructions--------------------------------------------
8195 //----------Addition Instructions----------------------------------------------
8196 // Integer Addition Instructions
8197 instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
       // Two-operand integer add, register + register (dst += src).
       // Flags are declared killed.  Fixed size of 2 bytes: opcode 0x03 plus
       // one reg/reg operand byte.
8198   match(Set dst (AddI dst src));
8199   effect(KILL cr);
8200 
8201   size(2);
8202   format %{ "ADD    $dst,$src" %}
8203   opcode(0x03);
8204   ins_encode( OpcP, RegReg( dst, src) );
8205   ins_pipe( ialu_reg_reg );
8206 %}
8207 
8208 instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
       // Integer add of a constant to a register (dst += imm).  No size() is
       // declared; the immediate goes through Con8or32, which by its name
       // picks an 8- or 32-bit form.
8209   match(Set dst (AddI dst src));
8210   effect(KILL cr);
8211 
8212   format %{ "ADD    $dst,$src" %}
8213   opcode(0x81, 0x00); /* /0 id */
8214   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8215   ins_pipe( ialu_reg );
8216 %}
8217 
8218 instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
       // Add of the constant 1 (operand class immI1) emitted as a one-byte
       // INC.  Only enabled when the UseIncDec flag is set.
8219   predicate(UseIncDec);
8220   match(Set dst (AddI dst src));
8221   effect(KILL cr);
8222 
8223   size(1);
8224   format %{ "INC    $dst" %}
       // Single-byte form: base opcode 0x40 plus the register number.
8225   opcode(0x40); /*  */
8226   ins_encode( Opc_plus( primary, dst ) );
8227   ins_pipe( ialu_reg );
8228 %}
8229 
8230 instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
       // Three-operand integer add via LEA: dst = src0 + constant, with dst
       // distinct from the source.  No flags operand and no KILL effect are
       // declared, unlike the ADD forms.
8231   match(Set dst (AddI src0 src1));
8232   ins_cost(110);
8233 
8234   format %{ "LEA    $dst,[$src0 + $src1]" %}
8235   opcode(0x8D); /* 0x8D /r */
8236   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
8237   ins_pipe( ialu_reg_reg );
8238 %}
8239 
8240 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
       // Pointer twin of leaI_eReg_immI: dst = src0 + constant via LEA, with
       // no flags operand declared.
8241   match(Set dst (AddP src0 src1));
8242   ins_cost(110);
8243 
8244   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
8245   opcode(0x8D); /* 0x8D /r */
8246   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
8247   ins_pipe( ialu_reg_reg );
8248 %}
8249 
8250 instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
       // Add of the constant -1 (operand class immI_M1) emitted as a one-byte
       // DEC.  Only enabled when the UseIncDec flag is set.
8251   predicate(UseIncDec);
8252   match(Set dst (AddI dst src));
8253   effect(KILL cr);
8254 
8255   size(1);
8256   format %{ "DEC    $dst" %}
       // Single-byte form: base opcode 0x48 plus the register number.
8257   opcode(0x48); /*  */
8258   ins_encode( Opc_plus( primary, dst ) );
8259   ins_pipe( ialu_reg );
8260 %}
8261 
8262 instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
       // Pointer + integer register add (dst += src); same 2-byte 0x03
       // encoding as addI_eReg.
8263   match(Set dst (AddP dst src));
8264   effect(KILL cr);
8265 
8266   size(2);
8267   format %{ "ADD    $dst,$src" %}
8268   opcode(0x03);
8269   ins_encode( OpcP, RegReg( dst, src) );
8270   ins_pipe( ialu_reg_reg );
8271 %}
8272 
8273 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
       // Pointer + integer constant add (dst += imm); same encoding as
       // addI_eReg_imm.
8274   match(Set dst (AddP dst src));
8275   effect(KILL cr);
8276 
8277   format %{ "ADD    $dst,$src" %}
8278   opcode(0x81,0x00); /* Opcode 81 /0 id */
       // The commented-out line below is an older encoding left in the file.
8279   // ins_encode( RegImm( dst, src) );
8280   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8281   ins_pipe( ialu_reg );
8282 %}
8283 
8284 instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
       // Integer add with the LoadI folded in as a memory source operand
       // (dst += [src]); opcode 0x03 with reg/mem operands.
8285   match(Set dst (AddI dst (LoadI src)));
8286   effect(KILL cr);
8287 
8288   ins_cost(125);
8289   format %{ "ADD    $dst,$src" %}
8290   opcode(0x03);
8291   ins_encode( OpcP, RegMem( dst, src) );
8292   ins_pipe( ialu_reg_mem );
8293 %}
8294 
8295 instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
       // Read-modify-write add: matches load / add / store to the same
       // memory operand and emits a single ADD [dst],src (opcode 0x01).
8296   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8297   effect(KILL cr);
8298 
8299   ins_cost(150);
8300   format %{ "ADD    $dst,$src" %}
8301   opcode(0x01);  /* Opcode 01 /r */
8302   ins_encode( OpcP, RegMem( src, dst ) );
8303   ins_pipe( ialu_mem_reg );
8304 %}
8305 
8306 // Add Memory with Immediate
8307 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
       // Read-modify-write add of a constant to memory: matches load / add /
       // store to the same memory operand and emits a single ADD [dst],imm.
8308   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8309   effect(KILL cr);
8310 
8311   ins_cost(125);
8312   format %{ "ADD    $dst,$src" %}
8313   opcode(0x81);               /* Opcode 81 /0 id */
       // OpcSE(src) emits the opcode, then the /0 memory operand, then the
       // immediate via Con8or32.
8314   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
8315   ins_pipe( ialu_mem_imm );
8316 %}
8317 
8318 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
       // Read-modify-write add of the constant 1 (immI1) to memory, emitted
       // as INC [dst] (FF /0).  Unlike incI_eReg, this rule has no
       // UseIncDec predicate.
8319   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8320   effect(KILL cr);
8321 
8322   ins_cost(125);
8323   format %{ "INC    $dst" %}
8324   opcode(0xFF);               /* Opcode FF /0 */
8325   ins_encode( OpcP, RMopc_Mem(0x00,dst));
8326   ins_pipe( ialu_mem_imm );
8327 %}
8328 
8329 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
       // Read-modify-write add of the constant -1 (immI_M1) to memory,
       // emitted as DEC [dst] (FF /1).  Unlike decI_eReg, this rule has no
       // UseIncDec predicate.
8330   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8331   effect(KILL cr);
8332 
8333   ins_cost(125);
8334   format %{ "DEC    $dst" %}
8335   opcode(0xFF);               /* Opcode FF /1 */
8336   ins_encode( OpcP, RMopc_Mem(0x01,dst));
8337   ins_pipe( ialu_mem_imm );
8338 %}
8339 
8340 
8341 instruct checkCastPP( eRegP dst ) %{
       // CheckCastPP is a type-only node at this level: dst is both input and
       // output and no code is emitted (size 0, empty encoding).
8342   match(Set dst (CheckCastPP dst));
8343 
8344   size(0);
8345   format %{ "#checkcastPP of $dst" %}
8346   ins_encode( /*empty encoding*/ );
8347   ins_pipe( empty );
8348 %}
8349 
8350 instruct castPP( eRegP dst ) %{
       // CastPP emits no code: dst is both input and output and the encoding
       // is empty.  Unlike checkCastPP and castII, this rule declares neither
       // size(0) nor ins_cost(0).
8351   match(Set dst (CastPP dst));
8352   format %{ "#castPP of $dst" %}
8353   ins_encode( /*empty encoding*/ );
8354   ins_pipe( empty );
8355 %}
8356 
8357 instruct castII( eRegI dst ) %{
       // CastII emits no code: dst is both input and output, the encoding is
       // empty and the cost is 0.
8358   match(Set dst (CastII dst));
8359   format %{ "#castII of $dst" %}
8360   ins_encode( /*empty encoding*/ );
8361   ins_cost(0);
8362   ins_pipe( empty );
8363 %}
8364 
8365 
8366 // Load-locked - same as a regular pointer load when used with compare-swap
8367 instruct loadPLocked(eRegP dst, memory mem) %{
       // LoadPLocked is emitted as an ordinary pointer load: MOV (0x8B) from
       // memory into a pointer register.
8368   match(Set dst (LoadPLocked mem));
8369 
8370   ins_cost(125);
8371   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
8372   opcode(0x8B);
8373   ins_encode( OpcP, RegMem(dst,mem));
8374   ins_pipe( ialu_reg_mem );
8375 %}
8376 
8377 // LoadLong-locked - same as a volatile long load when used with compare-swap
8378 instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
       // x87 form of LoadLLocked (UseSSE<=1): the 64-bit value is read with
       // FILD and written to a long stack slot with FISTp, using the same
       // encoding class as the volatile long load (enc_loadL_volatile).
8379   predicate(UseSSE<=1);
8380   match(Set dst (LoadLLocked mem));
8381 
8382   ins_cost(200);
8383   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
8384             "FISTp  $dst" %}
8385   ins_encode(enc_loadL_volatile(mem,dst));
8386   ins_pipe( fpu_reg_mem );
8387 %}
8388 
8389 instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
       // SSE2 form of LoadLLocked with a stack-slot destination: the 64-bit
       // value is moved through the XMM temp with two MOVSDs.
8390   predicate(UseSSE>=2);
8391   match(Set dst (LoadLLocked mem));
8392   effect(TEMP tmp);
8393   ins_cost(180);
8394   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
8395             "MOVSD  $dst,$tmp" %}
8396   ins_encode(enc_loadLX_volatile(mem, dst, tmp));
8397   ins_pipe( pipe_slow );
8398 %}
8399 
8400 instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
       // SSE2 form of LoadLLocked with a register-pair destination.  Per the
       // format: one MOVSD load into the XMM temp, then the low half is
       // extracted with MOVD, the temp is shifted right by 32, and the high
       // half is extracted with a second MOVD.
8401   predicate(UseSSE>=2);
8402   match(Set dst (LoadLLocked mem));
8403   effect(TEMP tmp);
       // Cheaper (160) than the stack-slot variant loadLX_Locked (180).
8404   ins_cost(160);
8405   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
8406             "MOVD   $dst.lo,$tmp\n\t"
8407             "PSRLQ  $tmp,32\n\t"
8408             "MOVD   $dst.hi,$tmp" %}
8409   ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
8410   ins_pipe( pipe_slow );
8411 %}
8412 
8413 // Conditional-store of the updated heap-top.
8414 // Used during allocation of the shared heap.
8415 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8416 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
       // The rule's result is the flags register (Set cr ...): callers test
       // the flags after the CMPXCHG.  oldval is pinned to EAX by its operand
       // class; note there is no KILL oldval effect here (see below).
8417   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8418   // EAX is killed if there is contention, but then it's also unused.
8419   // In the common case of no contention, EAX holds the new oop address.
8420   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
       // lock_prefix encoding class, then CMPXCHG (0F B1) with reg/mem operands.
8421   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
8422   ins_pipe( pipe_cmpxchg );
8423 %}
8424 
8425 // Conditional-store of an int value.
8426 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
8427 instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
       // Int conditional store; the result is the flags register.  Same
       // lock-prefixed CMPXCHG (0F B1) as storePConditional, but here oldval
       // (EAX) is explicitly declared killed.
8428   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8429   effect(KILL oldval);
8430   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
8431   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
8432   ins_pipe( pipe_cmpxchg );
8433 %}
8434 
8435 // Conditional-store of a long value.
8436 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
8437 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
       // Long conditional store; the result is the flags register.  oldval
       // and newval are pinned to fixed register pairs by their operand
       // classes (eADXRegL / eBCXRegL); oldval is declared killed.
8438   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8439   effect(KILL oldval);
8440   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
8441             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
8442             "XCHG   EBX,ECX"
8443   %}
8444   ins_encode %{
8445     // Note: we need to swap EBX and ECX before and after the
8446     //       cmpxchg8 instruction because the instruction uses
8447     //       ECX as the high order word of the new value to store but
8448     //       our register encoding uses EBX.
8449     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
         // The LOCK prefix is only emitted on multiprocessor systems.
8450     if( os::is_MP() )
8451       __ lock();
8452     __ cmpxchg8($mem$$Address);
8453     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
8454   %}
8455   ins_pipe( pipe_cmpxchg );
8456 %}
8457 
8458 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
8459 
8460 instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
       // 64-bit compare-and-swap producing a boolean in res rather than
       // flags.  All inputs are pinned by operand class: mem_ptr (eSIRegP),
       // oldval (eADXRegL), newval (eBCXRegL).  Flags and oldval are killed.
8461   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8462   effect(KILL cr, KILL oldval);
       // Per the format: res is set to 0, then overwritten with 1 unless the
       // CMPXCHG8 left "not equal" in the flags.
8463   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8464             "MOV    $res,0\n\t"
8465             "JNE,s  fail\n\t"
8466             "MOV    $res,1\n"
8467           "fail:" %}
8468   ins_encode( enc_cmpxchg8(mem_ptr),
8469               enc_flags_ne_to_boolean(res) );
8470   ins_pipe( pipe_cmpxchg );
8471 %}
8472 
8473 instruct compareAndSwapP( eRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
       // Pointer compare-and-swap producing a boolean in res.  oldval is
       // pinned to EAX and newval to ECX by their operand classes; flags and
       // oldval are killed.
8474   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8475   effect(KILL cr, KILL oldval);
       // Per the format: res is set to 0, then overwritten with 1 unless the
       // CMPXCHG left "not equal" in the flags.
8476   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8477             "MOV    $res,0\n\t"
8478             "JNE,s  fail\n\t"
8479             "MOV    $res,1\n"
8480           "fail:" %}
8481   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
8482   ins_pipe( pipe_cmpxchg );
8483 %}
8484 
8485 instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
       // Int compare-and-swap that yields an int in $res. Uses the same encoding
       // pair as compareAndSwapP; per the format below, $res is 1 on success
       // and 0 otherwise. Flags and $oldval are clobbered.
8486   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8487   effect(KILL cr, KILL oldval);
8488   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8489             "MOV    $res,0\n\t"
8490             "JNE,s  fail\n\t"
8491             "MOV    $res,1\n"
8492           "fail:" %}
8493   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
8494   ins_pipe( pipe_cmpxchg );
8495 %}
8496 
8497 //----------Subtraction Instructions-------------------------------------------
8498 // Integer Subtraction Instructions
8499 instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
       // Register-register subtract: $dst = $dst - $src. Flags are clobbered.
8500   match(Set dst (SubI dst src));
8501   effect(KILL cr);
8502 
8503   size(2);
8504   format %{ "SUB    $dst,$src" %}
8505   opcode(0x2B);
8506   ins_encode( OpcP, RegReg( dst, src) );
8507   ins_pipe( ialu_reg_reg );
8508 %}
8509 
8510 instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
       // Subtract an int immediate from a register: $dst = $dst - $src.
       // No size() is declared for this form.
8511   match(Set dst (SubI dst src));
8512   effect(KILL cr);
8513 
8514   format %{ "SUB    $dst,$src" %}
8515   opcode(0x81,0x05);  /* Opcode 81 /5 */
8516   // ins_encode( RegImm( dst, src) );
       // NOTE(review): OpcSErm/Con8or32 are defined outside this chunk; the names
       // suggest a short 8-bit immediate form is used when $src fits -- confirm.
8517   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8518   ins_pipe( ialu_reg );
8519 %}
8520 
8521 instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
       // Subtract with the int load folded into the instruction:
       // $dst = $dst - LoadI($src). Flags are clobbered.
8522   match(Set dst (SubI dst (LoadI src)));
8523   effect(KILL cr);
8524 
8525   ins_cost(125);
8526   format %{ "SUB    $dst,$src" %}
8527   opcode(0x2B);
8528   ins_encode( OpcP, RegMem( dst, src) );
8529   ins_pipe( ialu_reg_mem );
8530 %}
8531 
8532 instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
       // Read-modify-write subtract: matches a load, SubI and store that all use
       // the same memory operand $dst, and emits a single SUB to memory.
8533   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
8534   effect(KILL cr);
8535 
8536   ins_cost(150);
8537   format %{ "SUB    $dst,$src" %}
8538   opcode(0x29);  /* Opcode 29 /r */
       // Operands are passed to RegMem as (register, memory), i.e. ($src, $dst).
8539   ins_encode( OpcP, RegMem( src, dst ) );
8540   ins_pipe( ialu_mem_reg );
8541 %}
8542 
8543 // Subtract from a pointer: matches AddP of a negated int (0 - $src) as one SUB
8544 instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{
8545   match(Set dst (AddP dst (SubI zero src)));
8546   effect(KILL cr);
8547 
8548   size(2);
8549   format %{ "SUB    $dst,$src" %}
8550   opcode(0x2B);
8551   ins_encode( OpcP, RegReg( dst, src) );
8552   ins_pipe( ialu_reg_reg );
8553 %}
8554 
8555 instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{
       // Integer negate: matches (0 - $dst) and rewrites $dst in place with NEG.
8556   match(Set dst (SubI zero dst));
8557   effect(KILL cr);
8558 
8559   size(2);
8560   format %{ "NEG    $dst" %}
8561   opcode(0xF7,0x03);  // Opcode F7 /3
8562   ins_encode( OpcP, RegOpc( dst ) );
8563   ins_pipe( ialu_reg );
8564 %}
8565 
8566 
8567 //----------Multiplication/Division Instructions-------------------------------
8568 // Integer Multiplication Instructions
8569 // Multiply Register: $dst = $dst * $src (two-byte opcode 0F AF)
8570 instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8571   match(Set dst (MulI dst src));
8572   effect(KILL cr);
8573 
8574   size(3);
8575   ins_cost(300);
8576   format %{ "IMUL   $dst,$src" %}
       // The secondary byte (0x0F) is emitted first via OpcS, then the primary (0xAF).
8577   opcode(0xAF, 0x0F);
8578   ins_encode( OpcS, OpcP, RegReg( dst, src) );
8579   ins_pipe( ialu_reg_reg_alu0 );
8580 %}
8581 
8582 // Multiply 32-bit Immediate: three-operand form, $dst = $src * $imm
8583 instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{
       // Unlike mulI_eReg, $dst is not an input of the match rule.
8584   match(Set dst (MulI src imm));
8585   effect(KILL cr);
8586 
8587   ins_cost(300);
8588   format %{ "IMUL   $dst,$src,$imm" %}
8589   opcode(0x69);  /* 69 /r id */
8590   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
8591   ins_pipe( ialu_reg_reg_alu0 );
8592 %}
8593 
8594 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
       // Materializes a long constant into an eADXRegL_low_only operand; per the
       // format only the low word (EAX) is written. This operand class is consumed
       // by mulI_imm_high and mulI_imm_RShift_high, which offset this node's cost.
8595   match(Set dst src);
8596   effect(KILL cr);
8597 
8598   // Note that this is artificially increased to make it more expensive than loadConL
8599   ins_cost(250);
8600   format %{ "MOV    EAX,$src\t// low word only" %}
8601   opcode(0xB8);
8602   ins_encode( LdImmL_Lo(dst, src) );
8603   ins_pipe( ialu_reg_fat );
8604 %}
8605 
8606 // Multiply by 32-bit Immediate, taking the shifted high order results
8607 //  (special case for shift by 32)
8608 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
8609   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
       // Predicate: the second input of the MulL (reached as ConvL2I -> RShiftL ->
       // MulL -> kid 1) must be a long constant whose value lies in
       // [min_jint, max_jint].
8610   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
8611              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
8612              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
8613   effect(USE src1, KILL cr);
8614 
8615   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
8616   ins_cost(0*100 + 1*400 - 150);
8617   format %{ "IMUL   EDX:EAX,$src1" %}
       // Same encoding class as mulI_imm_RShift_high; no extra shift is listed in
       // the format for the cnt == 32 case.
8618   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
8619   ins_pipe( pipe_slow );
8620 %}
8621 
8622 // Multiply by 32-bit Immediate, taking the shifted high order results
8623 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
8624   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
       // Predicate: the second input of the MulL must be a long constant whose
       // value lies in [min_jint, max_jint] (same test as mulI_imm_high).
8625   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
8626              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
8627              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
8628   effect(USE src1, KILL cr);
8629 
8630   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
       // Costed 100 above mulI_imm_high to account for the extra SAR in the format.
8631   ins_cost(1*100 + 1*400 - 150);
8632   format %{ "IMUL   EDX:EAX,$src1\n\t"
8633             "SAR    EDX,$cnt-32" %}
8634   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
8635   ins_pipe( pipe_slow );
8636 %}
8637 
8638 // Multiply Memory 32-bit Immediate: $dst = LoadI($src) * $imm
8639 instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{
8640   match(Set dst (MulI (LoadI src) imm));
8641   effect(KILL cr);
8642 
8643   ins_cost(300);
8644   format %{ "IMUL   $dst,$src,$imm" %}
8645   opcode(0x69);  /* 69 /r id */
8646   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
8647   ins_pipe( ialu_reg_mem_alu0 );
8648 %}
8649 
8650 // Multiply Memory: $dst = $dst * LoadI($src)
8651 instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{
8652   match(Set dst (MulI dst (LoadI src)));
8653   effect(KILL cr);
8654 
8655   ins_cost(350);
8656   format %{ "IMUL   $dst,$src" %}
       // Same opcode pair as mulI_eReg (0F AF), here with a memory operand.
8657   opcode(0xAF, 0x0F);
8658   ins_encode( OpcS, OpcP, RegMem( dst, src) );
8659   ins_pipe( ialu_reg_mem_alu0 );
8660 %}
8661 
8662 // Multiply Register Int to Long
8663 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
8664   // Basic Idea: long = (long)int * (long)int
       // Both MulL inputs are sign-extended ints (ConvI2L), so a single widening
       // multiply is emitted instead of a full long multiply.
8665   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
8666   effect(DEF dst, USE src, USE src1, KILL flags);
8667 
8668   ins_cost(300);
8669   format %{ "IMUL   $dst,$src1" %}
8670 
       // Only $dst and $src1 are passed to the encoding; $src is not named there.
8671   ins_encode( long_int_multiply( dst, src1 ) );
8672   ins_pipe( ialu_reg_reg_alu0 );
8673 %}
8674 
8675 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
8676   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
       // Unsigned counterpart of mulI2L: both inputs are ints widened to long and
       // masked with $mask; the format shows the unsigned MUL.
8677   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
8678   effect(KILL flags);
8679 
8680   ins_cost(300);
8681   format %{ "MUL    $dst,$src1" %}
8682 
8683   ins_encode( long_uint_multiply(dst, src1) );
8684   ins_pipe( ialu_reg_reg_alu0 );
8685 %}
8686 
8687 // Multiply Register Long (general case: no assumption about either high word)
8688 instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
8689   match(Set dst (MulL dst src));
8690   effect(KILL cr, TEMP tmp);
       // Costed as 4 simple ops plus 3 multiplies; the *_lhi0/_rhi0/_hi0 variants
       // below declare lower costs for the same match rule.
8691   ins_cost(4*100+3*400);
8692 // Basic idea: lo(result) = lo(x_lo * y_lo)
8693 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
8694   format %{ "MOV    $tmp,$src.lo\n\t"
8695             "IMUL   $tmp,EDX\n\t"
8696             "MOV    EDX,$src.hi\n\t"
8697             "IMUL   EDX,EAX\n\t"
8698             "ADD    $tmp,EDX\n\t"
8699             "MUL    EDX:EAX,$src.lo\n\t"
8700             "ADD    EDX,$tmp" %}
8701   ins_encode( long_multiply( dst, src, tmp ) );
8702   ins_pipe( pipe_slow );
8703 %}
8704 
8705 // Multiply Register Long where the left operand's high 32 bits are zero
8706 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
       // Applies only when is_operand_hi32_zero() holds for the first MulL input.
8707   predicate(is_operand_hi32_zero(n->in(1)));
8708   match(Set dst (MulL dst src));
8709   effect(KILL cr, TEMP tmp);
8710   ins_cost(2*100+2*400);
8711 // Basic idea: lo(result) = lo(x_lo * y_lo)
8712 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
8713   format %{ "MOV    $tmp,$src.hi\n\t"
8714             "IMUL   $tmp,EAX\n\t"
8715             "MUL    EDX:EAX,$src.lo\n\t"
8716             "ADD    EDX,$tmp" %}
8717   ins_encode %{
         // tmp = lo(y_hi * x_lo): the cross term that lands in the high word.
8718     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
8719     __ imull($tmp$$Register, rax);
         // EDX:EAX = x_lo * y_lo, then fold the cross term into the high word.
8720     __ mull($src$$Register);
8721     __ addl(rdx, $tmp$$Register);
8722   %}
8723   ins_pipe( pipe_slow );
8724 %}
8725 
8726 // Multiply Register Long where the right operand's high 32 bits are zero
8727 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
       // Applies only when is_operand_hi32_zero() holds for the second MulL input.
8728   predicate(is_operand_hi32_zero(n->in(2)));
8729   match(Set dst (MulL dst src));
8730   effect(KILL cr, TEMP tmp);
8731   ins_cost(2*100+2*400);
8732 // Basic idea: lo(result) = lo(x_lo * y_lo)
8733 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
8734   format %{ "MOV    $tmp,$src.lo\n\t"
8735             "IMUL   $tmp,EDX\n\t"
8736             "MUL    EDX:EAX,$src.lo\n\t"
8737             "ADD    EDX,$tmp" %}
8738   ins_encode %{
         // tmp = lo(y_lo * x_hi): the cross term that lands in the high word.
8739     __ movl($tmp$$Register, $src$$Register);
8740     __ imull($tmp$$Register, rdx);
         // EDX:EAX = x_lo * y_lo, then fold the cross term into the high word.
8741     __ mull($src$$Register);
8742     __ addl(rdx, $tmp$$Register);
8743   %}
8744   ins_pipe( pipe_slow );
8745 %}
8746 
8747 // Multiply Register Long where the left and the right operands' high 32 bits are zero
8748 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
       // Requires is_operand_hi32_zero() for both MulL inputs; no TEMP is needed
       // because a single MUL produces the whole result.
8749   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
8750   match(Set dst (MulL dst src));
8751   effect(KILL cr);
8752   ins_cost(1*400);
8753 // Basic idea: lo(result) = lo(x_lo * y_lo)
8754 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
8755   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
8756   ins_encode %{
8757     __ mull($src$$Register);
8758   %}
8759   ins_pipe( pipe_slow );
8760 %}
8761 
8762 // Multiply Register Long by small constant
8763 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
       // NOTE(review): immL_127 is defined outside this chunk; presumably it limits
       // $src to a constant that fits a short immediate -- confirm.
8764   match(Set dst (MulL dst src));
8765   effect(KILL cr, TEMP tmp);
8766   ins_cost(2*100+2*400);
8767   size(12);
8768 // Basic idea: lo(result) = lo(src * EAX)
8769 //             hi(result) = hi(src * EAX) + lo(src * EDX)
8770   format %{ "IMUL   $tmp,EDX,$src\n\t"
8771             "MOV    EDX,$src\n\t"
8772             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
8773             "ADD    EDX,$tmp" %}
8774   ins_encode( long_multiply_con( dst, src, tmp ) );
8775   ins_pipe( pipe_slow );
8776 %}
8777 
8778 // Integer DIV with Register
8779 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
       // Quotient replaces $rax; $rdx and flags are clobbered.
       // Per the format: when EAX == 0x80000000 and the divisor is -1 the IDIV is
       // skipped (EDX is zeroed and EAX is left unchanged); otherwise CDQ + IDIV.
8780   match(Set rax (DivI rax div));
8781   effect(KILL rdx, KILL cr);
8782   size(26);
8783   ins_cost(30*100+10*100);
8784   format %{ "CMP    EAX,0x80000000\n\t"
8785             "JNE,s  normal\n\t"
8786             "XOR    EDX,EDX\n\t"
8787             "CMP    ECX,-1\n\t"
8788             "JE,s   done\n"
8789     "normal: CDQ\n\t"
8790             "IDIV   $div\n\t"
8791     "done:"        %}
8792   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8793   ins_encode( cdq_enc, OpcP, RegOpc(div) );
8794   ins_pipe( ialu_reg_reg_alu0 );
8795 %}
8796 
8797 // Divide Register Long (general case; per the format, done by calling SharedRuntime::ldiv)
8798 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
8799   match(Set dst (DivL src1 src2));
       // $cx and $bx appear only so they can be declared KILLed, along with flags.
8800   effect( KILL cr, KILL cx, KILL bx );
8801   ins_cost(10000);
       // Four 32-bit words are pushed as arguments and removed again by ADD ESP,16.
8802   format %{ "PUSH   $src1.hi\n\t"
8803             "PUSH   $src1.lo\n\t"
8804             "PUSH   $src2.hi\n\t"
8805             "PUSH   $src2.lo\n\t"
8806             "CALL   SharedRuntime::ldiv\n\t"
8807             "ADD    ESP,16" %}
8808   ins_encode( long_div(src1,src2) );
8809   ins_pipe( pipe_slow );
8810 %}
8811 
8812 // Integer DIVMOD with Register, both quotient and mod results
8813 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
       // The match rule has no Set: the node yields both results. Neither $rax nor
       // $rdx is KILLed; only the flags are. Same encoding and size as divI_eReg.
8814   match(DivModI rax div);
8815   effect(KILL cr);
8816   size(26);
8817   ins_cost(30*100+10*100);
8818   format %{ "CMP    EAX,0x80000000\n\t"
8819             "JNE,s  normal\n\t"
8820             "XOR    EDX,EDX\n\t"
8821             "CMP    ECX,-1\n\t"
8822             "JE,s   done\n"
8823     "normal: CDQ\n\t"
8824             "IDIV   $div\n\t"
8825     "done:"        %}
8826   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8827   ins_encode( cdq_enc, OpcP, RegOpc(div) );
8828   ins_pipe( pipe_slow );
8829 %}
8830 
8831 // Integer MOD with Register
8832 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
       // Remainder is produced in $rdx; $rax and flags are clobbered.
8833   match(Set rdx (ModI rax div));
8834   effect(KILL rax, KILL cr);
8835 
8836   size(26);
8837   ins_cost(300);
       // This node uses the same encoding (cdq_enc, OpcP, RegOpc) and the same
       // size(26) as divI_eReg, so the format lists the same full sequence,
       // including the 0x80000000 / -1 guard, rather than only CDQ + IDIV.
8838   format %{ "CMP    EAX,0x80000000\n\t"
                 "JNE,s  normal\n\t"
                 "XOR    EDX,EDX\n\t"
                 "CMP    ECX,-1\n\t"
                 "JE,s   done\n"
         "normal: CDQ\n\t"
8839             "IDIV   $div\n\t"
         "done:"        %}
8840   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8841   ins_encode( cdq_enc, OpcP, RegOpc(div) );
8842   ins_pipe( ialu_reg_reg_alu0 );
8843 %}
8844 
8845 // Remainder Register Long (general case; per the format, done by calling SharedRuntime::lrem)
8846 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
8847   match(Set dst (ModL src1 src2));
       // $cx and $bx appear only so they can be declared KILLed, along with flags.
8848   effect( KILL cr, KILL cx, KILL bx );
8849   ins_cost(10000);
       // Four 32-bit words are pushed as arguments and removed again by ADD ESP,16.
8850   format %{ "PUSH   $src1.hi\n\t"
8851             "PUSH   $src1.lo\n\t"
8852             "PUSH   $src2.hi\n\t"
8853             "PUSH   $src2.lo\n\t"
8854             "CALL   SharedRuntime::lrem\n\t"
8855             "ADD    ESP,16" %}
8856   ins_encode( long_mod(src1,src2) );
8857   ins_pipe( pipe_slow );
8858 %}
8859 
8860 // Divide Register Long (no special case since divisor != -1)
8861 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
       // Inline long division of EDX:EAX by a 32-bit constant. The division is done
       // with abs($imm); the 64-bit result is negated at the end when $imm < 0.
       //
       // NOTE(review): every divide below is the signed IDIV, which raises a divide
       // error (#DE) when the quotient does not fit in a signed 32-bit register.
       // The fast-path guard is an unsigned compare (JA) that only ensures
       // EDX < abs($imm), i.e. a quotient below 2^32, so a quotient in [2^31, 2^32)
       // still reaches IDIV (e.g. EDX:EAX = 0x00000002_FFFFFFFF with $imm = 3).
       // The second IDIV of the slow path divides remainder:low-word and has the
       // same exposure. An unsigned DIV on an absolute-value dividend would avoid
       // this, but needs assembler support not visible here -- TODO confirm and fix.
8862   match(Set dst (DivL dst imm));
8863   effect( TEMP tmp, TEMP tmp2, KILL cr );
8864   ins_cost(1000);
8865   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
8866             "CMP    $tmp,EDX\n\t"
8867             "JA,s   fast\n\t"
8868             "MOV    $tmp2,EAX\n\t"
8869             "MOV    EAX,EDX\n\t"
8870             "SAR    EDX,31\n\t"
8871             "IDIV   $tmp\n\t"
8872             "XCHG   EAX,$tmp2 \n\t"
8873             "IDIV   $tmp\n\t"
8874             "CDQ\n\t"
8875             "ADD    EDX,$tmp2\n\t"
8876             "JMP,s  done\n"
8877     "fast:\n\t"
8878             "IDIV   $tmp\n\t"
8879             "XOR    EDX,EDX\n"
8880     "done:\n\t"
8881             "NEG    EDX:EAX # if $imm < 0" %}
8882   ins_encode %{
8883     int con = (int)$imm$$constant;
8884     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
         // min_jint is excluded above, so the negation cannot overflow.
8885     int pcon = (con > 0) ? con : -con;
8886     Label Lfast, Ldone;
8887 
8888     __ movl($tmp$$Register, pcon);
8889     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
         // Unsigned test: taken when abs(imm) > high word of the dividend.
8890     __ jccb(Assembler::above, Lfast);
8891 
         // Slow path: divide the sign-extended high word first, ...
8892     __ movl($tmp2$$Register, $dst$$Register); // save
8893     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8894     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // src sign
8895     __ idivl($tmp$$Register);
         // ... then divide remainder:low-word; tmp2 keeps the high-word quotient.
8896     __ xchgl($dst$$Register, $tmp2$$Register);
8897     __ idivl($tmp$$Register);
         // Sign-extend the low quotient and add the high-word quotient into EDX.
8898     __ cdql();
8899     __ addl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
8900     __ jmpb(Ldone);
8901 
8902     __ bind(Lfast);
8903     // fast path: src is positive and result fits into 32 bit
8904     __ idivl($tmp$$Register);
8905     __ xorl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
8906 
8907     __ bind(Ldone);
8908     if (con < 0) {
8909       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
8910     }
8911   %}
8912   ins_pipe( pipe_slow );
8913 %}
8914 
8915 // Remainder Register Long (remainder fits into 32 bits)
8916 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
       // Inline long remainder of EDX:EAX by a 32-bit constant, computed with
       // abs($imm); the 32-bit remainder is sign-extended into EDX:EAX at the end.
       //
       // NOTE(review): every divide below is the signed IDIV, which raises a divide
       // error (#DE) when the quotient does not fit in a signed 32-bit register.
       // The guard before "fast" is an unsigned compare (JA) that only ensures
       // EDX < abs($imm), i.e. a quotient below 2^32, so a quotient in [2^31, 2^32)
       // still reaches IDIV (e.g. EDX:EAX = 0x00000002_FFFFFFFF with $imm = 3).
       // The final IDIV after the slow-path prologue has the same exposure.
       // An unsigned DIV on an absolute-value dividend would avoid this, but needs
       // assembler support not visible here -- TODO confirm and fix.
8917   match(Set dst (ModL dst imm));
8918   effect( TEMP tmp, TEMP tmp2, KILL cr );
8919   ins_cost(1000);
8920   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8921             "CMP    $tmp,EDX\n\t"
8922             "JA,s   fast\n\t"
8923             "MOV    $tmp2,EAX\n\t"
8924             "MOV    EAX,EDX\n\t"
8925             "SAR    EDX,31\n\t"
8926             "IDIV   $tmp\n\t"
8927             "MOV    EAX,$tmp2\n"
8928     "fast:\n\t"
8929             "IDIV   $tmp\n\t"
8930             "MOV    EAX,EDX\n\t"
8931             "SAR    EDX,31\n\t" %}
8932   ins_encode %{
8933     int con = (int)$imm$$constant;
8934     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
         // min_jint is excluded above, so the negation cannot overflow.
8935     int pcon = (con > 0) ? con : -con;
8936     Label  Lfast;
8937 
8938     __ movl($tmp$$Register, pcon);
8939     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8940     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
8941 
         // Slow path: reduce the sign-extended high word first; its remainder stays
         // in EDX and the saved low word is restored into EAX for the final divide.
8942     __ movl($tmp2$$Register, $dst$$Register); // save
8943     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8944     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // src sign
8945     __ idivl($tmp$$Register);
8946     __ movl($dst$$Register, $tmp2$$Register);
8947 
         // Both paths fall into the final divide; the remainder is left in EDX.
8948     __ bind(Lfast);
8949     __ idivl($tmp$$Register);
8950     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8951     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8952 
8953   %}
8954   ins_pipe( pipe_slow );
8955 %}
8956 
8957 // Integer Shift Instructions
8958 // Shift Left by one: uses the short D1 /4 form, which carries no immediate byte
8959 instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
8960   match(Set dst (LShiftI dst shift));
8961   effect(KILL cr);
8962 
8963   size(2);
8964   format %{ "SHL    $dst,$shift" %}
8965   opcode(0xD1, 0x4);  /* D1 /4 */
8966   ins_encode( OpcP, RegOpc( dst ) );
8967   ins_pipe( ialu_reg );
8968 %}
8969 
8970 // Shift Left by 8-bit immediate: $dst = $dst << $shift (C1 /4 ib, 3 bytes)
8971 instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
8972   match(Set dst (LShiftI dst shift));
8973   effect(KILL cr);
8974 
8975   size(3);
8976   format %{ "SHL    $dst,$shift" %}
8977   opcode(0xC1, 0x4);  /* C1 /4 ib */
8978   ins_encode( RegOpcImm( dst, shift) );
8979   ins_pipe( ialu_reg );
8980 %}
8981 
8982 // Shift Left by variable: the count operand is an eCXRegI (D3 /4 shifts by CL)
8983 instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
8984   match(Set dst (LShiftI dst shift));
8985   effect(KILL cr);
8986 
8987   size(2);
8988   format %{ "SHL    $dst,$shift" %}
8989   opcode(0xD3, 0x4);  /* D3 /4 */
       // $shift is not passed to the encoding; the opcode implies the count register.
8990   ins_encode( OpcP, RegOpc( dst ) );
8991   ins_pipe( ialu_reg_reg );
8992 %}
8993 
8994 // Arithmetic shift right by one (register): short D1 /7 form, no immediate byte
8995 instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
8996   match(Set dst (RShiftI dst shift));
8997   effect(KILL cr);
8998 
8999   size(2);
9000   format %{ "SAR    $dst,$shift" %}
9001   opcode(0xD1, 0x7);  /* D1 /7 */
9002   ins_encode( OpcP, RegOpc( dst ) );
9003   ins_pipe( ialu_reg );
9004 %}
9005 
9006 // Arithmetic shift right by one (memory operand, read-modify-write)
9007 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
       // Matches load, shift and store on the same memory operand $dst.
9008   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9009   effect(KILL cr);
9010   format %{ "SAR    $dst,$shift" %}
9011   opcode(0xD1, 0x7);  /* D1 /7 */
9012   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
9013   ins_pipe( ialu_mem_imm );
9014 %}
9015 
9016 // Arithmetic Shift Right by 8-bit immediate (register): $dst = $dst >> $shift
9017 instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
9018   match(Set dst (RShiftI dst shift));
9019   effect(KILL cr);
9020 
9021   size(3);
9022   format %{ "SAR    $dst,$shift" %}
9023   opcode(0xC1, 0x7);  /* C1 /7 ib */
9024   ins_encode( RegOpcImm( dst, shift ) );
       // Register-only form: use the register pipeline class, as the sibling
       // salI_eReg_imm and shrI_eReg_imm do, not the memory class ialu_mem_imm.
9025   ins_pipe( ialu_reg );
9026 %}
9027 
9028 // Arithmetic Shift Right by 8-bit immediate (memory operand, read-modify-write)
9029 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
       // Matches load, shift and store on the same memory operand $dst.
9030   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
9031   effect(KILL cr);
9032 
9033   format %{ "SAR    $dst,$shift" %}
9034   opcode(0xC1, 0x7);  /* C1 /7 ib */
9035   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
9036   ins_pipe( ialu_mem_imm );
9037 %}
9038 
9039 // Arithmetic Shift Right by variable: the count operand is an eCXRegI (D3 /7 shifts by CL)
9040 instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
9041   match(Set dst (RShiftI dst shift));
9042   effect(KILL cr);
9043 
9044   size(2);
9045   format %{ "SAR    $dst,$shift" %}
9046   opcode(0xD3, 0x7);  /* D3 /7 */
       // $shift is not passed to the encoding; the opcode implies the count register.
9047   ins_encode( OpcP, RegOpc( dst ) );
9048   ins_pipe( ialu_reg_reg );
9049 %}
9050 
9051 // Logical shift right by one: short D1 /5 form, no immediate byte
9052 instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
9053   match(Set dst (URShiftI dst shift));
9054   effect(KILL cr);
9055 
9056   size(2);
9057   format %{ "SHR    $dst,$shift" %}
9058   opcode(0xD1, 0x5);  /* D1 /5 */
9059   ins_encode( OpcP, RegOpc( dst ) );
9060   ins_pipe( ialu_reg );
9061 %}
9062 
9063 // Logical Shift Right by 8-bit immediate: $dst = $dst >>> $shift (C1 /5 ib, 3 bytes)
9064 instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
9065   match(Set dst (URShiftI dst shift));
9066   effect(KILL cr);
9067 
9068   size(3);
9069   format %{ "SHR    $dst,$shift" %}
9070   opcode(0xC1, 0x5);  /* C1 /5 ib */
9071   ins_encode( RegOpcImm( dst, shift) );
9072   ins_pipe( ialu_reg );
9073 %}
9074 
9075 
9076 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
9077 // This idiom is used by the compiler for the i2b bytecode.
9078 instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{
       // The shift pair sign-extends the low byte, so it is replaced by a single
       // byte-to-int MOVSX. No flags operand is declared for this node.
9079   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
9080 
9081   size(3);
9082   format %{ "MOVSX  $dst,$src :8" %}
9083   ins_encode %{
9084     __ movsbl($dst$$Register, $src$$Register);
9085   %}
9086   ins_pipe(ialu_reg_reg);
9087 %}
9088 
9089 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
9090 // This idiom is used by the compiler for the i2s bytecode.
9091 instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{
       // The shift pair sign-extends the low 16 bits, so it is replaced by a single
       // word-to-int MOVSX. No flags operand is declared for this node.
9092   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
9093 
9094   size(3);
9095   format %{ "MOVSX  $dst,$src :16" %}
9096   ins_encode %{
9097     __ movswl($dst$$Register, $src$$Register);
9098   %}
9099   ins_pipe(ialu_reg_reg);
9100 %}
9101 
9102 
9103 // Logical Shift Right by variable: the count operand is an eCXRegI (D3 /5 shifts by CL)
9104 instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
9105   match(Set dst (URShiftI dst shift));
9106   effect(KILL cr);
9107 
9108   size(2);
9109   format %{ "SHR    $dst,$shift" %}
9110   opcode(0xD3, 0x5);  /* D3 /5 */
       // $shift is not passed to the encoding; the opcode implies the count register.
9111   ins_encode( OpcP, RegOpc( dst ) );
9112   ins_pipe( ialu_reg_reg );
9113 %}
9114 
9115 
9116 //----------Logical Instructions-----------------------------------------------
9117 //----------Integer Logical Instructions---------------------------------------
9118 // And Instructions
9119 // And Register with Register: $dst = $dst & $src (flags clobbered)
9120 instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9121   match(Set dst (AndI dst src));
9122   effect(KILL cr);
9123 
9124   size(2);
9125   format %{ "AND    $dst,$src" %}
9126   opcode(0x23);
9127   ins_encode( OpcP, RegReg( dst, src) );
9128   ins_pipe( ialu_reg_reg );
9129 %}
9130 
9131 // And Register with Immediate: $dst = $dst & $src (no size() declared)
9132 instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9133   match(Set dst (AndI dst src));
9134   effect(KILL cr);
9135 
9136   format %{ "AND    $dst,$src" %}
9137   opcode(0x81,0x04);  /* Opcode 81 /4 */
9138   // ins_encode( RegImm( dst, src) );
       // Same encoding pair as the other register/immediate ALU forms in this file.
9139   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9140   ins_pipe( ialu_reg );
9141 %}
9142 
9143 // And Register with Memory: $dst = $dst & LoadI($src), load folded into the AND
9144 instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9145   match(Set dst (AndI dst (LoadI src)));
9146   effect(KILL cr);
9147 
9148   ins_cost(125);
9149   format %{ "AND    $dst,$src" %}
9150   opcode(0x23);
9151   ins_encode( OpcP, RegMem( dst, src) );
9152   ins_pipe( ialu_reg_mem );
9153 %}
9154 
9155 // And Memory with Register: read-modify-write of the same memory operand
9156 instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9157   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9158   effect(KILL cr);
9159 
9160   ins_cost(150);
9161   format %{ "AND    $dst,$src" %}
9162   opcode(0x21);  /* Opcode 21 /r */
       // Operands are passed to RegMem as (register, memory), i.e. ($src, $dst).
9163   ins_encode( OpcP, RegMem( src, dst ) );
9164   ins_pipe( ialu_mem_reg );
9165 %}
9166 
9167 // And Memory with Immediate: read-modify-write of the same memory operand
9168 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9169   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9170   effect(KILL cr);
9171 
9172   ins_cost(125);
9173   format %{ "AND    $dst,$src" %}
9174   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
9175   // ins_encode( MemImm( dst, src) );
       // The secondary opcode (/4) is placed via RMopc_Mem with the memory operand.
9176   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9177   ins_pipe( ialu_mem_imm );
9178 %}
9179 
9180 // Or Instructions
9181 // Or Register with Register: $dst = $dst | $src (flags clobbered)
9182 instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9183   match(Set dst (OrI dst src));
9184   effect(KILL cr);
9185 
9186   size(2);
9187   format %{ "OR     $dst,$src" %}
9188   opcode(0x0B);
9189   ins_encode( OpcP, RegReg( dst, src) );
9190   ins_pipe( ialu_reg_reg );
9191 %}
9192 
9193 instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
       // Or an int register with a pointer viewed as an integer: the CastP2X is
       // absorbed by the match rule and the encoding is identical to orI_eReg.
9194   match(Set dst (OrI dst (CastP2X src)));
9195   effect(KILL cr);
9196 
9197   size(2);
9198   format %{ "OR     $dst,$src" %}
9199   opcode(0x0B);
9200   ins_encode( OpcP, RegReg( dst, src) );
9201   ins_pipe( ialu_reg_reg );
9202 %}
9203 
9204 
9205 // Or Register with Immediate: $dst = $dst | $src (no size() declared)
9206 instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9207   match(Set dst (OrI dst src));
9208   effect(KILL cr);
9209 
9210   format %{ "OR     $dst,$src" %}
9211   opcode(0x81,0x01);  /* Opcode 81 /1 id */
9212   // ins_encode( RegImm( dst, src) );
       // Same encoding pair as the other register/immediate ALU forms in this file.
9213   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9214   ins_pipe( ialu_reg );
9215 %}
9216 
9217 // Or Register with Memory: $dst = $dst | LoadI($src), load folded into the OR
9218 instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9219   match(Set dst (OrI dst (LoadI src)));
9220   effect(KILL cr);
9221 
9222   ins_cost(125);
9223   format %{ "OR     $dst,$src" %}
9224   opcode(0x0B);
9225   ins_encode( OpcP, RegMem( dst, src) );
9226   ins_pipe( ialu_reg_mem );
9227 %}
9228 
9229 // Or Memory with Register: read-modify-write of the same memory operand
9230 instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9231   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9232   effect(KILL cr);
9233 
9234   ins_cost(150);
9235   format %{ "OR     $dst,$src" %}
9236   opcode(0x09);  /* Opcode 09 /r */
       // Operands are passed to RegMem as (register, memory), i.e. ($src, $dst).
9237   ins_encode( OpcP, RegMem( src, dst ) );
9238   ins_pipe( ialu_mem_reg );
9239 %}
9240 
9241 // Or Memory with Immediate: read-modify-write of the same memory operand
9242 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9243   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
9244   effect(KILL cr);
9245 
9246   ins_cost(125);
9247   format %{ "OR     $dst,$src" %}
9248   opcode(0x81,0x1);  /* Opcode 81 /1 id */
9249   // ins_encode( MemImm( dst, src) );
       // The secondary opcode (/1) is placed via RMopc_Mem with the memory operand.
9250   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9251   ins_pipe( ialu_mem_imm );
9252 %}
9253 
9254 // ROL/ROR
9255 // ROL expand
9256 instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
       // Rotate left by one. This node has no match rule; it is instantiated from
       // the expand block of rolI_eReg_i1.
9257   effect(USE_DEF dst, USE shift, KILL cr);
9258 
9259   format %{ "ROL    $dst, $shift" %}
9260   opcode(0xD1, 0x0); /* Opcode D1 /0 */
9261   ins_encode( OpcP, RegOpc( dst ));
9262   ins_pipe( ialu_reg );
9263 %}
9264 
9265 instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
       // Rotate left by an 8-bit immediate. This node has no match rule; it is
       // instantiated from the expand block of rolI_eReg_i8.
9266   effect(USE_DEF dst, USE shift, KILL cr);
9267 
9268   format %{ "ROL    $dst, $shift" %}
9269   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
9270   ins_encode( RegOpcImm(dst, shift) );
9271   ins_pipe(ialu_reg);
9272 %}
9273 
9274 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
       // Rotate left by a variable count. This node has no match rule; it is
       // instantiated from the expand blocks of rolI_eReg_Var_C0 and
       // rolI_eReg_Var_C32. $dst uses a different register class (ncxRegI) than
       // the count operand (eCXRegI).
9275   effect(USE_DEF dst, USE shift, KILL cr);
9276 
9277   format %{ "ROL    $dst, $shift" %}
9278   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
9279   ins_encode(OpcP, RegOpc(dst));
9280   ins_pipe( ialu_reg_reg );
9281 %}
9282 // end of ROL expand
9283 
9284 // ROL 32bit by one once: matches (dst << lshift) | (dst >>> rshift)
9285 instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
       // NOTE(review): immI1/immI_M1 are defined outside this chunk; presumably the
       // constants 1 and -1 (a right-shift count of -1 acting as 31) -- confirm.
9286   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9287 
       // Only $lshift is forwarded; $rshift is consumed by the match rule alone.
9288   expand %{
9289     rolI_eReg_imm1(dst, lshift, cr);
9290   %}
9291 %}
9292 
9293 // ROL 32bit var by imm8 once
9294 instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
       // Predicate: the two shift constants must sum to a multiple of 32
       // (low five bits of the sum are zero), which makes the OR a rotate.
9295   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9296   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
9297 
       // Only $lshift is forwarded; $rshift is consumed by the match rule alone.
9298   expand %{
9299     rolI_eReg_imm8(dst, lshift, cr);
9300   %}
9301 %}
9302 
9303 // ROL 32bit var by var once: matches (dst << shift) | (dst >>> (0 - shift))
9304 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
9305   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
9306 
       // $zero is consumed by the match rule; only $dst, $shift and $cr are forwarded.
9307   expand %{
9308     rolI_eReg_CL(dst, shift, cr);
9309   %}
9310 %}
9311 
9312 // ROL 32bit var by var once: matches (dst << shift) | (dst >>> (32 - shift))
9313 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
9314   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
9315 
       // $c32 is consumed by the match rule; only $dst, $shift and $cr are forwarded.
9316   expand %{
9317     rolI_eReg_CL(dst, shift, cr);
9318   %}
9319 %}
9320 
9321 // ROR expand
9322 instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
       // Rotate right by one. This node has no match rule; it is instantiated from
       // the expand block of rorI_eReg_i1.
9323   effect(USE_DEF dst, USE shift, KILL cr);
9324 
9325   format %{ "ROR    $dst, $shift" %}
9326   opcode(0xD1,0x1);  /* Opcode D1 /1 */
9327   ins_encode( OpcP, RegOpc( dst ) );
9328   ins_pipe( ialu_reg );
9329 %}
9330 
9331 instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
       // Rotate right by an 8-bit immediate. This node has no match rule; it is
       // instantiated from the expand block of rorI_eReg_i8.
9332   effect (USE_DEF dst, USE shift, KILL cr);
9333 
9334   format %{ "ROR    $dst, $shift" %}
9335   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
9336   ins_encode( RegOpcImm(dst, shift) );
9337   ins_pipe( ialu_reg );
9338 %}
9339 
9340 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
       // Rotate right by a variable count. This node has no match rule; it is
       // instantiated from the expand blocks of rorI_eReg_Var_C0 and
       // rorI_eReg_Var_C32. $dst uses a different register class (ncxRegI) than
       // the count operand (eCXRegI).
9341   effect(USE_DEF dst, USE shift, KILL cr);
9342 
9343   format %{ "ROR    $dst, $shift" %}
9344   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
9345   ins_encode(OpcP, RegOpc(dst));
9346   ins_pipe( ialu_reg_reg );
9347 %}
9348 // end of ROR expand
9349 
9350 // ROR right once: matches (dst >>> rshift) | (dst << lshift)
9351 instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
       // NOTE(review): immI1/immI_M1 are defined outside this chunk; presumably the
       // constants 1 and -1 (a left-shift count of -1 acting as 31) -- confirm.
9352   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9353 
       // Only $rshift is forwarded; $lshift is consumed by the match rule alone.
9354   expand %{
9355     rorI_eReg_imm1(dst, rshift, cr);
9356   %}
9357 %}
9358 
9359 // ROR 32bit by immI8 once
9360 instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
       // Predicate: the two shift constants must sum to a multiple of 32
       // (low five bits of the sum are zero), which makes the OR a rotate.
9361   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
9362   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
9363 
       // Only $rshift is forwarded; $lshift is consumed by the match rule alone.
9364   expand %{
9365     rorI_eReg_imm8(dst, rshift, cr);
9366   %}
9367 %}
9368 
9369 // ROR 32bit var by var once: matches (dst >>> shift) | (dst << (0 - shift))
9370 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
9371   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
9372 
       // $zero is consumed by the match rule; only $dst, $shift and $cr are forwarded.
9373   expand %{
9374     rorI_eReg_CL(dst, shift, cr);
9375   %}
9376 %}
9377 
9378 // ROR 32bit var by var once: matches (dst >>> shift) | (dst << (32 - shift))
9379 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
9380   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
9381 
       // $c32 is consumed by the match rule; only $dst, $shift and $cr are forwarded.
9382   expand %{
9383     rorI_eReg_CL(dst, shift, cr);
9384   %}
9385 %}
9386 
9387 // Xor Instructions
9388 // Xor Register with Register: matches (XorI dst src), emits XOR $dst,$src (opcode 0x33) and kills the flags.
9389 instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9390   match(Set dst (XorI dst src));
9391   effect(KILL cr);
9392 
9393   size(2);
9394   format %{ "XOR    $dst,$src" %}
9395   opcode(0x33);
9396   ins_encode( OpcP, RegReg( dst, src) );
9397   ins_pipe( ialu_reg_reg );
9398 %}
9399 
9400 // Xor Register with Immediate -1: (XorI dst imm) with an immI_M1 operand is emitted as NOT $dst; no flags operand or effect is declared here.
9401 instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
9402   match(Set dst (XorI dst imm));  
9403 
9404   size(2);
9405   format %{ "NOT    $dst" %}  
9406   ins_encode %{
9407      __ notl($dst$$Register);  // imm is implied by the match and is not encoded
9408   %}
9409   ins_pipe( ialu_reg );
9410 %}
9411 
9412 // Xor Register with Immediate: general immI form of (XorI dst src); kills the flags.
9413 instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9414   match(Set dst (XorI dst src));
9415   effect(KILL cr);
9416 
9417   format %{ "XOR    $dst,$src" %}
9418   opcode(0x81,0x06);  /* Opcode 81 /6 id */
9419   // ins_encode( RegImm( dst, src) );  (older encoding, left disabled in favour of the line below)
9420   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9421   ins_pipe( ialu_reg );
9422 %}
9423 
9424 // Xor Register with Memory: matches (XorI dst (LoadI src)) so the load is folded into the XOR; kills the flags.
9425 instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9426   match(Set dst (XorI dst (LoadI src)));
9427   effect(KILL cr);
9428 
9429   ins_cost(125);
9430   format %{ "XOR    $dst,$src" %}
9431   opcode(0x33);
9432   ins_encode( OpcP, RegMem(dst, src) );
9433   ins_pipe( ialu_reg_mem );
9434 %}
9435 
9436 // Xor Memory with Register: matches a load-xor-store of one memory operand, (StoreI dst (XorI (LoadI dst) src)); kills the flags.
9437 instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9438   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9439   effect(KILL cr);
9440 
9441   ins_cost(150);
9442   format %{ "XOR    $dst,$src" %}
9443   opcode(0x31);  /* Opcode 31 /r */
9444   ins_encode( OpcP, RegMem( src, dst ) );  // register operand is src, memory operand is dst
9445   ins_pipe( ialu_mem_reg );
9446 %}
9447 
9448 // Xor Memory with Immediate: load-xor-store of one memory operand with an immI constant, (StoreI dst (XorI (LoadI dst) src)); kills the flags.
9449 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9450   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
9451   effect(KILL cr);
9452 
9453   ins_cost(125);
9454   format %{ "XOR    $dst,$src" %}
9455   opcode(0x81,0x6);  /* Opcode 81 /6 id */
9456   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9457   ins_pipe( ialu_mem_imm );
9458 %}
9459 
9460 //----------Convert Int to Boolean---------------------------------------------
9461 
9462 instruct movI_nocopy(eRegI dst, eRegI src) %{  // Expand-only helper (no match rule): MOV $dst,$src; first step of the convI2B expansion.
9463   effect( DEF dst, USE src );
9464   format %{ "MOV    $dst,$src" %}
9465   ins_encode( enc_Copy( dst, src) );
9466   ins_pipe( ialu_reg_reg );
9467 %}
9468 
9469 instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{  // Expand-only helper (no match rule): NEG $dst then ADC $dst,$src; second step of the convI2B expansion.
9470   effect( USE_DEF dst, USE src, KILL cr );  // dst is both read and written; flags are killed
9471 
9472   size(4);
9473   format %{ "NEG    $dst\n\t"
9474             "ADC    $dst,$src" %}
9475   ins_encode( neg_reg(dst),
9476               OpcRegReg(0x13,dst,src) );
9477   ins_pipe( ialu_reg_reg_long );
9478 %}
9479 
9480 instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{  // Matches (Conv2B src) for an eRegI source and expands to movI_nocopy followed by ci2b.
9481   match(Set dst (Conv2B src));
9482 
9483   expand %{
9484     movI_nocopy(dst,src);
9485     ci2b(dst,src,cr);
9486   %}
9487 %}
9488 
9489 instruct movP_nocopy(eRegI dst, eRegP src) %{  // Expand-only helper (no match rule): MOV of an eRegP source into an eRegI destination; first step of the convP2B expansion.
9490   effect( DEF dst, USE src );
9491   format %{ "MOV    $dst,$src" %}
9492   ins_encode( enc_Copy( dst, src) );
9493   ins_pipe( ialu_reg_reg );
9494 %}
9495 
9496 instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{  // Expand-only helper (no match rule): NEG $dst then ADC $dst,$src; second step of the convP2B expansion.
9497   effect( USE_DEF dst, USE src, KILL cr );  // dst is both read and written; flags are killed
9498   format %{ "NEG    $dst\n\t"
9499             "ADC    $dst,$src" %}
9500   ins_encode( neg_reg(dst),
9501               OpcRegReg(0x13,dst,src) );
9502   ins_pipe( ialu_reg_reg_long );
9503 %}
9504 
9505 instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{  // Matches (Conv2B src) for an eRegP source and expands to movP_nocopy followed by cp2b.
9506   match(Set dst (Conv2B src));
9507 
9508   expand %{
9509     movP_nocopy(dst,src);
9510     cp2b(dst,src,cr);
9511   %}
9512 %}
9513 
9514 instruct cmpLTMask( eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr ) %{  // Matches (CmpLTMask p q) and emits the XOR / CMP / SETlt / NEG sequence shown in the format; flags are killed.
9515   match(Set dst (CmpLTMask p q));
9516   effect( KILL cr );
9517   ins_cost(400);
9518 
9519   // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination (dst is declared eCXRegI here)
9520   format %{ "XOR    $dst,$dst\n\t"
9521             "CMP    $p,$q\n\t"
9522             "SETlt  $dst\n\t"
9523             "NEG    $dst" %}
9524   ins_encode( OpcRegReg(0x33,dst,dst),
9525               OpcRegReg(0x3B,p,q),
9526               setLT_reg(dst), neg_reg(dst) );
9527   ins_pipe( pipe_slow );
9528 %}
9529 
9530 instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{  // Matches (CmpLTMask dst zero) with an immI0 operand and emits a single SAR $dst,31; flags are killed.
9531   match(Set dst (CmpLTMask dst zero));
9532   effect( DEF dst, KILL cr );
9533   ins_cost(100);
9534 
9535   format %{ "SAR    $dst,31" %}
9536   opcode(0xC1, 0x7);  /* C1 /7 ib */
9537   ins_encode( RegOpcImm( dst, 0x1F ) );  // 0x1F is the shift count 31 shown in the format
9538   ins_pipe( ialu_reg );
9539 %}
9540 
9541 
9542 instruct cadd_cmpLTMask( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{  // Matches p = (AndI (CmpLTMask p q) y) + (SubI p q) as one SUB / SBB / AND / ADD sequence; tmp and flags are killed.
9543   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
9544   effect( KILL tmp, KILL cr );
9545   ins_cost(400);
9546   // annoyingly, $tmp has no edges so you can't ask for it in
9547   // any format or encoding (hence the literal ECX in the format below)
9548   format %{ "SUB    $p,$q\n\t"
9549             "SBB    ECX,ECX\n\t"
9550             "AND    ECX,$y\n\t"
9551             "ADD    $p,ECX" %}
9552   ins_encode( enc_cmpLTP(p,q,y,tmp) );
9553   ins_pipe( pipe_cmplt );
9554 %}
9555 
9556 /* If I enable this, I encourage spilling in the inner loop of compress.
9557 instruct cadd_cmpLTMask_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{
9558   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
9559   effect( USE_KILL tmp, KILL cr );
9560   ins_cost(400);
9561 
9562   format %{ "SUB    $p,$q\n\t"
9563             "SBB    ECX,ECX\n\t"
9564             "AND    ECX,$y\n\t"
9565             "ADD    $p,ECX" %}
9566   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
9567 %}
9568 */
9569 
9570 //----------Long Instructions------------------------------------------------
9571 // Add Long Register with Register: ADD on the low halves followed by ADC on the high halves; kills the flags.
9572 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9573   match(Set dst (AddL dst src));
9574   effect(KILL cr);
9575   ins_cost(200);
9576   format %{ "ADD    $dst.lo,$src.lo\n\t"
9577             "ADC    $dst.hi,$src.hi" %}
9578   opcode(0x03, 0x13);
9579   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9580   ins_pipe( ialu_reg_reg_long );
9581 %}
9582 
9583 // Add Long Register with Immediate: ADD/ADC of the low/high halves against an immL constant; kills the flags.
9584 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9585   match(Set dst (AddL dst src));
9586   effect(KILL cr);
9587   format %{ "ADD    $dst.lo,$src.lo\n\t"
9588             "ADC    $dst.hi,$src.hi" %}
9589   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
9590   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9591   ins_pipe( ialu_reg_long );
9592 %}
9593 
9594 // Add Long Register with Memory: folds (LoadL mem) into the add; low half uses $mem, high half uses $mem+4; kills the flags.
9595 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9596   match(Set dst (AddL dst (LoadL mem)));
9597   effect(KILL cr);
9598   ins_cost(125);
9599   format %{ "ADD    $dst.lo,$mem\n\t"
9600             "ADC    $dst.hi,$mem+4" %}
9601   opcode(0x03, 0x13);
9602   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9603   ins_pipe( ialu_reg_long_mem );
9604 %}
9605 
9606 // Subtract Long Register with Register: SUB on the low halves followed by SBB on the high halves; kills the flags.
9607 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9608   match(Set dst (SubL dst src));
9609   effect(KILL cr);
9610   ins_cost(200);
9611   format %{ "SUB    $dst.lo,$src.lo\n\t"
9612             "SBB    $dst.hi,$src.hi" %}
9613   opcode(0x2B, 0x1B);
9614   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9615   ins_pipe( ialu_reg_reg_long );
9616 %}
9617 
9618 // Subtract Long Register with Immediate: SUB/SBB of the low/high halves against an immL constant; kills the flags.
9619 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9620   match(Set dst (SubL dst src));
9621   effect(KILL cr);
9622   format %{ "SUB    $dst.lo,$src.lo\n\t"
9623             "SBB    $dst.hi,$src.hi" %}
9624   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
9625   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9626   ins_pipe( ialu_reg_long );
9627 %}
9628 
9629 // Subtract Long Register with Memory: folds (LoadL mem) into the subtract; low half uses $mem, high half uses $mem+4; kills the flags.
9630 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9631   match(Set dst (SubL dst (LoadL mem)));
9632   effect(KILL cr);
9633   ins_cost(125);
9634   format %{ "SUB    $dst.lo,$mem\n\t"
9635             "SBB    $dst.hi,$mem+4" %}
9636   opcode(0x2B, 0x1B);
9637   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9638   ins_pipe( ialu_reg_long_mem );
9639 %}
9640 
9641 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{  // Long negate in place: matches (SubL zero dst) with an immL0 operand; kills the flags.
9642   match(Set dst (SubL zero dst));
9643   effect(KILL cr);
9644   ins_cost(300);
9645   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
9646   ins_encode( neg_long(dst) );
9647   ins_pipe( ialu_reg_reg_long );
9648 %}
9649 
9650 // And Long Register with Register: AND applied separately to the low and high halves; kills the flags.
9651 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9652   match(Set dst (AndL dst src));
9653   effect(KILL cr);
9654   format %{ "AND    $dst.lo,$src.lo\n\t"
9655             "AND    $dst.hi,$src.hi" %}
9656   opcode(0x23,0x23);
9657   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9658   ins_pipe( ialu_reg_reg_long );
9659 %}
9660 
9661 // And Long Register with Immediate: AND of the low/high halves against an immL constant; kills the flags.
9662 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9663   match(Set dst (AndL dst src));
9664   effect(KILL cr);
9665   format %{ "AND    $dst.lo,$src.lo\n\t"
9666             "AND    $dst.hi,$src.hi" %}
9667   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
9668   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9669   ins_pipe( ialu_reg_long );
9670 %}
9671 
9672 // And Long Register with Memory: folds (LoadL mem) into the AND; low half uses $mem, high half uses $mem+4; kills the flags.
9673 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9674   match(Set dst (AndL dst (LoadL mem)));
9675   effect(KILL cr);
9676   ins_cost(125);
9677   format %{ "AND    $dst.lo,$mem\n\t"
9678             "AND    $dst.hi,$mem+4" %}
9679   opcode(0x23, 0x23);
9680   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9681   ins_pipe( ialu_reg_long_mem );
9682 %}
9683 
9684 // Or Long Register with Register: OR applied separately to the low and high halves; kills the flags.
9685 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9686   match(Set dst (OrL dst src));
9687   effect(KILL cr);
9688   format %{ "OR     $dst.lo,$src.lo\n\t"
9689             "OR     $dst.hi,$src.hi" %}
9690   opcode(0x0B,0x0B);
9691   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9692   ins_pipe( ialu_reg_reg_long );
9693 %}
9694 
9695 // Or Long Register with Immediate: OR of the low/high halves against an immL constant; kills the flags.
9696 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9697   match(Set dst (OrL dst src));
9698   effect(KILL cr);
9699   format %{ "OR     $dst.lo,$src.lo\n\t"
9700             "OR     $dst.hi,$src.hi" %}
9701   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9702   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9703   ins_pipe( ialu_reg_long );
9704 %}
9705 
9706 // Or Long Register with Memory: folds (LoadL mem) into the OR; low half uses $mem, high half uses $mem+4; kills the flags.
9707 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9708   match(Set dst (OrL dst (LoadL mem)));
9709   effect(KILL cr);
9710   ins_cost(125);
9711   format %{ "OR     $dst.lo,$mem\n\t"
9712             "OR     $dst.hi,$mem+4" %}
9713   opcode(0x0B,0x0B);
9714   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9715   ins_pipe( ialu_reg_long_mem );
9716 %}
9717 
9718 // Xor Long Register with Register: XOR applied separately to the low and high halves; kills the flags.
9719 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9720   match(Set dst (XorL dst src));
9721   effect(KILL cr);
9722   format %{ "XOR    $dst.lo,$src.lo\n\t"
9723             "XOR    $dst.hi,$src.hi" %}
9724   opcode(0x33,0x33);
9725   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9726   ins_pipe( ialu_reg_reg_long );
9727 %}
9728 
9729 // Xor Long Register with Immediate -1: (XorL dst imm) with an immL_M1 operand is emitted as NOT on each half; no flags operand or effect is declared here.
9730 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9731   match(Set dst (XorL dst imm));  
9732   format %{ "NOT    $dst.lo\n\t"
9733             "NOT    $dst.hi" %}
9734   ins_encode %{
9735      __ notl($dst$$Register);                 // $dst.lo in the format
9736      __ notl(HIGH_FROM_LOW($dst$$Register));  // $dst.hi in the format
9737   %}
9738   ins_pipe( ialu_reg_long );
9739 %}
9740 
9741 // Xor Long Register with Immediate: XOR of the low/high halves against an immL constant; kills the flags.
9742 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9743   match(Set dst (XorL dst src));
9744   effect(KILL cr);
9745   format %{ "XOR    $dst.lo,$src.lo\n\t"
9746             "XOR    $dst.hi,$src.hi" %}
9747   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9748   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9749   ins_pipe( ialu_reg_long );
9750 %}
9751 
9752 // Xor Long Register with Memory: folds (LoadL mem) into the XOR; low half uses $mem, high half uses $mem+4; kills the flags.
9753 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9754   match(Set dst (XorL dst (LoadL mem)));
9755   effect(KILL cr);
9756   ins_cost(125);
9757   format %{ "XOR    $dst.lo,$mem\n\t"
9758             "XOR    $dst.hi,$mem+4" %}
9759   opcode(0x33,0x33);
9760   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9761   ins_pipe( ialu_reg_long_mem );
9762 %}
9763 
9764 // Shift Left Long by 1: selected only when UseNewLongLShift is set; implemented as one ADD/ADC pair of dst with itself.
9765 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9766   predicate(UseNewLongLShift);
9767   match(Set dst (LShiftL dst cnt));
9768   effect(KILL cr);
9769   ins_cost(100);
9770   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9771             "ADC    $dst.hi,$dst.hi" %}
9772   ins_encode %{
9773     __ addl($dst$$Register,$dst$$Register);                                // low half
9774     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));  // high half, with carry from the low half
9775   %}
9776   ins_pipe( ialu_reg_long );
9777 %}
9778 
9779 // Shift Left Long by 2: selected only when UseNewLongLShift is set; implemented as two ADD/ADC pairs of dst with itself.
9780 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9781   predicate(UseNewLongLShift);
9782   match(Set dst (LShiftL dst cnt));
9783   effect(KILL cr);
9784   ins_cost(100);
9785   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9786             "ADC    $dst.hi,$dst.hi\n\t" 
9787             "ADD    $dst.lo,$dst.lo\n\t"
9788             "ADC    $dst.hi,$dst.hi" %}
9789   ins_encode %{
9790     __ addl($dst$$Register,$dst$$Register);  // first ADD/ADC pair
9791     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9792     __ addl($dst$$Register,$dst$$Register);  // second ADD/ADC pair
9793     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9794   %}
9795   ins_pipe( ialu_reg_long );
9796 %}
9797 
9798 // Shift Left Long by 3: selected only when UseNewLongLShift is set; implemented as three ADD/ADC pairs of dst with itself.
9799 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9800   predicate(UseNewLongLShift);
9801   match(Set dst (LShiftL dst cnt));
9802   effect(KILL cr);
9803   ins_cost(100);
9804   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9805             "ADC    $dst.hi,$dst.hi\n\t" 
9806             "ADD    $dst.lo,$dst.lo\n\t"
9807             "ADC    $dst.hi,$dst.hi\n\t" 
9808             "ADD    $dst.lo,$dst.lo\n\t"
9809             "ADC    $dst.hi,$dst.hi" %}
9810   ins_encode %{
9811     __ addl($dst$$Register,$dst$$Register);  // first ADD/ADC pair
9812     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9813     __ addl($dst$$Register,$dst$$Register);  // second ADD/ADC pair
9814     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9815     __ addl($dst$$Register,$dst$$Register);  // third ADD/ADC pair
9816     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9817   %}
9818   ins_pipe( ialu_reg_long );
9819 %}
9820 
9821 // Shift Left Long by 1-31: constant count (immI_1_31); SHLD on hi:lo followed by SHL on the low half; kills the flags.
9822 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9823   match(Set dst (LShiftL dst cnt));
9824   effect(KILL cr);
9825   ins_cost(200);
9826   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9827             "SHL    $dst.lo,$cnt" %}
9828   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9829   ins_encode( move_long_small_shift(dst,cnt) );
9830   ins_pipe( ialu_reg_long );
9831 %}
9832 
9833 // Shift Left Long by 32-63: constant count (immI_32_63); the low half is moved into the high half, shifted by $cnt-32, and the low half is cleared.
9834 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9835   match(Set dst (LShiftL dst cnt));
9836   effect(KILL cr);
9837   ins_cost(300);
9838   format %{ "MOV    $dst.hi,$dst.lo\n"
9839           "\tSHL    $dst.hi,$cnt-32\n"
9840           "\tXOR    $dst.lo,$dst.lo" %}
9841   opcode(0xC1, 0x4);  /* C1 /4 ib */
9842   ins_encode( move_long_big_shift_clr(dst,cnt) );
9843   ins_pipe( ialu_reg_long );
9844 %}
9845 
9846 // Shift Left Long by variable: count in the eCXRegI operand; TEST $shift,32 decides whether the MOV/XOR prologue runs before the SHLD/SHL pair.
9847 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9848   match(Set dst (LShiftL dst shift));
9849   effect(KILL cr);
9850   ins_cost(500+200);
9851   size(17);
9852   format %{ "TEST   $shift,32\n\t"
9853             "JEQ,s  small\n\t"
9854             "MOV    $dst.hi,$dst.lo\n\t"
9855             "XOR    $dst.lo,$dst.lo\n"
9856     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9857             "SHL    $dst.lo,$shift" %}
9858   ins_encode( shift_left_long( dst, shift ) );
9859   ins_pipe( pipe_slow );
9860 %}
9861 
9862 // Shift Right Long by 1-31 (URShiftL): constant count (immI_1_31); SHRD on lo:hi followed by SHR on the high half; kills the flags.
9863 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9864   match(Set dst (URShiftL dst cnt));
9865   effect(KILL cr);
9866   ins_cost(200);
9867   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9868             "SHR    $dst.hi,$cnt" %}
9869   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9870   ins_encode( move_long_small_shift(dst,cnt) );
9871   ins_pipe( ialu_reg_long );
9872 %}
9873 
9874 // Shift Right Long by 32-63 (URShiftL): constant count (immI_32_63); the high half is moved into the low half, shifted by $cnt-32, and the high half is cleared.
9875 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9876   match(Set dst (URShiftL dst cnt));
9877   effect(KILL cr);
9878   ins_cost(300);
9879   format %{ "MOV    $dst.lo,$dst.hi\n"
9880           "\tSHR    $dst.lo,$cnt-32\n"
9881           "\tXOR    $dst.hi,$dst.hi" %}
9882   opcode(0xC1, 0x5);  /* C1 /5 ib */
9883   ins_encode( move_long_big_shift_clr(dst,cnt) );
9884   ins_pipe( ialu_reg_long );
9885 %}
9886 
9887 // Shift Right Long by variable (URShiftL): count in the eCXRegI operand; TEST $shift,32 decides whether the MOV/XOR prologue runs before the SHRD/SHR pair.
9888 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9889   match(Set dst (URShiftL dst shift));
9890   effect(KILL cr);
9891   ins_cost(600);
9892   size(17);
9893   format %{ "TEST   $shift,32\n\t"
9894             "JEQ,s  small\n\t"
9895             "MOV    $dst.lo,$dst.hi\n\t"
9896             "XOR    $dst.hi,$dst.hi\n"
9897     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9898             "SHR    $dst.hi,$shift" %}
9899   ins_encode( shift_right_long( dst, shift ) );
9900   ins_pipe( pipe_slow );
9901 %}
9902 
9903 // Shift Right arithmetic Long by 1-31 (RShiftL): constant count (immI_1_31); SHRD on lo:hi followed by SAR on the high half; kills the flags.
9904 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9905   match(Set dst (RShiftL dst cnt));
9906   effect(KILL cr);
9907   ins_cost(200);
9908   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9909             "SAR    $dst.hi,$cnt" %}
9910   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9911   ins_encode( move_long_small_shift(dst,cnt) );
9912   ins_pipe( ialu_reg_long );
9913 %}
9914 
9915 // Shift Right arithmetic Long by 32-63 (RShiftL): constant count (immI_32_63); the high half is moved into the low half and shifted by $cnt-32, then the high half gets SAR 31.
9916 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9917   match(Set dst (RShiftL dst cnt));
9918   effect(KILL cr);
9919   ins_cost(300);
9920   format %{ "MOV    $dst.lo,$dst.hi\n"
9921           "\tSAR    $dst.lo,$cnt-32\n"
9922           "\tSAR    $dst.hi,31" %}
9923   opcode(0xC1, 0x7);  /* C1 /7 ib */
9924   ins_encode( move_long_big_shift_sign(dst,cnt) );
9925   ins_pipe( ialu_reg_long );
9926 %}
9927 
9928 // Shift Right arithmetic Long by variable (RShiftL): count in the eCXRegI operand; TEST $shift,32 decides whether the MOV / SAR 31 prologue runs before the SHRD/SAR pair.
9929 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9930   match(Set dst (RShiftL dst shift));
9931   effect(KILL cr);
9932   ins_cost(600);
9933   size(18);
9934   format %{ "TEST   $shift,32\n\t"
9935             "JEQ,s  small\n\t"
9936             "MOV    $dst.lo,$dst.hi\n\t"
9937             "SAR    $dst.hi,31\n"
9938     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9939             "SAR    $dst.hi,$shift" %}
9940   ins_encode( shift_right_arith_long( dst, shift ) );
9941   ins_pipe( pipe_slow );
9942 %}
9943 
9944 
9945 //----------Double Instructions------------------------------------------------
9946 // Double Math
9947 
9948 // Compare & branch
9949 
9950 // P6 version of float compare, sets condition codes in EFLAGS. Selected when VM_Version::supports_cmov() && UseSSE<=1; kills rax because of the NaN fixup (MOV ah,1 / SAHF).
9951 instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
9952   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9953   match(Set cr (CmpD src1 src2));
9954   effect(KILL rax);
9955   ins_cost(150);
9956   format %{ "FLD    $src1\n\t"
9957             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9958             "JNP    exit\n\t"
9959             "MOV    ah,1       // saw a NaN, set CF\n\t"
9960             "SAHF\n"
9961      "exit:\tNOP               // avoid branch to branch" %}
9962   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9963   ins_encode( Push_Reg_D(src1),
9964               OpcP, RegOpc(src2),
9965               cmpF_P6_fixup );
9966   ins_pipe( pipe_slow );
9967 %}
9968 
9969 instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{  // Same FLD/FUCOMIP compare as cmpD_cc_P6 but producing eFlagsRegUCF, with no cmpF_P6_fixup and no rax kill.
9970   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9971   match(Set cr (CmpD src1 src2));
9972   ins_cost(150);
9973   format %{ "FLD    $src1\n\t"
9974             "FUCOMIP ST,$src2  // P6 instruction" %}
9975   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9976   ins_encode( Push_Reg_D(src1),
9977               OpcP, RegOpc(src2));
9978   ins_pipe( pipe_slow );
9979 %}
9980 
9981 // Compare & branch: x87 CmpD for UseSSE<=1 without the supports_cmov() requirement; FCOMp, FNSTSW AX and SAHF move the result into EFLAGS, so rax is killed.
9982 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
9983   predicate(UseSSE<=1);
9984   match(Set cr (CmpD src1 src2));
9985   effect(KILL rax);
9986   ins_cost(200);
9987   format %{ "FLD    $src1\n\t"
9988             "FCOMp  $src2\n\t"
9989             "FNSTSW AX\n\t"
9990             "TEST   AX,0x400\n\t"
9991             "JZ,s   flags\n\t"
9992             "MOV    AH,1\t# unordered treat as LT\n"
9993     "flags:\tSAHF" %}
9994   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9995   ins_encode( Push_Reg_D(src1),
9996               OpcP, RegOpc(src2),
9997               fpu_flags);
9998   ins_pipe( pipe_slow );
9999 %}
10000 
10001 // Compare vs zero into -1,0,1: matches (CmpD3 src1 zero) with an immD0 operand for UseSSE<=1; kills the flags and rax.
10002 instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
10003   predicate(UseSSE<=1);
10004   match(Set dst (CmpD3 src1 zero));
10005   effect(KILL cr, KILL rax);
10006   ins_cost(280);
10007   format %{ "FTSTD  $dst,$src1" %}
10008   opcode(0xE4, 0xD9);
10009   ins_encode( Push_Reg_D(src1),
10010               OpcS, OpcP, PopFPU,
10011               CmpF_Result(dst));
10012   ins_pipe( pipe_slow );
10013 %}
10014 
10015 // Compare into -1,0,1: matches (CmpD3 src1 src2) on two regD operands for UseSSE<=1; kills the flags and rax.
10016 instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
10017   predicate(UseSSE<=1);
10018   match(Set dst (CmpD3 src1 src2));
10019   effect(KILL cr, KILL rax);
10020   ins_cost(300);
10021   format %{ "FCMPD  $dst,$src1,$src2" %}
10022   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10023   ins_encode( Push_Reg_D(src1),
10024               OpcP, RegOpc(src2),
10025               CmpF_Result(dst));
10026   ins_pipe( pipe_slow );
10027 %}
10028 
10029 // float compare and set condition codes in EFLAGS by XMM regs (UseSSE>=2): COMISD followed by the cmpF_P6_fixup NaN handling, which is why rax is killed.
10030 instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{
10031   predicate(UseSSE>=2);
10032   match(Set cr (CmpD dst src));
10033   effect(KILL rax);
10034   ins_cost(125);
10035   format %{ "COMISD $dst,$src\n"
10036           "\tJNP    exit\n"
10037           "\tMOV    ah,1       // saw a NaN, set CF\n"
10038           "\tSAHF\n"
10039      "exit:\tNOP               // avoid branch to branch" %}
10040   opcode(0x66, 0x0F, 0x2F);
10041   ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
10042   ins_pipe( pipe_slow );
10043 %}
10044 
10045 instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{  // Same COMISD compare as cmpXD_cc but producing eFlagsRegUCF, with no cmpF_P6_fixup and no rax kill.
10046   predicate(UseSSE>=2);
10047   match(Set cr (CmpD dst src));
10048   ins_cost(100);
10049   format %{ "COMISD $dst,$src" %}
10050   opcode(0x66, 0x0F, 0x2F);
10051   ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
10052   ins_pipe( pipe_slow );
10053 %}
10054 
10055 // float compare and set condition codes in EFLAGS by XMM regs, memory form (UseSSE>=2): COMISD against (LoadD src) followed by the cmpF_P6_fixup NaN handling; kills rax.
10056 instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
10057   predicate(UseSSE>=2);
10058   match(Set cr (CmpD dst (LoadD src)));
10059   effect(KILL rax);
10060   ins_cost(145);
10061   format %{ "COMISD $dst,$src\n"
10062           "\tJNP    exit\n"
10063           "\tMOV    ah,1       // saw a NaN, set CF\n"
10064           "\tSAHF\n"
10065      "exit:\tNOP               // avoid branch to branch" %}
10066   opcode(0x66, 0x0F, 0x2F);
10067   ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
10068   ins_pipe( pipe_slow );
10069 %}
10070 
10071 instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{  // Same COMISD-with-memory compare as cmpXD_ccmem but producing eFlagsRegUCF, with no cmpF_P6_fixup and no rax kill.
10072   predicate(UseSSE>=2);
10073   match(Set cr (CmpD dst (LoadD src)));
10074   ins_cost(100);
10075   format %{ "COMISD $dst,$src" %}
10076   opcode(0x66, 0x0F, 0x2F);
10077   ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src));
10078   ins_pipe( pipe_slow );
10079 %}
10080 
10081 // Compare into -1,0,1 in XMM: matches (CmpD3 src1 src2) on two regXD operands for UseSSE>=2; $dst is cleared with XOR before the COMISD; kills the flags.
10082 instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
10083   predicate(UseSSE>=2);
10084   match(Set dst (CmpD3 src1 src2));
10085   effect(KILL cr);
10086   ins_cost(255);
10087   format %{ "XOR    $dst,$dst\n"
10088           "\tCOMISD $src1,$src2\n"
10089           "\tJP,s   nan\n"
10090           "\tJEQ,s  exit\n"
10091           "\tJA,s   inc\n"
10092       "nan:\tDEC    $dst\n"
10093           "\tJMP,s  exit\n"
10094       "inc:\tINC    $dst\n"
10095       "exit:"
10096                 %}
10097   opcode(0x66, 0x0F, 0x2F);
10098   ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
10099              CmpX_Result(dst));
10100   ins_pipe( pipe_slow );
10101 %}
10102 
10103 // Compare into -1,0,1 in XMM and memory: matches (CmpD3 src1 (LoadD mem)) for UseSSE>=2; $dst is zeroed with MOV after the COMISD so the flags are not disturbed.
10104 instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
10105   predicate(UseSSE>=2);
10106   match(Set dst (CmpD3 src1 (LoadD mem)));
10107   effect(KILL cr);
10108   ins_cost(275);
10109   format %{ "COMISD $src1,$mem\n"
10110           "\tMOV    $dst,0\t\t# do not blow flags\n"
10111           "\tJP,s   nan\n"
10112           "\tJEQ,s  exit\n"
10113           "\tJA,s   inc\n"
10114       "nan:\tDEC    $dst\n"
10115           "\tJMP,s  exit\n"
10116       "inc:\tINC    $dst\n"
10117       "exit:"
10118                 %}
10119   opcode(0x66, 0x0F, 0x2F);
10120   ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
10121              LdImmI(dst,0x0), CmpX_Result(dst));
10122   ins_pipe( pipe_slow );
10123 %}
10124 
10125 
10126 instruct subD_reg(regD dst, regD src) %{  // x87 double subtract (UseSSE<=1): matches (SubD dst src); src is pushed with FLD and then subtracted into dst.
10127   predicate (UseSSE <=1);
10128   match(Set dst (SubD dst src));
10129 
10130   format %{ "FLD    $src\n\t"
10131             "DSUBp  $dst,ST" %}
10132   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10133   ins_cost(150);
10134   ins_encode( Push_Reg_D(src),
10135               OpcP, RegOpc(dst) );
10136   ins_pipe( fpu_reg_reg );
10137 %}
10138 
10139 instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{  // x87 double subtract with rounding (UseSSE<=1): matches (RoundDouble (SubD src1 src2)) and stores the result to a stackSlotD.
10140   predicate (UseSSE <=1);
10141   match(Set dst (RoundDouble (SubD src1 src2)));
10142   ins_cost(250);
10143 
10144   format %{ "FLD    $src2\n\t"
10145             "DSUB   ST,$src1\n\t"
10146             "FSTP_D $dst\t# D-round" %}
10147   opcode(0xD8, 0x5);  // NOTE(review): D8 /5 looks like the reversed-subtract form while the format prints DSUB - confirm against the x87 opcode table
10148   ins_encode( Push_Reg_D(src2),
10149               OpcP, RegOpc(src1), Pop_Mem_D(dst) );
10150   ins_pipe( fpu_mem_reg_reg );
10151 %}
10152 
10153 
10154 instruct subD_reg_mem(regD dst, memory src) %{  // x87 double subtract with a memory operand (UseSSE<=1): matches (SubD dst (LoadD src)); the load is done with the tertiary opcode before the subtract.
10155   predicate (UseSSE <=1);
10156   match(Set dst (SubD dst (LoadD src)));
10157   ins_cost(150);
10158 
10159   format %{ "FLD    $src\n\t"
10160             "DSUBp  $dst,ST" %}
10161   opcode(0xDE, 0x5, 0xDD); /* DE E8+i */  /* LoadD  DD /0 */
10162   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10163               OpcP, RegOpc(dst) );
10164   ins_pipe( fpu_reg_mem );
10165 %}
10166 
10167 instruct absD_reg(regDPR1 dst, regDPR1 src) %{  // x87 AbsD (UseSSE<=1): both operands are regDPR1 and the instruction emitted is FABS.
10168   predicate (UseSSE<=1);
10169   match(Set dst (AbsD src));
10170   ins_cost(100);
10171   format %{ "FABS" %}
10172   opcode(0xE1, 0xD9);
10173   ins_encode( OpcS, OpcP );
10174   ins_pipe( fpu_reg_reg );
10175 %}
10176 
10177 instruct absXD_reg( regXD dst ) %{  // XMM AbsD (UseSSE>=2): in-place on dst, ANDPD with a sign-clearing mask as shown in the format.
10178   predicate(UseSSE>=2);
10179   match(Set dst (AbsD dst));
10180   format %{ "ANDPD  $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
10181   ins_encode( AbsXD_encoding(dst));
10182   ins_pipe( pipe_slow );
10183 %}
10184 
10185 instruct negD_reg(regDPR1 dst, regDPR1 src) %{  // x87 NegD (UseSSE<=1): both operands are regDPR1 and the instruction emitted is FCHS.
10186   predicate(UseSSE<=1);
10187   match(Set dst (NegD src));
10188   ins_cost(100);
10189   format %{ "FCHS" %}
10190   opcode(0xE0, 0xD9);
10191   ins_encode( OpcS, OpcP );
10192   ins_pipe( fpu_reg_reg );
10193 %}
10194 
10195 instruct negXD_reg( regXD dst ) %{  // XMM NegD (UseSSE>=2): in-place on dst, XORPD with the sign-flip constant.
10196   predicate(UseSSE>=2);
10197   match(Set dst (NegD dst));
10198   format %{ "XORPD  $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
10199   ins_encode %{
10200      __ xorpd($dst$$XMMRegister,
10201               ExternalAddress((address)double_signflip_pool));  // constant is read from double_signflip_pool
10202   %}
10203   ins_pipe( pipe_slow );
10204 %}
10205 
10206 instruct addD_reg(regD dst, regD src) %{  // x87 double add (UseSSE<=1): matches (AddD dst src); src is pushed with FLD and then added into dst.
10207   predicate(UseSSE<=1);
10208   match(Set dst (AddD dst src));
10209   format %{ "FLD    $src\n\t"
10210             "DADD   $dst,ST" %}
10211   size(4);
10212   ins_cost(150);
10213   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10214   ins_encode( Push_Reg_D(src),
10215               OpcP, RegOpc(dst) );
10216   ins_pipe( fpu_reg_reg );
10217 %}
10218 
10219 
10220 instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{  // x87 double add with rounding (UseSSE<=1): matches (RoundDouble (AddD src1 src2)) and stores the result to a stackSlotD.
10221   predicate(UseSSE<=1);
10222   match(Set dst (RoundDouble (AddD src1 src2)));
10223   ins_cost(250);
10224 
10225   format %{ "FLD    $src2\n\t"
10226             "DADD   ST,$src1\n\t"
10227             "FSTP_D $dst\t# D-round" %}
10228   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
10229   ins_encode( Push_Reg_D(src2),
10230               OpcP, RegOpc(src1), Pop_Mem_D(dst) );
10231   ins_pipe( fpu_mem_reg_reg );
10232 %}
10233 
10234 
10235 instruct addD_reg_mem(regD dst, memory src) %{  // x87 double add with a memory operand (UseSSE<=1): matches (AddD dst (LoadD src)); the load is done with the tertiary opcode before the add.
10236   predicate(UseSSE<=1);
10237   match(Set dst (AddD dst (LoadD src)));
10238   ins_cost(150);
10239 
10240   format %{ "FLD    $src\n\t"
10241             "DADDp  $dst,ST" %}
10242   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
10243   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10244               OpcP, RegOpc(dst) );
10245   ins_pipe( fpu_reg_mem );
10246 %}
10247 
10248 // add-to-memory: matches a load-add-round-store of one memory operand, (StoreD dst (RoundDouble (AddD (LoadD dst) src))), for UseSSE<=1.
10249 instruct addD_mem_reg(memory dst, regD src) %{
10250   predicate(UseSSE<=1);
10251   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
10252   ins_cost(150);
10253 
10254   format %{ "FLD_D  $dst\n\t"
10255             "DADD   ST,$src\n\t"
10256             "FST_D  $dst" %}
10257   opcode(0xDD, 0x0);
10258   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
10259               Opcode(0xD8), RegOpc(src),
10260               set_instruction_start,
10261               Opcode(0xDD), RMopc_Mem(0x03,dst) );  // NOTE(review): DD /3 looks like the popping store although the format prints FST_D - confirm
10262   ins_pipe( fpu_reg_mem );
10263 %}
10264 
10265 instruct addD_reg_imm1(regD dst, immD1 src) %{  // x87 double add of an immD1 constant (UseSSE<=1): the format shows FLD1 followed by the add into dst.
10266   predicate(UseSSE<=1);
10267   match(Set dst (AddD dst src));
10268   ins_cost(125);
10269   format %{ "FLD1\n\t"
10270             "DADDp  $dst,ST" %}
10271   opcode(0xDE, 0x00);
10272   ins_encode( LdImmD(src),
10273               OpcP, RegOpc(dst) );
10274   ins_pipe( fpu_reg );
10275 %}
10276 
10277 instruct addD_reg_imm(regD dst, immD src) %{  // x87 double add of a general immD constant (UseSSE<=1); the predicate excludes the constants 0.0 and 1.0.
10278   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
10279   match(Set dst (AddD dst src));
10280   ins_cost(200);
10281   format %{ "FLD_D  [$src]\n\t"
10282             "DADDp  $dst,ST" %}
10283   opcode(0xDE, 0x00);       /* DE /0 */
10284   ins_encode( LdImmD(src),
10285               OpcP, RegOpc(dst));
10286   ins_pipe( fpu_reg_mem );
10287 %}
10288 
10289 instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{  // x87 double add of an immD constant with rounding (UseSSE<=1): result goes to a stackSlotD; the predicate excludes the constants 0.0 and 1.0.
10290   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
10291   match(Set dst (RoundDouble (AddD src con)));
10292   ins_cost(200);
10293   format %{ "FLD_D  [$con]\n\t"
10294             "DADD   ST,$src\n\t"
10295             "FSTP_D $dst\t# D-round" %}
10296   opcode(0xD8, 0x00);       /* D8 /0 */
10297   ins_encode( LdImmD(con),
10298               OpcP, RegOpc(src), Pop_Mem_D(dst));
10299   ins_pipe( fpu_mem_reg_con );
10300 %}
10301 
10302 // Add two double precision floating point values in xmm
10303 instruct addXD_reg(regXD dst, regXD src) %{
        // Register-register form: dst = dst + src. Only selected when UseSSE>=2.
10304   predicate(UseSSE>=2);
10305   match(Set dst (AddD dst src));
10306   format %{ "ADDSD  $dst,$src" %}
        // Literal opcode bytes F2 0F 58 followed by the RegReg(dst, src) operand encoding.
10307   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
10308   ins_pipe( pipe_slow );
10309 %}
10310 
10311 instruct addXD_imm(regXD dst, immXD con) %{
        // Constant form of the xmm double add: dst = dst + con. Only selected when UseSSE>=2.
        // The format prints the constant as a memory reference ([$con]).
10312   predicate(UseSSE>=2);
10313   match(Set dst (AddD dst con));
10314   format %{ "ADDSD  $dst,[$con]" %}
        // Literal opcode bytes F2 0F 58; LdImmXD(dst, con) supplies the operand encoding.
10315   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), LdImmXD(dst, con) );
10316   ins_pipe( pipe_slow );
10317 %}
10318 
10319 instruct addXD_mem(regXD dst, memory mem) %{
        // Memory form of the xmm double add: dst = dst + LoadD(mem), folding the load
        // into the instruction. Only selected when UseSSE>=2.
10320   predicate(UseSSE>=2);
10321   match(Set dst (AddD dst (LoadD mem)));
10322   format %{ "ADDSD  $dst,$mem" %}
        // Literal opcode bytes F2 0F 58 followed by the RegMem(dst,mem) operand encoding.
10323   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
10324   ins_pipe( pipe_slow );
10325 %}
10326 
10327 // Sub two double precision floating point values in xmm
10328 instruct subXD_reg(regXD dst, regXD src) %{
        // Register-register form: dst = dst - src. Only selected when UseSSE>=2.
10329   predicate(UseSSE>=2);
10330   match(Set dst (SubD dst src));
10331   format %{ "SUBSD  $dst,$src" %}
        // Literal opcode bytes F2 0F 5C followed by the RegReg(dst, src) operand encoding.
10332   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
10333   ins_pipe( pipe_slow );
10334 %}
10335 
10336 instruct subXD_imm(regXD dst, immXD con) %{
        // Constant form of the xmm double subtract: dst = dst - con. Only selected when UseSSE>=2.
        // The format prints the constant as a memory reference ([$con]).
10337   predicate(UseSSE>=2);
10338   match(Set dst (SubD dst con));
10339   format %{ "SUBSD  $dst,[$con]" %}
        // Literal opcode bytes F2 0F 5C; LdImmXD(dst, con) supplies the operand encoding.
10340   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), LdImmXD(dst, con) );
10341   ins_pipe( pipe_slow );
10342 %}
10343 
10344 instruct subXD_mem(regXD dst, memory mem) %{
        // Memory form of the xmm double subtract: dst = dst - LoadD(mem), folding the
        // load into the instruction. Only selected when UseSSE>=2.
10345   predicate(UseSSE>=2);
10346   match(Set dst (SubD dst (LoadD mem)));
10347   format %{ "SUBSD  $dst,$mem" %}
        // Literal opcode bytes F2 0F 5C followed by the RegMem(dst,mem) operand encoding.
10348   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
10349   ins_pipe( pipe_slow );
10350 %}
10351 
10352 // Mul two double precision floating point values in xmm
10353 instruct mulXD_reg(regXD dst, regXD src) %{
        // Register-register form: dst = dst * src. Only selected when UseSSE>=2.
10354   predicate(UseSSE>=2);
10355   match(Set dst (MulD dst src));
10356   format %{ "MULSD  $dst,$src" %}
        // Literal opcode bytes F2 0F 59 followed by the RegReg(dst, src) operand encoding.
10357   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
10358   ins_pipe( pipe_slow );
10359 %}
10360 
10361 instruct mulXD_imm(regXD dst, immXD con) %{
        // Constant form of the xmm double multiply: dst = dst * con. Only selected when UseSSE>=2.
        // The format prints the constant as a memory reference ([$con]).
10362   predicate(UseSSE>=2);
10363   match(Set dst (MulD dst con));
10364   format %{ "MULSD  $dst,[$con]" %}
        // Literal opcode bytes F2 0F 59; LdImmXD(dst, con) supplies the operand encoding.
10365   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), LdImmXD(dst, con) );
10366   ins_pipe( pipe_slow );
10367 %}
10368 
10369 instruct mulXD_mem(regXD dst, memory mem) %{
        // Memory form of the xmm double multiply: dst = dst * LoadD(mem), folding the
        // load into the instruction. Only selected when UseSSE>=2.
10370   predicate(UseSSE>=2);
10371   match(Set dst (MulD dst (LoadD mem)));
10372   format %{ "MULSD  $dst,$mem" %}
        // Literal opcode bytes F2 0F 59 followed by the RegMem(dst,mem) operand encoding.
10373   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
10374   ins_pipe( pipe_slow );
10375 %}
10376 
10377 // Div two double precision floating point values in xmm
10378 instruct divXD_reg(regXD dst, regXD src) %{
        // Register-register form: dst = dst / src. Only selected when UseSSE>=2.
10379   predicate(UseSSE>=2);
10380   match(Set dst (DivD dst src));
10381   format %{ "DIVSD  $dst,$src" %}
        // opcode() declares the same three bytes that ins_encode spells out literally;
        // the encoding below does not reference OpcP/OpcS.
10382   opcode(0xF2, 0x0F, 0x5E);
10383   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
10384   ins_pipe( pipe_slow );
10385 %}
10386 
10387 instruct divXD_imm(regXD dst, immXD con) %{
        // Constant form of the xmm double divide: dst = dst / con. Only selected when UseSSE>=2.
        // The format prints the constant as a memory reference ([$con]).
10388   predicate(UseSSE>=2);
10389   match(Set dst (DivD dst con));
10390   format %{ "DIVSD  $dst,[$con]" %}
        // Literal opcode bytes F2 0F 5E; LdImmXD(dst, con) supplies the operand encoding.
10391   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), LdImmXD(dst, con));
10392   ins_pipe( pipe_slow );
10393 %}
10394 
10395 instruct divXD_mem(regXD dst, memory mem) %{
        // Memory form of the xmm double divide: dst = dst / LoadD(mem), folding the
        // load into the instruction. Only selected when UseSSE>=2.
10396   predicate(UseSSE>=2);
10397   match(Set dst (DivD dst (LoadD mem)));
10398   format %{ "DIVSD  $dst,$mem" %}
        // Literal opcode bytes F2 0F 5E followed by the RegMem(dst,mem) operand encoding.
10399   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
10400   ins_pipe( pipe_slow );
10401 %}
10402 
10403 
10404 instruct mulD_reg(regD dst, regD src) %{
        // x87 register-register double multiply: dst = dst * src. Only selected when UseSSE<=1.
10405   predicate(UseSSE<=1);
10406   match(Set dst (MulD dst src));
10407   format %{ "FLD    $src\n\t"
10408             "DMULp  $dst,ST" %}
10409   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
10410   ins_cost(150);
        // Push_Reg_D(src) emits the load of $src; OpcP (DE) + RegOpc(dst) emits the multiply.
10411   ins_encode( Push_Reg_D(src),
10412               OpcP, RegOpc(dst) );
10413   ins_pipe( fpu_reg_reg );
10414 %}
10415 
10416 // Strict FP instruction biases argument before multiply then
10417 // biases result to avoid double rounding of subnormals.
10418 //
10419 // scale arg1 by multiplying arg1 by 2^(-15360)
10420 // load arg2
10421 // multiply scaled arg1 by arg2
10422 // rescale product by 2^(15360)
10423 //
10424 instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
        // Applies only on the x87 path (UseSSE<=1) when the current compilation has a
        // method and that method is strict. Operand classes: dst is regDPR1, src is regnotDPR1.
10425   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10426   match(Set dst (MulD dst src));
10427   ins_cost(1);   // Select this instruction for all strict FP double multiplies
10428 
10429   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
10430             "DMULp  $dst,ST\n\t"
10431             "FLD    $src\n\t"
10432             "DMULp  $dst,ST\n\t"
10433             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
10434             "DMULp  $dst,ST\n\t" %}
10435   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
        // Encoding order mirrors the header comment: bias1 scaling, load of $src,
        // the multiply itself (OpcP + RegOpc), then bias2 rescaling.
10436   ins_encode( strictfp_bias1(dst),
10437               Push_Reg_D(src),
10438               OpcP, RegOpc(dst),
10439               strictfp_bias2(dst) );
10440   ins_pipe( fpu_reg_reg );
10441 %}
10442 
10443 instruct mulD_reg_imm(regD dst, immD src) %{
        // x87 multiply of $dst by a general double constant.
        // The predicate inspects the constant leaf (second child of the MulD) and
        // rejects the values 0.0 and 1.0, so this rule never applies to them.
10444   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
10445   match(Set dst (MulD dst src));
10446   ins_cost(200);
10447   format %{ "FLD_D  [$src]\n\t"
10448             "DMULp  $dst,ST" %}
10449   opcode(0xDE, 0x1); /* DE /1 */
        // LdImmD(src) emits the constant load; OpcP (DE) + RegOpc(dst) emits the multiply.
10450   ins_encode( LdImmD(src),
10451               OpcP, RegOpc(dst) );
10452   ins_pipe( fpu_reg_mem );
10453 %}
10454 
10455 
10456 instruct mulD_reg_mem(regD dst, memory src) %{
        // x87 double multiply with a memory operand: dst = dst * LoadD(src).
        // Only selected when UseSSE<=1.
10457   predicate( UseSSE<=1 );
10458   match(Set dst (MulD dst (LoadD src)));
10459   ins_cost(200);
10460   format %{ "FLD_D  $src\n\t"
10461             "DMULp  $dst,ST" %}
10462   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
        // Tertiary opcode (DD /0) loads the double from $src first; the primary
        // opcode (DE) with RegOpc(dst) then performs the multiply into $dst.
10463   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10464               OpcP, RegOpc(dst) );
10465   ins_pipe( fpu_reg_mem );
10466 %}
10467 
10468 //
10469 // Cisc-alternate to reg-reg multiply
10470 instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
        // Three-operand x87 double multiply with a folded load: dst = src * LoadD(mem).
        // Only selected when UseSSE<=1.
10471   predicate( UseSSE<=1 );
10472   match(Set dst (MulD src (LoadD mem)));
10473   ins_cost(250);
10474   format %{ "FLD_D  $mem\n\t"
10475             "DMUL   ST,$src\n\t"
10476             "FSTP_D $dst" %}
        // The tertiary opcode is the load of $mem. Because a LoadD is folded it must be
        // DD /0 (FLD m64fp), matching addD_reg_mem and mulD_reg_mem; D9 /0 is FLD m32fp
        // and would read only a 32-bit float from the double's address.
10477   opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */  /* LoadD DD /0 */
10478   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
10479               OpcReg_F(src),
10480               Pop_Reg_D(dst) );
10481   ins_pipe( fpu_reg_reg_mem );
10482 %}
10483 
10484 
10485 // MACRO3 -- addD a mulD
10486 // This instruction is a '2-address' instruction in that the result goes
10487 // back to src2.  This eliminates a move from the macro; possibly the
10488 // register allocator will have to add it back (and maybe not).
10489 instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
        // Fused match of a multiply feeding an add: src2 = (src0 * src1) + src2.
        // Only selected when UseSSE<=1.
10490   predicate( UseSSE<=1 );
10491   match(Set src2 (AddD (MulD src0 src1) src2));
10492   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
10493             "DMUL   ST,$src1\n\t"
10494             "DADDp  $src2,ST" %}
10495   ins_cost(250);
        // NOTE(review): none of the enc_classes below is passed OpcP, so this opcode()
        // looks unused unless one of them reads $primary internally -- confirm.
10496   opcode(0xDD); /* LoadD DD /0 */
10497   ins_encode( Push_Reg_F(src0),
10498               FMul_ST_reg(src1),
10499               FAddP_reg_ST(src2) );
10500   ins_pipe( fpu_reg_reg_reg );
10501 %}
10502 
10503 
10504 // MACRO3 -- subD a mulD
10505 instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
        // Fused match of a multiply feeding a subtract: src2 = (src0 * src1) - src2.
        // Like addD_mulD_reg the result is written back to src2. Only selected when UseSSE<=1.
10506   predicate( UseSSE<=1 );
10507   match(Set src2 (SubD (MulD src0 src1) src2));
10508   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
10509             "DMUL   ST,$src1\n\t"
10510             "DSUBRp $src2,ST" %}
10511   ins_cost(250);
        // The final step is emitted as literal bytes: DE followed by Opc_plus(0xE0,src2),
        // which the format prints as the reversed subtract-and-pop DSUBRp.
10512   ins_encode( Push_Reg_F(src0),
10513               FMul_ST_reg(src1),
10514               Opcode(0xDE), Opc_plus(0xE0,src2));
10515   ins_pipe( fpu_reg_reg_reg );
10516 %}
10517 
10518 
10519 instruct divD_reg(regD dst, regD src) %{
        // x87 register-register double divide: dst = dst / src. Only selected when UseSSE<=1.
10520   predicate( UseSSE<=1 );
10521   match(Set dst (DivD dst src));
10522 
10523   format %{ "FLD    $src\n\t"
10524             "FDIVp  $dst,ST" %}
10525   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10526   ins_cost(150);
        // Push_Reg_D(src) emits the load of $src; OpcP (DE) + RegOpc(dst) emits the divide.
10527   ins_encode( Push_Reg_D(src),
10528               OpcP, RegOpc(dst) );
10529   ins_pipe( fpu_reg_reg );
10530 %}
10531 
10532 // Strict FP instruction biases argument before division then
10533 // biases result, to avoid double rounding of subnormals.
10534 //
10535 // scale dividend by multiplying dividend by 2^(-15360)
10536 // load divisor
10537 // divide scaled dividend by divisor
10538 // rescale quotient by 2^(15360)
10539 //
10540 instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
        // Applies only on the x87 path (UseSSE<=1) when the current compilation has a
        // method and that method is strict. Operand classes: dst is regDPR1, src is regnotDPR1.
        // A second, weaker "predicate (UseSSE<=1)" clause used to precede the match rule;
        // it was redundant with the clause below and has been removed so the instruction
        // carries exactly one predicate, like strictfp_mulD_reg.
10542   match(Set dst (DivD dst src));
10543   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10544   ins_cost(1);   // Very low cost so this rule is selected for strict FP double divides
10545 
10546   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
10547             "DMULp  $dst,ST\n\t"
10548             "FLD    $src\n\t"
10549             "FDIVp  $dst,ST\n\t"
10550             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
10551             "DMULp  $dst,ST\n\t" %}
10552   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
        // Encoding order mirrors the header comment: bias1 scaling, load of $src,
        // the divide itself (OpcP + RegOpc), then bias2 rescaling.
10553   ins_encode( strictfp_bias1(dst),
10554               Push_Reg_D(src),
10555               OpcP, RegOpc(dst),
10556               strictfp_bias2(dst) );
10557   ins_pipe( fpu_reg_reg );
10558 %}
10559 
10560 instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
        // Rounded x87 divide: dst (a stack slot) = RoundDouble(src1 / src2).
        // Excluded when the current compilation has a method that is strict
        // (the predicate negates the condition used by strictfp_divD_reg).
10561   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10562   match(Set dst (RoundDouble (DivD src1 src2)));
10563 
10564   format %{ "FLD    $src1\n\t"
10565             "FDIV   ST,$src2\n\t"
10566             "FSTP_D $dst\t# D-round" %}
10567   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
        // Load of $src1, divide by $src2 via OpcP (D8) + RegOpc(src2), then
        // Pop_Mem_D(dst) stores to the stack slot (the format labels this "D-round").
10568   ins_encode( Push_Reg_D(src1),
10569               OpcP, RegOpc(src2), Pop_Mem_D(dst) );
10570   ins_pipe( fpu_mem_reg_reg );
10571 %}
10572 
10573 
10574 instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
        // x87 double remainder: dst = dst mod src. Only selected when UseSSE<=1.
        // rax and cr appear as operands only so they can be declared killed.
10575   predicate(UseSSE<=1);
10576   match(Set dst (ModD dst src));
10577   effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
10578 
10579   format %{ "DMOD   $dst,$src" %}
10580   ins_cost(250);
        // Four enc_classes in sequence: operand setup, the emitModD() core,
        // result handling, and the final pop into $dst.
10581   ins_encode(Push_Reg_Mod_D(dst, src),
10582               emitModD(),
10583               Push_Result_Mod_D(src),
10584               Pop_Reg_D(dst));
10585   ins_pipe( pipe_slow );
10586 %}
10587 
10588 instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
        // Double remainder for xmm operands: dst = src0 mod src1. Only selected when UseSSE>=2.
        // Per the format, the operands are moved through an 8-byte stack temporary onto the
        // x87 stack, FPREM is looped until the parity check falls through, and the result is
        // moved back to $dst. rax and cr are declared killed.
10589   predicate(UseSSE>=2);
10590   match(Set dst (ModD src0 src1));
10591   effect(KILL rax, KILL cr);
10592 
10593   format %{ "SUB    ESP,8\t # DMOD\n"
10594           "\tMOVSD  [ESP+0],$src1\n"
10595           "\tFLD_D  [ESP+0]\n"
10596           "\tMOVSD  [ESP+0],$src0\n"
10597           "\tFLD_D  [ESP+0]\n"
10598      "loop:\tFPREM\n"
10599           "\tFWAIT\n"
10600           "\tFNSTSW AX\n"
10601           "\tSAHF\n"
10602           "\tJP     loop\n"
10603           "\tFSTP_D [ESP+0]\n"
10604           "\tMOVSD  $dst,[ESP+0]\n"
10605           "\tADD    ESP,8\n"
10606           "\tFSTP   ST0\t # Restore FPU Stack"
10607     %}
10608   ins_cost(250);
        // Shares the emitModD() core with modD_reg; the surrounding enc_classes handle the
        // xmm <-> x87 transfers and PopFPU emits the final "Restore FPU Stack" step.
10609   ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU);
10610   ins_pipe( pipe_slow );
10611 %}
10612 
10613 instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
        // x87 sine: dst = SinD(src). Both operands use the regDPR1 class.
        // Only selected when UseSSE<=1.
10614   predicate (UseSSE<=1);
10615   match(Set dst (SinD src));
10616   ins_cost(1800);
10617   format %{ "DSIN   $dst" %}
10618   opcode(0xD9, 0xFE);
        // Emits just the two opcode bytes (primary D9, secondary FE); no operand bytes.
10619   ins_encode( OpcP, OpcS );
10620   ins_pipe( pipe_slow );
10621 %}
10622 
10623 instruct sinXD_reg(regXD dst, eFlagsReg cr) %{
        // Sine for an xmm operand, computed in place: dst = SinD(dst).
        // Only selected when UseSSE>=2.
10624   predicate (UseSSE>=2);
10625   match(Set dst (SinD dst));
10626   effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10627   ins_cost(1800);
10628   format %{ "DSIN   $dst" %}
10629   opcode(0xD9, 0xFE);
        // The same D9 FE opcode pair as sinD_reg, bracketed by Push_SrcXD / Push_ResultXD
        // which move the value out of and back into the xmm register.
10630   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10631   ins_pipe( pipe_slow );
10632 %}
10633 
10634 instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
        // x87 cosine: dst = CosD(src). Both operands use the regDPR1 class.
        // Only selected when UseSSE<=1.
10635   predicate (UseSSE<=1);
10636   match(Set dst (CosD src));
10637   ins_cost(1800);
10638   format %{ "DCOS   $dst" %}
10639   opcode(0xD9, 0xFF);
        // Emits just the two opcode bytes (primary D9, secondary FF); no operand bytes.
10640   ins_encode( OpcP, OpcS );
10641   ins_pipe( pipe_slow );
10642 %}
10643 
10644 instruct cosXD_reg(regXD dst, eFlagsReg cr) %{
        // Cosine for an xmm operand, computed in place: dst = CosD(dst).
        // Only selected when UseSSE>=2.
10645   predicate (UseSSE>=2);
10646   match(Set dst (CosD dst));
10647   effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10648   ins_cost(1800);
10649   format %{ "DCOS   $dst" %}
10650   opcode(0xD9, 0xFF);
        // The same D9 FF opcode pair as cosD_reg, bracketed by Push_SrcXD / Push_ResultXD
        // which move the value out of and back into the xmm register.
10651   ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
10652   ins_pipe( pipe_slow );
10653 %}
10654 
10655 instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
        // x87 tangent: dst = TanD(src). Both operands use the regDPR1 class.
        // Only selected when UseSSE<=1. No ins_cost is declared for this rule.
10656   predicate (UseSSE<=1);
10657   match(Set dst(TanD src));
10658   format %{ "DTAN   $dst" %}
        // Two literal instructions: fptan, then "fstp st" to discard the extra
        // value fptan leaves on top of the FPU stack.
10659   ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
10660               Opcode(0xDD), Opcode(0xD8));   // fstp st
10661   ins_pipe( pipe_slow );
10662 %}
10663 
10664 instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
        // Tangent for an xmm operand, computed in place: dst = TanD(dst).
        // Only selected when UseSSE>=2. No ins_cost is declared for this rule.
10665   predicate (UseSSE>=2);
10666   match(Set dst(TanD dst));
10667   effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
10668   format %{ "DTAN   $dst" %}
        // Same fptan / fstp pair as tanD_reg, bracketed by Push_SrcXD / Push_ResultXD
        // which move the value out of and back into the xmm register.
10669   ins_encode( Push_SrcXD(dst),
10670               Opcode(0xD9), Opcode(0xF2),    // fptan
10671               Opcode(0xDD), Opcode(0xD8),   // fstp st
10672               Push_ResultXD(dst) );
10673   ins_pipe( pipe_slow );
10674 %}
10675 
10676 instruct atanD_reg(regD dst, regD src) %{
        // x87 two-operand arctangent: dst = AtanD(dst, src). Only selected when UseSSE<=1.
10677   predicate (UseSSE<=1);
10678   match(Set dst(AtanD dst src));
        // Mnemonic spelled DATAN (was the truncated "DATA") to match the DSIN/DCOS/DTAN
        // naming of the sibling rules; this only affects the printed assembly.
10679   format %{ "DATAN  $dst,$src" %}
10680   opcode(0xD9, 0xF3);
        // Load of $src, then the D9 F3 opcode pair followed by RegOpc(dst).
10681   ins_encode( Push_Reg_D(src),
10682               OpcP, OpcS, RegOpc(dst) );
10683   ins_pipe( pipe_slow );
10684 %}
10685 
10686 instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
        // Two-operand arctangent for xmm operands: dst = AtanD(dst, src).
        // Only selected when UseSSE>=2.
10687   predicate (UseSSE>=2);
10688   match(Set dst(AtanD dst src));
10689   effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
        // Mnemonic spelled DATAN (was the truncated "DATA") to match the DSIN/DCOS/DTAN
        // naming of the sibling rules; this only affects the printed assembly.
10690   format %{ "DATAN  $dst,$src" %}
10691   opcode(0xD9, 0xF3);
        // Push_SrcXD(src) transfers $src, the D9 F3 opcode pair follows, and
        // Push_ResultXD(dst) moves the result back into the xmm register.
10692   ins_encode( Push_SrcXD(src),
10693               OpcP, OpcS, Push_ResultXD(dst) );
10694   ins_pipe( pipe_slow );
10695 %}
10696 
10697 instruct sqrtD_reg(regD dst, regD src) %{
        // x87 square root: dst = SqrtD(src). Only selected when UseSSE<=1.
10698   predicate (UseSSE<=1);
10699   match(Set dst (SqrtD src));
10700   format %{ "DSQRT  $dst,$src" %}
        // The opcode bytes are declared in reverse (primary FA, secondary D9) and the
        // encoding emits OpcS before OpcP, so the byte order in the code stream is D9 FA.
10701   opcode(0xFA, 0xD9);
10702   ins_encode( Push_Reg_D(src),
10703               OpcS, OpcP, Pop_Reg_D(dst) );
10704   ins_pipe( pipe_slow );
10705 %}
10706 
10707 instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
        // x87 fast-path power: Y = X raised to the Yth power, with the result replacing Y.
        // Only selected when UseSSE<=1. EAX, EBX and ECX are declared killed; the format
        // shows them used to build the 2^int(Q) scaling double on the stack.
10708   predicate (UseSSE<=1);
10709   match(Set Y (PowD X Y));  // Raise X to the Yth power
10710   effect(KILL rax, KILL rbx, KILL rcx);
10711   format %{ "SUB    ESP,8\t\t# Fast-path POW encoding\n\t"
10712             "FLD_D  $X\n\t"
10713             "FYL2X  \t\t\t# Q=Y*ln2(X)\n\t"
10714 
10715             "FDUP   \t\t\t# Q Q\n\t"
10716             "FRNDINT\t\t\t# int(Q) Q\n\t"
10717             "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10718             "FISTP  dword [ESP]\n\t"
10719             "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10720             "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10721             "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10722             "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
10723             "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
10724             "ADD    EAX,1023\t\t# Double exponent bias\n\t"
10725             "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
10726             "SHL    EAX,20\t\t# Shift exponent into place\n\t"
10727             "TEST   EBX,ECX\t\t# Check for overflow\n\t"
10728             "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10729             "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10730             "MOV    [ESP+0],0\n\t"
10731             "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
10732 
10733             "ADD    ESP,8"
10734              %}
        // Stack temp allocation, load of X, fyl2x, the shared pow/exp core
        // (pow_exp_core_encoding, also used by the exp rules), then stack temp release.
10735   ins_encode( push_stack_temp_qword,
10736               Push_Reg_D(X),
10737               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10738               pow_exp_core_encoding,
10739               pop_stack_temp_qword);
10740   ins_pipe( pipe_slow );
10741 %}
10742 
10743 instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
        // Fast-path power for xmm operands: dst = src0 raised to the src1'th power.
        // Only selected when UseSSE>=2. The computation itself runs on the x87 stack, so
        // tmp1 (regDPR1) is declared killed along with EAX, EBX and ECX.
10744   predicate (UseSSE>=2);
10745   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
10746   effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
10747   format %{ "SUB    ESP,8\t\t# Fast-path POW encoding\n\t"
10748             "MOVSD  [ESP],$src1\n\t"
10749             "FLD    FPR1,$src1\n\t"
10750             "MOVSD  [ESP],$src0\n\t"
10751             "FLD    FPR1,$src0\n\t"
10752             "FYL2X  \t\t\t# Q=Y*ln2(X)\n\t"
10753 
10754             "FDUP   \t\t\t# Q Q\n\t"
10755             "FRNDINT\t\t\t# int(Q) Q\n\t"
10756             "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10757             "FISTP  dword [ESP]\n\t"
10758             "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10759             "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10760             "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10761             "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
10762             "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
10763             "ADD    EAX,1023\t\t# Double exponent bias\n\t"
10764             "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
10765             "SHL    EAX,20\t\t# Shift exponent into place\n\t"
10766             "TEST   EBX,ECX\t\t# Check for overflow\n\t"
10767             "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10768             "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10769             "MOV    [ESP+0],0\n\t"
10770             "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
10771 
10772             "FST_D  [ESP]\n\t"
10773             "MOVSD  $dst,[ESP]\n\t"
10774             "ADD    ESP,8"
10775              %}
        // src1 is transferred to the x87 stack before src0, then fyl2x and the shared
        // pow/exp core run. Unlike powD_reg there is no pop_stack_temp_qword here: the
        // format attributes the final store, MOVSD and ESP restore to the last step,
        // Push_ResultXD(dst).
10776   ins_encode( push_stack_temp_qword,
10777               push_xmm_to_fpr1(src1),
10778               push_xmm_to_fpr1(src0),
10779               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10780               pow_exp_core_encoding,
10781               Push_ResultXD(dst) );
10782   ins_pipe( pipe_slow );
10783 %}
10784 
10785 
10786 instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
        // x87 fast-path exponential computed in place: dpr1 = ExpD(dpr1).
        // Only selected when UseSSE<=1. EAX, EBX and ECX are declared killed; the format
        // shows them used to build the 2^int(Q) scaling double on the stack.
10787   predicate (UseSSE<=1);
10788   match(Set dpr1 (ExpD dpr1));
10789   effect(KILL rax, KILL rbx, KILL rcx);
        // The first format line now ends in "\n\t" like every other line (and like
        // expXD_reg); previously "SUB ESP,8 ..." and "FLDL2E" were printed on one line.
10790   format %{ "SUB    ESP,8\t\t# Fast-path EXP encoding\n\t"
10791             "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10792             "FMULP  \t\t\t# Q=X*log2(e)\n\t"
10793 
10794             "FDUP   \t\t\t# Q Q\n\t"
10795             "FRNDINT\t\t\t# int(Q) Q\n\t"
10796             "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10797             "FISTP  dword [ESP]\n\t"
10798             "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10799             "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10800             "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10801             "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
10802             "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
10803             "ADD    EAX,1023\t\t# Double exponent bias\n\t"
10804             "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
10805             "SHL    EAX,20\t\t# Shift exponent into place\n\t"
10806             "TEST   EBX,ECX\t\t# Check for overflow\n\t"
10807             "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10808             "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10809             "MOV    [ESP+0],0\n\t"
10810             "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
10811 
10812             "ADD    ESP,8"
10813              %}
        // Stack temp allocation, fldl2e + fmulp to form Q = X*log2(e), the shared
        // pow/exp core (pow_exp_core_encoding), then stack temp release.
10814   ins_encode( push_stack_temp_qword,
10815               Opcode(0xD9), Opcode(0xEA),   // fldl2e
10816               Opcode(0xDE), Opcode(0xC9),   // fmulp
10817               pow_exp_core_encoding,
10818               pop_stack_temp_qword);
10819   ins_pipe( pipe_slow );
10820 %}
10821 
10822 instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
        // Fast-path exponential for xmm operands: dst = ExpD(src).
        // Only selected when UseSSE>=2. The computation itself runs on the x87 stack, so
        // tmp1 (regDPR1) is declared killed along with EAX, EBX and ECX.
10823   predicate (UseSSE>=2);
10824   match(Set dst (ExpD src));
10825   effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
10826   format %{ "SUB    ESP,8\t\t# Fast-path EXP encoding\n\t"
10827             "MOVSD  [ESP],$src\n\t"
10828             "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10829             "FMULP  \t\t\t# Q=X*log2(e) X\n\t"
10830 
10831             "FDUP   \t\t\t# Q Q\n\t"
10832             "FRNDINT\t\t\t# int(Q) Q\n\t"
10833             "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10834             "FISTP  dword [ESP]\n\t"
10835             "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10836             "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10837             "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10838             "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
10839             "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
10840             "ADD    EAX,1023\t\t# Double exponent bias\n\t"
10841             "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
10842             "SHL    EAX,20\t\t# Shift exponent into place\n\t"
10843             "TEST   EBX,ECX\t\t# Check for overflow\n\t"
10844             "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10845             "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10846             "MOV    [ESP+0],0\n\t"
10847             "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
10848 
10849             "FST_D  [ESP]\n\t"
10850             "MOVSD  $dst,[ESP]\n\t"
10851             "ADD    ESP,8"
10852              %}
        // Push_SrcXD(src) opens the sequence and Push_ResultXD(dst) closes it; in between
        // are fldl2e + fmulp to form Q = X*log2(e) and the shared pow/exp core.
10853   ins_encode( Push_SrcXD(src),
10854               Opcode(0xD9), Opcode(0xEA),   // fldl2e
10855               Opcode(0xDE), Opcode(0xC9),   // fmulp
10856               pow_exp_core_encoding,
10857               Push_ResultXD(dst) );
10858   ins_pipe( pipe_slow );
10859 %}
10860 
10861 
10862 
10863 instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
        // x87 base-10 logarithm: dst = Log10D(src), computed as log_10(2) * log_2(x).
        // Only selected when UseSSE<=1.
10864   predicate (UseSSE<=1);
10865   // The source Double operand on FPU stack
10866   match(Set dst (Log10D src));
10867   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10868   // fxch         ; swap ST(0) with ST(1)
10869   // fyl2x        ; compute log_10(2) * log_2(x)
10870   format %{ "FLDLG2 \t\t\t#Log10\n\t"
10871             "FXCH   \n\t"
10872             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
10873          %}
        // Three literal two-byte instructions, in the order listed above.
10874   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
10875               Opcode(0xD9), Opcode(0xC9),   // fxch
10876               Opcode(0xD9), Opcode(0xF1));  // fyl2x
10877 
10878   ins_pipe( pipe_slow );
10879 %}
10880 
10881 instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
        // Base-10 logarithm for xmm operands: dst = Log10D(src). Only selected when UseSSE>=2.
        // cr is declared killed.
10882   predicate (UseSSE>=2);
10883   effect(KILL cr);
10884   match(Set dst (Log10D src));
10885   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10886   // fyl2x        ; compute log_10(2) * log_2(x)
10887   format %{ "FLDLG2 \t\t\t#Log10\n\t"
10888             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
10889          %}
        // No fxch is emitted here, unlike log10D_reg: the constant is loaded first and
        // Push_SrcXD(src) follows it. The format omits the Push_SrcXD / Push_ResultXD steps.
10890   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
10891               Push_SrcXD(src),
10892               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10893               Push_ResultXD(dst));
10894 
10895   ins_pipe( pipe_slow );
10896 %}
10897 
10898 instruct logD_reg(regDPR1 dst, regDPR1 src) %{
        // x87 natural logarithm: dst = LogD(src), computed as log_e(2) * log_2(x).
        // Only selected when UseSSE<=1.
10899   predicate (UseSSE<=1);
10900   // The source Double operand on FPU stack
10901   match(Set dst (LogD src));
10902   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10903   // fxch         ; swap ST(0) with ST(1)
10904   // fyl2x        ; compute log_e(2) * log_2(x)
10905   format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10906             "FXCH   \n\t"
10907             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
10908          %}
        // Three literal two-byte instructions, in the order listed above.
10909   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10910               Opcode(0xD9), Opcode(0xC9),   // fxch
10911               Opcode(0xD9), Opcode(0xF1));  // fyl2x
10912 
10913   ins_pipe( pipe_slow );
10914 %}
10915 
10916 instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
        // Natural logarithm for xmm operands: dst = LogD(src). Only selected when UseSSE>=2.
        // cr is declared killed.
10917   predicate (UseSSE>=2);
10918   effect(KILL cr);
10919   // The source and result Double operands in XMM registers
10920   match(Set dst (LogD src));
10921   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10922   // fyl2x        ; compute log_e(2) * log_2(x)
10923   format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10924             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
10925          %}
        // No fxch is emitted here, unlike logD_reg: the constant is loaded first and
        // Push_SrcXD(src) follows it. The format omits the Push_SrcXD / Push_ResultXD steps.
10926   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10927               Push_SrcXD(src),
10928               Opcode(0xD9), Opcode(0xF1),   // fyl2x
10929               Push_ResultXD(dst));
10930   ins_pipe( pipe_slow );
10931 %}
10932 
10933 //-------------Float Instructions-------------------------------
10934 // Float Math
10935 
10936 // Code for float compare:
10937 //     fcompp();
10938 //     fwait(); fnstsw_ax();
10939 //     sahf();
10940 //     movl(dst, unordered_result);
10941 //     jcc(Assembler::parity, exit);
10942 //     movl(dst, less_result);
10943 //     jcc(Assembler::below, exit);
10944 //     movl(dst, equal_result);
10945 //     jcc(Assembler::equal, exit);
10946 //     movl(dst, greater_result);
10947 //   exit:
10948 
10949 // P6 version of float compare, sets condition codes in EFLAGS
10950 instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
        // Requires cmov support and UseSSE == 0. Produces an eFlagsRegU result and
        // declares EAX killed, because the NaN fix-up path shown in the format writes AH.
10951   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10952   match(Set cr (CmpF src1 src2));
10953   effect(KILL rax);
10954   ins_cost(150);
10955   format %{ "FLD    $src1\n\t"
10956             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10957             "JNP    exit\n\t"
10958             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10959             "SAHF\n"
10960      "exit:\tNOP               // avoid branch to branch" %}
10961   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
        // Load of $src1, the compare via OpcP (DF) + RegOpc(src2), then the
        // cmpF_P6_fixup enc_class for the unordered case.
10962   ins_encode( Push_Reg_D(src1),
10963               OpcP, RegOpc(src2),
10964               cmpF_P6_fixup );
10965   ins_pipe( pipe_slow );
10966 %}
10967 
10968 instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
        // Variant of cmpF_cc_P6 that produces an eFlagsRegUCF result: same compare, but
        // no fix-up sequence and no EAX kill, at a lower cost (100 vs 150).
10969   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10970   match(Set cr (CmpF src1 src2));
10971   ins_cost(100);
10972   format %{ "FLD    $src1\n\t"
10973             "FUCOMIP ST,$src2  // P6 instruction" %}
10974   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10975   ins_encode( Push_Reg_D(src1),
10976               OpcP, RegOpc(src2));
10977   ins_pipe( pipe_slow );
10978 %}
10979 
10980 
10981 // Compare & branch
10982 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
        // x87 float compare into EFLAGS without the cmov requirement of cmpF_cc_P6;
        // costed higher (200 vs 150). The format shows the FPU status word being fetched
        // into AX and copied to the flags with SAHF, so EAX is declared killed.
10983   predicate(UseSSE == 0);
10984   match(Set cr (CmpF src1 src2));
10985   effect(KILL rax);
10986   ins_cost(200);
10987   format %{ "FLD    $src1\n\t"
10988             "FCOMp  $src2\n\t"
10989             "FNSTSW AX\n\t"
10990             "TEST   AX,0x400\n\t"
10991             "JZ,s   flags\n\t"
10992             "MOV    AH,1\t# unordered treat as LT\n"
10993     "flags:\tSAHF" %}
10994   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
        // Load of $src1, the compare via OpcP (D8) + RegOpc(src2), then the
        // fpu_flags enc_class for the status-word-to-flags sequence.
10995   ins_encode( Push_Reg_D(src1),
10996               OpcP, RegOpc(src2),
10997               fpu_flags);
10998   ins_pipe( pipe_slow );
10999 %}
11000 
11001 // Compare vs zero into -1,0,1
11002 instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
        // Three-way x87 float compare of $src1 against an immF0 operand, producing an
        // integer in $dst. Only selected when UseSSE == 0; EAX and the flags are killed.
11003   predicate(UseSSE == 0);
11004   match(Set dst (CmpF3 src1 zero));
11005   effect(KILL cr, KILL rax);
11006   ins_cost(280);
11007   format %{ "FTSTF  $dst,$src1" %}
        // The opcode bytes are declared in reverse (primary E4, secondary D9) and the
        // encoding emits OpcS before OpcP, so the byte order in the code stream is D9 E4.
11008   opcode(0xE4, 0xD9);
11009   ins_encode( Push_Reg_D(src1),
11010               OpcS, OpcP, PopFPU,
11011               CmpF_Result(dst));
11012   ins_pipe( pipe_slow );
11013 %}
11014 
11015 // Compare into -1,0,1
11016 instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
        // Three-way x87 float compare of two registers, producing an integer in $dst.
        // Only selected when UseSSE == 0; EAX and the flags are declared killed.
11017   predicate(UseSSE == 0);
11018   match(Set dst (CmpF3 src1 src2));
11019   effect(KILL cr, KILL rax);
11020   ins_cost(300);
11021   format %{ "FCMPF  $dst,$src1,$src2" %}
11022   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
        // Load of $src1, the compare via OpcP (D8) + RegOpc(src2), then
        // CmpF_Result(dst) materializes the result in $dst.
11023   ins_encode( Push_Reg_D(src1),
11024               OpcP, RegOpc(src2),
11025               CmpF_Result(dst));
11026   ins_pipe( pipe_slow );
11027 %}
11028 
11029 // float compare and set condition codes in EFLAGS by XMM regs
11030 instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
        // Register-register form. Only selected when UseSSE>=1. EAX is declared killed
        // because the NaN fix-up path shown in the format writes AH.
11031   predicate(UseSSE>=1);
11032   match(Set cr (CmpF dst src));
11033   effect(KILL rax);
11034   ins_cost(145);
11035   format %{ "COMISS $dst,$src\n"
11036           "\tJNP    exit\n"
11037           "\tMOV    ah,1       // saw a NaN, set CF\n"
11038           "\tSAHF\n"
11039      "exit:\tNOP               // avoid branch to branch" %}
11040   opcode(0x0F, 0x2F);
        // Opcode pair 0F 2F with the RegReg operand encoding, followed by the same
        // cmpF_P6_fixup enc_class that the x87 rule cmpF_cc_P6 uses.
11041   ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
11042   ins_pipe( pipe_slow );
11043 %}
11044 
11045 instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
        // Variant of cmpX_cc that produces an eFlagsRegUCF result: the bare COMISS with
        // no fix-up sequence and no EAX kill, at a lower cost (100 vs 145).
11046   predicate(UseSSE>=1);
11047   match(Set cr (CmpF dst src));
11048   ins_cost(100);
11049   format %{ "COMISS $dst,$src" %}
11050   opcode(0x0F, 0x2F);
11051   ins_encode(OpcP, OpcS, RegReg(dst, src));
11052   ins_pipe( pipe_slow );
11053 %}
11054 
11055 // float compare and set condition codes in EFLAGS by XMM regs
11056 instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
        // Memory form of cmpX_cc: the second operand is a folded LoadF. Only selected
        // when UseSSE>=1. EAX is declared killed because the fix-up path writes AH.
11057   predicate(UseSSE>=1);
11058   match(Set cr (CmpF dst (LoadF src)));
11059   effect(KILL rax);
11060   ins_cost(165);
11061   format %{ "COMISS $dst,$src\n"
11062           "\tJNP    exit\n"
11063           "\tMOV    ah,1       // saw a NaN, set CF\n"
11064           "\tSAHF\n"
11065      "exit:\tNOP               // avoid branch to branch" %}
11066   opcode(0x0F, 0x2F);
        // Opcode pair 0F 2F with the RegMem operand encoding, followed by cmpF_P6_fixup.
11067   ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
11068   ins_pipe( pipe_slow );
11069 %}
11070 
11071 instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
        // Variant of cmpX_ccmem that produces an eFlagsRegUCF result: the bare COMISS
        // with a folded LoadF, no fix-up sequence and no EAX kill, at cost 100.
11072   predicate(UseSSE>=1);
11073   match(Set cr (CmpF dst (LoadF src)));
11074   ins_cost(100);
11075   format %{ "COMISS $dst,$src" %}
11076   opcode(0x0F, 0x2F);
11077   ins_encode(OpcP, OpcS, RegMem(dst, src));
11078   ins_pipe( pipe_slow );
11079 %}
11080 
11081 // Compare into -1,0,1 in XMM
11082 instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
        // Three-way float compare of two xmm registers, producing an integer in $dst.
        // Only selected when UseSSE>=1; the flags are declared killed.
11083   predicate(UseSSE>=1);
11084   match(Set dst (CmpF3 src1 src2));
11085   effect(KILL cr);
11086   ins_cost(255);
11087   format %{ "XOR    $dst,$dst\n"
11088           "\tCOMISS $src1,$src2\n"
11089           "\tJP,s   nan\n"
11090           "\tJEQ,s  exit\n"
11091           "\tJA,s   inc\n"
11092       "nan:\tDEC    $dst\n"
11093           "\tJMP,s  exit\n"
11094       "inc:\tINC    $dst\n"
11095       "exit:"
11096                 %}
11097   opcode(0x0F, 0x2F);
        // $dst is zeroed with Xor_Reg BEFORE the compare, so the flags set by COMISS
        // are still intact when CmpX_Result(dst) branches on them.
11098   ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
11099   ins_pipe( pipe_slow );
11100 %}
11101 
11102 // Compare into -1,0,1 in XMM and memory
11103 instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
        // Three-way float compare of an xmm register against a folded LoadF, producing
        // an integer in $dst. Only selected when UseSSE>=1; the flags are declared killed.
11104   predicate(UseSSE>=1);
11105   match(Set dst (CmpF3 src1 (LoadF mem)));
11106   effect(KILL cr);
11107   ins_cost(275);
11108   format %{ "COMISS $src1,$mem\n"
11109           "\tMOV    $dst,0\t\t# do not blow flags\n"
11110           "\tJP,s   nan\n"
11111           "\tJEQ,s  exit\n"
11112           "\tJA,s   inc\n"
11113       "nan:\tDEC    $dst\n"
11114           "\tJMP,s  exit\n"
11115       "inc:\tINC    $dst\n"
11116       "exit:"
11117                 %}
11118   opcode(0x0F, 0x2F);
        // Unlike cmpX_reg, $dst is zeroed AFTER the compare, so a MOV of 0 (LdImmI) is
        // used instead of XOR; the format notes this is to avoid clobbering the flags.
11119   ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
11120   ins_pipe( pipe_slow );
11121 %}
11122 
11123 // Float subtract for UseSSE==0 in 24-bit mode; spill to obtain 24-bit precision
11124 instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
11125   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11126   match(Set dst (SubF src1 src2));
11127 
11128   format %{ "FSUB   $dst,$src1 - $src2" %}
11129   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
11130   ins_encode( Push_Reg_F(src1),
11131               OpcReg_F(src2),
11132               Pop_Mem_F(dst) );  // src1 is pushed, src2 is the opcode operand, the result is popped to the stack slot dst
11133   ins_pipe( fpu_mem_reg_reg );
11134 %}
11135 //
11136 // Float subtract for UseSSE==0 when 24-bit mode is off; this instruction does not round to 24-bits
11137 instruct subF_reg(regF dst, regF src) %{
11138   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11139   match(Set dst (SubF dst src));  // two-address form: dst is both the left operand and the result
11140 
11141   format %{ "FSUB   $dst,$src" %}
11142   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
11143   ins_encode( Push_Reg_F(src),
11144               OpcP, RegOpc(dst) );
11145   ins_pipe( fpu_reg_reg );
11146 %}
11147 
11148 // Spill to obtain 24-bit precision
11149 instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
11150   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());  // UseSSE==0 and 24-bit instruction selection on
11151   match(Set dst (AddF src1 src2));
11152 
11153   format %{ "FADD   $dst,$src1,$src2" %}
11154   opcode(0xD8, 0x0); /* D8 C0+i */
11155   ins_encode( Push_Reg_F(src2),
11156               OpcReg_F(src1),
11157               Pop_Mem_F(dst) );  // src2 is pushed, src1 is the opcode operand, the result is popped to the stack slot dst
11158   ins_pipe( fpu_mem_reg_reg );
11159 %}
11160 //
11161 // Float add for UseSSE==0 when 24-bit mode is off; this instruction does not round to 24-bits
11162 instruct addF_reg(regF dst, regF src) %{
11163   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11164   match(Set dst (AddF dst src));  // two-address form: dst is both an operand and the result
11165 
11166   format %{ "FLD    $src\n\t"
11167             "FADDp  $dst,ST" %}
11168   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
11169   ins_encode( Push_Reg_F(src),
11170               OpcP, RegOpc(dst) );
11171   ins_pipe( fpu_reg_reg );
11172 %}
11173 
11174 // Add two single precision floating point values in xmm
11175 instruct addX_reg(regX dst, regX src) %{
11176   predicate(UseSSE>=1);
11177   match(Set dst (AddF dst src));  // two-address form: dst is both the left operand and the result
11178   format %{ "ADDSS  $dst,$src" %}
11179   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));  // bytes F3 0F 58, then the register-register operand encoding
11180   ins_pipe( pipe_slow );
11181 %}
11182 
11183 instruct addX_imm(regX dst, immXF con) %{
11184   predicate(UseSSE>=1);
11185   match(Set dst (AddF dst con));  // right operand is a float constant operand (immXF)
11186   format %{ "ADDSS  $dst,[$con]" %}
11187   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), LdImmX(dst, con) );  // same F3 0F 58 opcode bytes as addX_reg; operand emitted by LdImmX
11188   ins_pipe( pipe_slow );
11189 %}
11190 
11191 instruct addX_mem(regX dst, memory mem) %{
11192   predicate(UseSSE>=1);
11193   match(Set dst (AddF dst (LoadF mem)));  // the LoadF is folded into the add's memory operand
11194   format %{ "ADDSS  $dst,$mem" %}
11195   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));  // same F3 0F 58 opcode bytes as addX_reg; register-memory operand encoding
11196   ins_pipe( pipe_slow );
11197 %}
11198 
11199 // Subtract two single precision floating point values in xmm
11200 instruct subX_reg(regX dst, regX src) %{
11201   predicate(UseSSE>=1);
11202   match(Set dst (SubF dst src));  // two-address form: dst is both the left operand and the result
11203   format %{ "SUBSS  $dst,$src" %}
11204   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));  // bytes F3 0F 5C, then the register-register operand encoding
11205   ins_pipe( pipe_slow );
11206 %}
11207 
11208 instruct subX_imm(regX dst, immXF con) %{
11209   predicate(UseSSE>=1);
11210   match(Set dst (SubF dst con));  // right operand is a float constant operand (immXF)
11211   format %{ "SUBSS  $dst,[$con]" %}
11212   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), LdImmX(dst, con) );  // same F3 0F 5C opcode bytes as subX_reg; operand emitted by LdImmX
11213   ins_pipe( pipe_slow );
11214 %}
11215 
11216 instruct subX_mem(regX dst, memory mem) %{
11217   predicate(UseSSE>=1);
11218   match(Set dst (SubF dst (LoadF mem)));  // the LoadF is folded into the subtract's memory operand
11219   format %{ "SUBSS  $dst,$mem" %}
11220   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));  // same F3 0F 5C opcode bytes as subX_reg; register-memory operand encoding
11221   ins_pipe( pipe_slow );
11222 %}
11223 
11224 // Multiply two single precision floating point values in xmm
11225 instruct mulX_reg(regX dst, regX src) %{
11226   predicate(UseSSE>=1);
11227   match(Set dst (MulF dst src));  // two-address form: dst is both an operand and the result
11228   format %{ "MULSS  $dst,$src" %}
11229   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));  // bytes F3 0F 59, then the register-register operand encoding
11230   ins_pipe( pipe_slow );
11231 %}
11232 
11233 instruct mulX_imm(regX dst, immXF con) %{
11234   predicate(UseSSE>=1);
11235   match(Set dst (MulF dst con));  // right operand is a float constant operand (immXF)
11236   format %{ "MULSS  $dst,[$con]" %}
11237   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), LdImmX(dst, con) );  // same F3 0F 59 opcode bytes as mulX_reg; operand emitted by LdImmX
11238   ins_pipe( pipe_slow );
11239 %}
11240 
11241 instruct mulX_mem(regX dst, memory mem) %{
11242   predicate(UseSSE>=1);
11243   match(Set dst (MulF dst (LoadF mem)));  // the LoadF is folded into the multiply's memory operand
11244   format %{ "MULSS  $dst,$mem" %}
11245   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));  // same F3 0F 59 opcode bytes as mulX_reg; register-memory operand encoding
11246   ins_pipe( pipe_slow );
11247 %}
11248 
11249 // Divide two single precision floating point values in xmm
11250 instruct divX_reg(regX dst, regX src) %{
11251   predicate(UseSSE>=1);
11252   match(Set dst (DivF dst src));  // two-address form: dst is both the dividend and the result
11253   format %{ "DIVSS  $dst,$src" %}
11254   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));  // bytes F3 0F 5E, then the register-register operand encoding
11255   ins_pipe( pipe_slow );
11256 %}
11257 
11258 instruct divX_imm(regX dst, immXF con) %{
11259   predicate(UseSSE>=1);
11260   match(Set dst (DivF dst con));  // divisor is a float constant operand (immXF)
11261   format %{ "DIVSS  $dst,[$con]" %}
11262   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), LdImmX(dst, con) );  // same F3 0F 5E opcode bytes as divX_reg; operand emitted by LdImmX
11263   ins_pipe( pipe_slow );
11264 %}
11265 
11266 instruct divX_mem(regX dst, memory mem) %{
11267   predicate(UseSSE>=1);
11268   match(Set dst (DivF dst (LoadF mem)));  // the LoadF is folded into the divide's memory operand
11269   format %{ "DIVSS  $dst,$mem" %}
11270   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));  // same F3 0F 5E opcode bytes as divX_reg; register-memory operand encoding
11271   ins_pipe( pipe_slow );
11272 %}
11273 
11274 // Get the square root of a single precision floating point value in xmm
11275 instruct sqrtX_reg(regX dst, regX src) %{
11276   predicate(UseSSE>=1);
11277   match(Set dst (ConvD2F (SqrtD (ConvF2D src))));  // the whole float->double, SqrtD, double->float chain is matched as one instruction
11278   format %{ "SQRTSS $dst,$src" %}
11279   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
11280   ins_pipe( pipe_slow );
11281 %}
11282 
11283 instruct sqrtX_mem(regX dst, memory mem) %{
11284   predicate(UseSSE>=1);
11285   match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));  // same chain as sqrtX_reg, with the LoadF folded in as a memory operand
11286   format %{ "SQRTSS $dst,$mem" %}
11287   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
11288   ins_pipe( pipe_slow );
11289 %}
11290 
11291 // Get the square root of a double precision floating point value in xmm
11292 instruct sqrtXD_reg(regXD dst, regXD src) %{
11293   predicate(UseSSE>=2);  // the double-precision XMM forms in this file require UseSSE>=2
11294   match(Set dst (SqrtD src));
11295   format %{ "SQRTSD $dst,$src" %}
11296   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));  // differs from sqrtX_reg only in the leading byte (F2 instead of F3)
11297   ins_pipe( pipe_slow );
11298 %}
11299 
11300 instruct sqrtXD_mem(regXD dst, memory mem) %{
11301   predicate(UseSSE>=2);
11302   match(Set dst (SqrtD (LoadD mem)));  // the LoadD is folded into the instruction's memory operand
11303   format %{ "SQRTSD $dst,$mem" %}
11304   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
11305   ins_pipe( pipe_slow );
11306 %}
11307 
11308 instruct absF_reg(regFPR1 dst, regFPR1 src) %{
11309   predicate(UseSSE==0);
11310   match(Set dst (AbsF src));  // both operands are constrained to class regFPR1; the encoding names no register
11311   ins_cost(100);
11312   format %{ "FABS" %}
11313   opcode(0xE1, 0xD9);
11314   ins_encode( OpcS, OpcP );  // secondary byte first, then primary: D9 E1
11315   ins_pipe( fpu_reg_reg );
11316 %}
11317 
11318 instruct absX_reg(regX dst ) %{
11319   predicate(UseSSE>=1);
11320   match(Set dst (AbsF dst));  // in-place: dst is both source and result
11321   format %{ "ANDPS  $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
11322   ins_encode( AbsXF_encoding(dst));
11323   ins_pipe( pipe_slow );
11324 %}
11325 
11326 instruct negF_reg(regFPR1 dst, regFPR1 src) %{
11327   predicate(UseSSE==0);
11328   match(Set dst (NegF src));  // both operands are constrained to class regFPR1; the encoding names no register
11329   ins_cost(100);
11330   format %{ "FCHS" %}
11331   opcode(0xE0, 0xD9);
11332   ins_encode( OpcS, OpcP );  // secondary byte first, then primary: D9 E0
11333   ins_pipe( fpu_reg_reg );
11334 %}
11335 
11336 instruct negX_reg( regX dst ) %{
11337   predicate(UseSSE>=1);
11338   match(Set dst (NegF dst));  // in-place: dst is both source and result
11339   format %{ "XORPS  $dst,[0x80000000]\t# CHS F by sign flipping" %}
11340   ins_encode( NegXF_encoding(dst));
11341   ins_pipe( pipe_slow );
11342 %}
11343 
11344 // Cisc-alternate to addF_reg
11345 // Spill to obtain 24-bit precision
11346 instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11347   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11348   match(Set dst (AddF src1 (LoadF src2)));  // right operand is a float loaded from memory
11349 
11350   format %{ "FLD    $src2\n\t"
11351             "FADD   ST,$src1\n\t"
11352             "FSTP_S $dst" %}
11353   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
11354   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11355               OpcReg_F(src1),
11356               Pop_Mem_F(dst) );  // tertiary (D9) /0 loads src2, the add uses src1, the result is popped to the stack slot dst
11357   ins_pipe( fpu_mem_reg_mem );
11358 %}
11359 //
11360 // Cisc-alternate to addF_reg
11361 // This instruction does not round to 24-bits
11362 instruct addF_reg_mem(regF dst, memory src) %{
11363   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11364   match(Set dst (AddF dst (LoadF src)));  // two-address form with the right operand loaded from memory
11365 
11366   format %{ "FADD   $dst,$src" %}
11367   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
11368   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
11369               OpcP, RegOpc(dst) );  // tertiary (D9) /0 loads src first, then primary DE is applied to dst
11370   ins_pipe( fpu_reg_mem );
11371 %}
11372 
11373 // Following two instructions for _222_mpegaudio
11374 // Spill to obtain 24-bit precision
11375 instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
11376   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11377   match(Set dst (AddF src1 src2));  // here the left operand src1 is the memory operand (no explicit LoadF in the pattern)
11378 
11379   format %{ "FADD   $dst,$src1,$src2" %}
11380   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
11381   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
11382               OpcReg_F(src2),
11383               Pop_Mem_F(dst) );  // tertiary (D9) /0 loads src1, the add uses src2, the result is popped to the stack slot dst
11384   ins_pipe( fpu_mem_reg_mem );
11385 %}
11386 
11387 // Cisc-spill variant
11388 // Spill to obtain 24-bit precision
11389 instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
11390   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11391   match(Set dst (AddF src1 (LoadF src2)));  // differs from addF24_mem_mem only in the explicit LoadF around src2
11392 
11393   format %{ "FADD   $dst,$src1,$src2 cisc" %}
11394   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
11395   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11396               set_instruction_start,
11397               OpcP, RMopc_Mem(secondary,src1),
11398               Pop_Mem_F(dst) );  // two memory-operand instructions are emitted (load of src2, then D8 /0 on src1) before the pop to dst
11399   ins_pipe( fpu_mem_mem_mem );
11400 %}
11401 
11402 // Spill to obtain 24-bit precision
11403 instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11404   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11405   match(Set dst (AddF src1 src2));  // both inputs are memory operands
11406 
11407   format %{ "FADD   $dst,$src1,$src2" %}
11408   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
11409   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11410               set_instruction_start,
11411               OpcP, RMopc_Mem(secondary,src1),
11412               Pop_Mem_F(dst) );  // same encoding sequence as addF24_mem_cisc
11413   ins_pipe( fpu_mem_mem_mem );
11414 %}
11415 
11416 
11417 // Spill to obtain 24-bit precision
11418 instruct addF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
11419   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11420   match(Set dst (AddF src1 src2));  // right operand is a float constant operand (immF)
11421   format %{ "FLD    $src1\n\t"
11422             "FADD   $src2\n\t"
11423             "FSTP_S $dst"  %}
11424   opcode(0xD8, 0x00);       /* D8 /0 */
11425   ins_encode( Push_Reg_F(src1),
11426               Opc_MemImm_F(src2),
11427               Pop_Mem_F(dst));  // result is popped to the stack slot dst
11428   ins_pipe( fpu_mem_reg_con );
11429 %}
11430 //
11431 // This instruction does not round to 24-bits
11432 instruct addF_reg_imm(regF dst, regF src1, immF src2) %{
11433   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11434   match(Set dst (AddF src1 src2));  // right operand is a float constant operand (immF)
11435   format %{ "FLD    $src1\n\t"
11436             "FADD   $src2\n\t"
11437             "FSTP_S $dst"  %}
11438   opcode(0xD8, 0x00);       /* D8 /0 */
11439   ins_encode( Push_Reg_F(src1),
11440               Opc_MemImm_F(src2),
11441               Pop_Reg_F(dst));  // pops into the register dst (Pop_Reg_F), although the format text reads FSTP_S
11442   ins_pipe( fpu_reg_reg_con );
11443 %}
11444 
11445 // Spill to obtain 24-bit precision
11446 instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
11447   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11448   match(Set dst (MulF src1 src2));
11449 
11450   format %{ "FLD    $src1\n\t"
11451             "FMUL   $src2\n\t"
11452             "FSTP_S $dst"  %}
11453   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
11454   ins_encode( Push_Reg_F(src1),
11455               OpcReg_F(src2),
11456               Pop_Mem_F(dst) );  // src1 is pushed, src2 is the opcode operand, the result is popped to the stack slot dst
11457   ins_pipe( fpu_mem_reg_reg );
11458 %}
11459 //
11460 // This instruction does not round to 24-bits
11461 instruct mulF_reg(regF dst, regF src1, regF src2) %{
11462   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11463   match(Set dst (MulF src1 src2));  // three-operand form: dst is separate from both inputs
11464 
11465   format %{ "FLD    $src1\n\t"
11466             "FMUL   $src2\n\t"
11467             "FSTP_S $dst"  %}
11468   opcode(0xD8, 0x1); /* D8 C8+i */
11469   ins_encode( Push_Reg_F(src2),
11470               OpcReg_F(src1),
11471               Pop_Reg_F(dst) );  // operand order is the reverse of the format text: src2 is pushed and src1 is the opcode operand
11472   ins_pipe( fpu_reg_reg_reg );
11473 %}
11474 
11475 
11476 // Spill to obtain 24-bit precision
11477 // Cisc-alternate to reg-reg multiply
11478 instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
11479   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11480   match(Set dst (MulF src1 (LoadF src2)));  // right operand is a float loaded from memory
11481 
11482   format %{ "FLD_S  $src2\n\t"
11483             "FMUL   $src1\n\t"
11484             "FSTP_S $dst"  %}
11485   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1*/  /* LoadF D9 /0 */
11486   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11487               OpcReg_F(src1),
11488               Pop_Mem_F(dst) );  // tertiary (D9) /0 loads src2, the multiply uses src1, the result is popped to the stack slot dst
11489   ins_pipe( fpu_mem_reg_mem );
11490 %}
11491 //
11492 // This instruction does not round to 24-bits
11493 // Cisc-alternate to reg-reg multiply
11494 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
11495   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11496   match(Set dst (MulF src1 (LoadF src2)));  // right operand is a float loaded from memory
11497 
11498   format %{ "FMUL   $dst,$src1,$src2" %}
11499   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
11500   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11501               OpcReg_F(src1),
11502               Pop_Reg_F(dst) );  // same sequence as mulF24_reg_mem, except the result is popped into a register
11503   ins_pipe( fpu_reg_reg_mem );
11504 %}
11505 
11506 // Spill to obtain 24-bit precision
11507 instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
11508   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11509   match(Set dst (MulF src1 src2));  // both inputs are memory operands
11510 
11511   format %{ "FMUL   $dst,$src1,$src2" %}
11512   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
11513   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
11514               set_instruction_start,
11515               OpcP, RMopc_Mem(secondary,src1),
11516               Pop_Mem_F(dst) );  // two memory-operand instructions are emitted (load of src2, then D8 /1 on src1) before the pop to dst
11517   ins_pipe( fpu_mem_mem_mem );
11518 %}
11519 
11520 // Spill to obtain 24-bit precision
11521 instruct mulF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
11522   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11523   match(Set dst (MulF src1 src2));  // right operand is a float constant operand (immF)
11524 
11525   format %{ "FMULc $dst,$src1,$src2" %}
11526   opcode(0xD8, 0x1);  /* D8 /1*/
11527   ins_encode( Push_Reg_F(src1),
11528               Opc_MemImm_F(src2),
11529               Pop_Mem_F(dst));  // result is popped to the stack slot dst
11530   ins_pipe( fpu_mem_reg_con );
11531 %}
11532 //
11533 // Float multiply by a constant for UseSSE==0 when 24-bit mode is off; this instruction does not round to 24-bits
11534 instruct mulF_reg_imm(regF dst, regF src1, immF src2) %{
11535   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11536   match(Set dst (MulF src1 src2));  // right operand is a float constant operand (immF)
11537 
11538   format %{ "FMULc $dst,$src1,$src2" %}
11539   opcode(0xD8, 0x1);  /* D8 /1*/
11540   ins_encode( Push_Reg_F(src1),
11541               Opc_MemImm_F(src2),
11542               Pop_Reg_F(dst));  // same sequence as mulF24_reg_imm, except the result is popped into a register
11543   ins_pipe( fpu_reg_reg_con );
11544 %}
11545 
11546 
11547 //
11548 // MACRO1 -- subsume unshared load into mulF
11549 // This instruction does not round to 24-bits
11550 instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
11551   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11552   match(Set dst (MulF (LoadF mem1) src));  // the load is on the LEFT of the multiply (mulF_reg_mem covers the right-hand case)
11553 
11554   format %{ "FLD    $mem1    ===MACRO1===\n\t"
11555             "FMUL   ST,$src\n\t"
11556             "FSTP   $dst" %}
11557   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
11558   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
11559               OpcReg_F(src),
11560               Pop_Reg_F(dst) );  // tertiary (D9) /0 loads mem1, the multiply uses src, the result is popped into dst
11561   ins_pipe( fpu_reg_reg_mem );
11562 %}
11563 //
11564 // MACRO2 -- addF a mulF which subsumed an unshared load
11565 // This instruction does not round to 24-bits
11566 instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
11567   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11568   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));  // computes (load(mem1) * src1) + src2 as one rule
11569   ins_cost(95);
11570 
11571   format %{ "FLD    $mem1     ===MACRO2===\n\t"
11572             "FMUL   ST,$src1  subsume mulF left load\n\t"
11573             "FADD   ST,$src2\n\t"
11574             "FSTP   $dst" %}
11575   opcode(0xD9); /* LoadF D9 /0 */
11576   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
11577               FMul_ST_reg(src1),
11578               FAdd_ST_reg(src2),
11579               Pop_Reg_F(dst) );  // OpcP is only the D9 load; multiply and add come from FMul_ST_reg / FAdd_ST_reg
11580   ins_pipe( fpu_reg_mem_reg_reg );
11581 %}
11582 
11583 // MACRO3 -- addF a mulF
11584 // This instruction does not round to 24-bits.  It is a '2-address'
11585 // instruction in that the result goes back to src2.  This eliminates
11586 // a move from the macro; possibly the register allocator will have
11587 // to add it back (and maybe not).
11588 instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
11589   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11590   match(Set src2 (AddF (MulF src0 src1) src2));  // src2 is both the addend and the result: src2 = (src0 * src1) + src2
11591 
11592   format %{ "FLD    $src0     ===MACRO3===\n\t"
11593             "FMUL   ST,$src1\n\t"
11594             "FADDP  $src2,ST" %}
11595   opcode(0xD9); /* LoadF D9 /0 */  // NOTE(review): ins_encode below names neither OpcP nor a memory load, so this opcode looks unused - confirm
11596   ins_encode( Push_Reg_F(src0),
11597               FMul_ST_reg(src1),
11598               FAddP_reg_ST(src2) );
11599   ins_pipe( fpu_reg_reg_reg );
11600 %}
11601 
11602 // MACRO4 -- divF subF
11603 // This instruction does not round to 24-bits
11604 instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
11605   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11606   match(Set dst (DivF (SubF src2 src1) src3));  // computes (src2 - src1) / src3; note src2 is the minuend
11607 
11608   format %{ "FLD    $src2   ===MACRO4===\n\t"
11609             "FSUB   ST,$src1\n\t"
11610             "FDIV   ST,$src3\n\t"
11611             "FSTP  $dst" %}
11612   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11613   ins_encode( Push_Reg_F(src2),
11614               subF_divF_encode(src1,src3),
11615               Pop_Reg_F(dst) );  // the subtract and divide are both emitted by the single subF_divF_encode class
11616   ins_pipe( fpu_reg_reg_reg_reg );
11617 %}
11618 
11619 // Float divide for UseSSE==0 in 24-bit mode; spill to obtain 24-bit precision
11620 instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
11621   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
11622   match(Set dst (DivF src1 src2));
11623 
11624   format %{ "FDIV   $dst,$src1,$src2" %}
11625   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6*/
11626   ins_encode( Push_Reg_F(src1),
11627               OpcReg_F(src2),
11628               Pop_Mem_F(dst) );  // src1 is pushed, src2 is the opcode operand, the result is popped to the stack slot dst
11629   ins_pipe( fpu_mem_reg_reg );
11630 %}
11631 //
11632 // This instruction does not round to 24-bits
11633 instruct divF_reg(regF dst, regF src) %{
11634   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11635   match(Set dst (DivF dst src));  // two-address form: dst is both the dividend and the result
11636 
11637   format %{ "FDIV   $dst,$src" %}
11638   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
11639   ins_encode( Push_Reg_F(src),
11640               OpcP, RegOpc(dst) );
11641   ins_pipe( fpu_reg_reg );
11642 %}
11643 
11644 
11645 // Spill to obtain 24-bit precision
11646 instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
11647   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11648   match(Set dst (ModF src1 src2));
11649   effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11650 
11651   format %{ "FMOD   $dst,$src1,$src2" %}
11652   ins_encode( Push_Reg_Mod_D(src1, src2),
11653               emitModD(),
11654               Push_Result_Mod_D(src2),
11655               Pop_Mem_F(dst));  // the *_Mod_D encoding classes are reused for float operands; only the final pop is float-specific
11656   ins_pipe( pipe_slow );
11657 %}
11658 //
11659 // This instruction does not round to 24-bits
11660 instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
11661   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11662   match(Set dst (ModF dst src));  // two-address form: dst is both the dividend and the result
11663   effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11664 
11665   format %{ "FMOD   $dst,$src" %}
11666   ins_encode(Push_Reg_Mod_D(dst, src),
11667               emitModD(),
11668               Push_Result_Mod_D(src),
11669               Pop_Reg_F(dst));  // same sequence as modF24_reg, except the result is popped into a register
11670   ins_pipe( pipe_slow );
11671 %}
11672 
11673 instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
11674   predicate(UseSSE>=1);
11675   match(Set dst (ModF src0 src1));
11676   effect(KILL rax, KILL cr);  // the format's FNSTSW AX / SAHF pair writes EAX and EFLAGS
11677   format %{ "SUB    ESP,4\t # FMOD\n"
11678           "\tMOVSS  [ESP+0],$src1\n"
11679           "\tFLD_S  [ESP+0]\n"
11680           "\tMOVSS  [ESP+0],$src0\n"
11681           "\tFLD_S  [ESP+0]\n"
11682      "loop:\tFPREM\n"
11683           "\tFWAIT\n"
11684           "\tFNSTSW AX\n"
11685           "\tSAHF\n"
11686           "\tJP     loop\n"
11687           "\tFSTP_S [ESP+0]\n"
11688           "\tMOVSS  $dst,[ESP+0]\n"
11689           "\tADD    ESP,4\n"
11690           "\tFSTP   ST0\t # Restore FPU Stack"
11691     %}
11692   ins_cost(250);
11693   ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);  // XMM operands go through a stack temporary to the FPU stack and back, as the format shows
11694   ins_pipe( pipe_slow );
11695 %}
11696 
11697 
11698 //----------Arithmetic Conversion Instructions---------------------------------
11699 // The conversions operations are all Alpha sorted.  Please keep it that way!
11700 
11701 instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
11702   predicate(UseSSE==0);
11703   match(Set dst (RoundFloat src));  // result lands in a float stack slot; also used by the expand in convD2F_reg
11704   ins_cost(125);
11705   format %{ "FST_S  $dst,$src\t# F-round" %}
11706   ins_encode( Pop_Mem_Reg_F(dst, src) );
11707   ins_pipe( fpu_mem_reg );
11708 %}
11709 
11710 instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
11711   predicate(UseSSE<=1);  // UseSSE<=1: doubles are in regD operands, as in the other UseSSE<=1 rules here
11712   match(Set dst (RoundDouble src));  // result lands in a double stack slot; also used by the expand in convF2D_reg
11713   ins_cost(125);
11714   format %{ "FST_D  $dst,$src\t# D-round" %}
11715   ins_encode( Pop_Mem_Reg_D(dst, src) );
11716   ins_pipe( fpu_mem_reg );
11717 %}
11718 
11719 // Force rounding to 24-bit precision and 8-bit exponent
11720 instruct convD2F_reg(stackSlotF dst, regD src) %{
11721   predicate(UseSSE==0);
11722   match(Set dst (ConvD2F src));
11723   format %{ "FST_S  $dst,$src\t# F-round" %}
11724   expand %{
11725     roundFloat_mem_reg(dst,src);
11726   %}
11727 %}
11728 
11729 // Force rounding to 24-bit precision and 8-bit exponent
11730 instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
11731   predicate(UseSSE==1);  // UseSSE==1 only: float result in an XMM operand (regX), double source in regD
11732   match(Set dst (ConvD2F src));
11733   effect( KILL cr );  // the format's SUB/ADD on ESP overwrite EFLAGS
11734   format %{ "SUB    ESP,4\n\t"
11735             "FST_S  [ESP],$src\t# F-round\n\t"
11736             "MOVSS  $dst,[ESP]\n\t"
11737             "ADD ESP,4" %}
11738   ins_encode( D2X_encoding(dst, src) );
11739   ins_pipe( pipe_slow );
11740 %}
11741 
11742 // Force rounding double precision to single precision
11743 instruct convXD2X_reg(regX dst, regXD src) %{
11744   predicate(UseSSE>=2);
11745   match(Set dst (ConvD2F src));
11746   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
11747   opcode(0xF2, 0x0F, 0x5A);
11748   ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));  // bytes F2 0F 5A, then the register-register operand encoding
11749   ins_pipe( pipe_slow );
11750 %}
11751 
11752 instruct convF2D_reg_reg(regD dst, regF src) %{
11753   predicate(UseSSE==0);
11754   match(Set dst (ConvF2D src));
11755   format %{ "FST_S  $dst,$src\t# D-round" %}
11756   ins_encode( Pop_Reg_Reg_D(dst, src));  // register-to-register encoding (Pop_Reg_Reg_D); no memory operand is involved
11757   ins_pipe( fpu_reg_reg );
11758 %}
11759 
11760 instruct convF2D_reg(stackSlotD dst, regF src) %{
11761   predicate(UseSSE==1);
11762   match(Set dst (ConvF2D src));  // result goes to a double stack slot
11763   format %{ "FST_D  $dst,$src\t# D-round" %}
11764   expand %{
11765     roundDouble_mem_reg(dst,src);
11766   %}
11767 %}
11768 
11769 instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
11770   predicate(UseSSE==1);  // UseSSE==1 only: float source in an XMM operand (regX), double result in regD
11771   match(Set dst (ConvF2D src));
11772   effect( KILL cr );  // the format's SUB/ADD on ESP overwrite EFLAGS
11773   format %{ "SUB    ESP,4\n\t"
11774             "MOVSS  [ESP],$src\n\t"
11775             "FLD_S  [ESP]\n\t"
11776             "ADD    ESP,4\n\t"
11777             "FSTP   $dst\t# D-round" %}
11778   ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));  // the value passes through a stack temporary, then is popped into dst
11779   ins_pipe( pipe_slow );
11780 %}
11781 
11782 instruct convX2XD_reg(regXD dst, regX src) %{
11783   predicate(UseSSE>=2);
11784   match(Set dst (ConvF2D src));
11785   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
11786   opcode(0xF3, 0x0F, 0x5A);
11787   ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));  // bytes F3 0F 5A; differs from convXD2X_reg only in the leading byte
11788   ins_pipe( pipe_slow );
11789 %}
11790 
11791 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
11792 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
11793   predicate(UseSSE<=1);
11794   match(Set dst (ConvD2I src));  // the result is pinned to EAX by the eAXRegI operand class
11795   effect( KILL tmp, KILL cr );  // tmp (EDX) and the flags are not in the match rule; they are only declared as clobbered
11796   format %{ "FLD    $src\t# Convert double to int \n\t"
11797             "FLDCW  trunc mode\n\t"
11798             "SUB    ESP,4\n\t"
11799             "FISTp  [ESP + #0]\n\t"
11800             "FLDCW  std/24-bit mode\n\t"
11801             "POP    EAX\n\t"
11802             "CMP    EAX,0x80000000\n\t"
11803             "JNE,s  fast\n\t"
11804             "FLD_D  $src\n\t"
11805             "CALL   d2i_wrapper\n"
11806       "fast:" %}
11807   ins_encode( Push_Reg_D(src), D2I_encoding(src) );  // per the format, a result of 0x80000000 takes the d2i_wrapper slow path
11808   ins_pipe( pipe_slow );
11809 %}
11810 
11811 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
11812 instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
11813   predicate(UseSSE>=2);
11814   match(Set dst (ConvD2I src));  // the result is pinned to EAX by the eAXRegI operand class
11815   effect( KILL tmp, KILL cr );  // tmp (EDX) and the flags are only declared as clobbered
11816   format %{ "CVTTSD2SI $dst, $src\n\t"
11817             "CMP    $dst,0x80000000\n\t"
11818             "JNE,s  fast\n\t"
11819             "SUB    ESP, 8\n\t"
11820             "MOVSD  [ESP], $src\n\t"
11821             "FLD_D  [ESP]\n\t"
11822             "ADD    ESP, 8\n\t"
11823             "CALL   d2i_wrapper\n"
11824       "fast:" %}
11825   opcode(0x1); // double-precision conversion
11826   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));  // bytes F2 0F 2C; the rest, including the slow path, comes from FX2I_encoding
11827   ins_pipe( pipe_slow );
11828 %}
11829 
11830 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11831   predicate(UseSSE<=1);
11832   match(Set dst (ConvD2L src));  // long result in operand class eADXRegL (the format pops EAX, then EDX)
11833   effect( KILL cr );
11834   format %{ "FLD    $src\t# Convert double to long\n\t"
11835             "FLDCW  trunc mode\n\t"
11836             "SUB    ESP,8\n\t"
11837             "FISTp  [ESP + #0]\n\t"
11838             "FLDCW  std/24-bit mode\n\t"
11839             "POP    EAX\n\t"
11840             "POP    EDX\n\t"
11841             "CMP    EDX,0x80000000\n\t"
11842             "JNE,s  fast\n\t"
11843             "TEST   EAX,EAX\n\t"
11844             "JNE,s  fast\n\t"
11845             "FLD    $src\n\t"
11846             "CALL   d2l_wrapper\n"
11847       "fast:" %}
11848   ins_encode( Push_Reg_D(src),  D2L_encoding(src) );  // per the format, only EDX:EAX == 0x80000000:0 takes the d2l_wrapper slow path
11849   ins_pipe( pipe_slow );
11850 %}
11851 
11852 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11853 instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
11854   predicate (UseSSE>=2);
11855   match(Set dst (ConvD2L src));  // long result in operand class eADXRegL (the format pops EAX, then EDX)
11856   effect( KILL cr );
11857   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
11858             "MOVSD  [ESP],$src\n\t"
11859             "FLD_D  [ESP]\n\t"
11860             "FLDCW  trunc mode\n\t"
11861             "FISTp  [ESP + #0]\n\t"
11862             "FLDCW  std/24-bit mode\n\t"
11863             "POP    EAX\n\t"
11864             "POP    EDX\n\t"
11865             "CMP    EDX,0x80000000\n\t"
11866             "JNE,s  fast\n\t"
11867             "TEST   EAX,EAX\n\t"
11868             "JNE,s  fast\n\t"
11869             "SUB    ESP,8\n\t"
11870             "MOVSD  [ESP],$src\n\t"
11871             "FLD_D  [ESP]\n\t"
11872             "CALL   d2l_wrapper\n"
11873       "fast:" %}
11874   ins_encode( XD2L_encoding(src) );  // NOTE(review): unlike convX2L_reg_reg's format, the slow path above shows no ADD ESP,8 after the reload - confirm against XD2L_encoding
11875   ins_pipe( pipe_slow );
11876 %}
11877 
11878 // Convert a float to an int.  Java semantics require we do complex
11879 // manglations in the corner cases.  So we set the rounding mode to
11880 // 'zero', store the darned float down as an int, and reset the
11881 // rounding mode to 'nearest'.  The hardware stores a flag value down
11882 // if we would overflow or converted a NAN; we check for this and
11883 // go the slow path if needed.
11884 instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11885   predicate(UseSSE==0);
11886   match(Set dst (ConvF2I src));
11887   effect( KILL tmp, KILL cr );  // tmp (EDX) and the flags are only declared as clobbered
11888   format %{ "FLD    $src\t# Convert float to int \n\t"
11889             "FLDCW  trunc mode\n\t"
11890             "SUB    ESP,4\n\t"
11891             "FISTp  [ESP + #0]\n\t"
11892             "FLDCW  std/24-bit mode\n\t"
11893             "POP    EAX\n\t"
11894             "CMP    EAX,0x80000000\n\t"
11895             "JNE,s  fast\n\t"
11896             "FLD    $src\n\t"
11897             "CALL   d2i_wrapper\n"
11898       "fast:" %}
11899   // D2I_encoding works for F2I
11900   ins_encode( Push_Reg_F(src), D2I_encoding(src) );
11901   ins_pipe( pipe_slow );
11902 %}
11903 
11904 // Convert a float in xmm to an int reg.
11905 instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
11906   predicate(UseSSE>=1);
11907   match(Set dst (ConvF2I src));  // the result is pinned to EAX by the eAXRegI operand class
11908   effect( KILL tmp, KILL cr );  // tmp (EDX) and the flags are only declared as clobbered
11909   format %{ "CVTTSS2SI $dst, $src\n\t"
11910             "CMP    $dst,0x80000000\n\t"
11911             "JNE,s  fast\n\t"
11912             "SUB    ESP, 4\n\t"
11913             "MOVSS  [ESP], $src\n\t"
11914             "FLD    [ESP]\n\t"
11915             "ADD    ESP, 4\n\t"
11916             "CALL   d2i_wrapper\n"
11917       "fast:" %}
11918   opcode(0x0); // single-precision conversion
11919   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));  // bytes F3 0F 2C; shares FX2I_encoding with convXD2I_reg_reg
11920   ins_pipe( pipe_slow );
11921 %}
11922 
11923 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11924   predicate(UseSSE==0);
11925   match(Set dst (ConvF2L src));  // long result in operand class eADXRegL (the format pops EAX, then EDX)
11926   effect( KILL cr );
11927   format %{ "FLD    $src\t# Convert float to long\n\t"
11928             "FLDCW  trunc mode\n\t"
11929             "SUB    ESP,8\n\t"
11930             "FISTp  [ESP + #0]\n\t"
11931             "FLDCW  std/24-bit mode\n\t"
11932             "POP    EAX\n\t"
11933             "POP    EDX\n\t"
11934             "CMP    EDX,0x80000000\n\t"
11935             "JNE,s  fast\n\t"
11936             "TEST   EAX,EAX\n\t"
11937             "JNE,s  fast\n\t"
11938             "FLD    $src\n\t"
11939             "CALL   d2l_wrapper\n"
11940       "fast:" %}
11941   // D2L_encoding works for F2L
11942   ins_encode( Push_Reg_F(src), D2L_encoding(src) );
11943   ins_pipe( pipe_slow );
11944 %}
11945 
11946 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11947 instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
        // Float-to-long rule for UseSSE>=1: regX source, eADXRegL result,
        // flags marked KILL.
11948   predicate (UseSSE>=1);
11949   match(Set dst (ConvF2L src));
11950   effect( KILL cr );
        // Listing text: src is stored to the stack and reloaded with FLD_S so the
        // conversion itself is done by FISTp; the slow path reloads src the same
        // way before calling d2l_wrapper.
11951   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
11952             "MOVSS  [ESP],$src\n\t"
11953             "FLD_S  [ESP]\n\t"
11954             "FLDCW  trunc mode\n\t"
11955             "FISTp  [ESP + #0]\n\t"
11956             "FLDCW  std/24-bit mode\n\t"
11957             "POP    EAX\n\t"
11958             "POP    EDX\n\t"
11959             "CMP    EDX,0x80000000\n\t"
11960             "JNE,s  fast\n\t"
11961             "TEST   EAX,EAX\n\t"
11962             "JNE,s  fast\n\t"
11963             "SUB    ESP,4\t# Convert float to long\n\t"
11964             "MOVSS  [ESP],$src\n\t"
11965             "FLD_S  [ESP]\n\t"
11966             "ADD    ESP,4\n\t"
11967             "CALL   d2l_wrapper\n"
11968       "fast:" %}
11969   ins_encode( X2L_encoding(src) );
11970   ins_pipe( pipe_slow );
11971 %}
11972 
11973 instruct convI2D_reg(regD dst, stackSlotI src) %{
        // Int-to-double rule for UseSSE<=1: the int is read from a stack slot
        // (FILD) and the result stored to a regD operand (FSTP).
11974   predicate( UseSSE<=1 );
11975   match(Set dst (ConvI2D src));
11976   format %{ "FILD   $src\n\t"
11977             "FSTP   $dst" %}
11978   opcode(0xDB, 0x0);  /* DB /0 */
11979   ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
11980   ins_pipe( fpu_reg_mem );
11981 %}
11982 
11983 instruct convI2XD_reg(regXD dst, eRegI src) %{
        // Int-to-double rule for UseSSE>=2 when UseXmmI2D is off; the register
        // source is converted with CVTSI2SD. convXI2XD_reg covers the
        // UseXmmI2D case.
11984   predicate( UseSSE>=2 && !UseXmmI2D );
11985   match(Set dst (ConvI2D src));
11986   format %{ "CVTSI2SD $dst,$src" %}
11987   opcode(0xF2, 0x0F, 0x2A);
11988   ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
11989   ins_pipe( pipe_slow );
11990 %}
11991 
11992 instruct convI2XD_mem(regXD dst, memory mem) %{
        // Int-to-double rule for UseSSE>=2 that folds the LoadI into the
        // CVTSI2SD memory operand. Unlike convI2XD_reg, the predicate does not
        // test UseXmmI2D.
11993   predicate( UseSSE>=2 );
11994   match(Set dst (ConvI2D (LoadI mem)));
11995   format %{ "CVTSI2SD $dst,$mem" %}
11996   opcode(0xF2, 0x0F, 0x2A);
11997   ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
11998   ins_pipe( pipe_slow );
11999 %}
12000 
12001 instruct convXI2XD_reg(regXD dst, eRegI src)
12002 %{
        // Int-to-double rule for UseSSE>=2 when UseXmmI2D is on: the int is
        // first moved into the XMM destination, then converted in place.
12003   predicate( UseSSE>=2 && UseXmmI2D );
12004   match(Set dst (ConvI2D src));
12005 
12006   format %{ "MOVD  $dst,$src\n\t"
12007             "CVTDQ2PD $dst,$dst\t# i2d" %}
12008   ins_encode %{
          // movdl loads dst from the general register; cvtdq2pd then uses dst
          // as both source and destination.
12009     __ movdl($dst$$XMMRegister, $src$$Register);
12010     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
12011   %}
12012   ins_pipe(pipe_slow); // XXX
12013 %}
12014 
12015 instruct convI2D_mem(regD dst, memory mem) %{
        // Int-to-double rule for UseSSE<=1 that folds the LoadI into FILD.
        // Only selected when the current compile is not using 24-bit
        // instructions (select_24_bit_instr() is false).
12016   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
12017   match(Set dst (ConvI2D (LoadI mem)));
12018   format %{ "FILD   $mem\n\t"
12019             "FSTP   $dst" %}
12020   opcode(0xDB);      /* DB /0 */
12021   ins_encode( OpcP, RMopc_Mem(0x00,mem),
12022               Pop_Reg_D(dst));
12023   ins_pipe( fpu_reg_mem );
12024 %}
12025 
12026 // Convert a byte to a float; no rounding step needed.
12027 instruct conv24I2F_reg(regF dst, stackSlotI src) %{
        // Int-to-float rule for UseSSE==0, restricted by the predicate to inputs
        // of the shape (AndI x 255), i.e. the converted value is masked to its
        // low 8 bits.
12028   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
12029   match(Set dst (ConvI2F src));
12030   format %{ "FILD   $src\n\t"
12031             "FSTP   $dst" %}
12032 
12033   opcode(0xDB, 0x0);  /* DB /0 */
12034   ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
12035   ins_pipe( fpu_reg_mem );
12036 %}
12037 
12038 // In 24-bit mode, force exponent rounding by storing back out
12039 instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
        // Int-to-float rule for UseSSE==0 when the compile selects 24-bit
        // instructions: stack-slot source, result stored back to a float stack
        // slot (FSTP_S).
12040   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
12041   match(Set dst (ConvI2F src));
12042   ins_cost(200);
12043   format %{ "FILD   $src\n\t"
12044             "FSTP_S $dst" %}
12045   opcode(0xDB, 0x0);  /* DB /0 */
12046   ins_encode( Push_Mem_I(src),
12047               Pop_Mem_F(dst));
12048   ins_pipe( fpu_mem_mem );
12049 %}
12050 
12051 // In 24-bit mode, force exponent rounding by storing back out
12052 instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
        // Same conditions and result location as convI2F_SSF, but the int is
        // taken directly from the LoadI's memory operand.
12053   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
12054   match(Set dst (ConvI2F (LoadI mem)));
12055   ins_cost(200);
12056   format %{ "FILD   $mem\n\t"
12057             "FSTP_S $dst" %}
12058   opcode(0xDB);  /* DB /0 */
12059   ins_encode( OpcP, RMopc_Mem(0x00,mem),
12060               Pop_Mem_F(dst));
12061   ins_pipe( fpu_mem_mem );
12062 %}
12063 
12064 // This instruction does not round to 24-bits
12065 instruct convI2F_reg(regF dst, stackSlotI src) %{
        // Int-to-float rule for UseSSE==0 when 24-bit instructions are not
        // selected: stack-slot source, result left in a regF operand.
12066   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
12067   match(Set dst (ConvI2F src));
12068   format %{ "FILD   $src\n\t"
12069             "FSTP   $dst" %}
12070   opcode(0xDB, 0x0);  /* DB /0 */
12071   ins_encode( Push_Mem_I(src),
12072               Pop_Reg_F(dst));
12073   ins_pipe( fpu_reg_mem );
12074 %}
12075 
12076 // This instruction does not round to 24-bits
12077 instruct convI2F_mem(regF dst, memory mem) %{
        // Same conditions as convI2F_reg, with the LoadI folded into the FILD
        // memory operand.
12078   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
12079   match(Set dst (ConvI2F (LoadI mem)));
12080   format %{ "FILD   $mem\n\t"
12081             "FSTP   $dst" %}
12082   opcode(0xDB);      /* DB /0 */
12083   ins_encode( OpcP, RMopc_Mem(0x00,mem),
12084               Pop_Reg_F(dst));
12085   ins_pipe( fpu_reg_mem );
12086 %}
12087 
12088 // Convert an int to a float in xmm; no rounding step needed.
12089 instruct convI2X_reg(regX dst, eRegI src) %{
        // Int-to-float rule producing a regX result with CVTSI2SS. Selected for
        // UseSSE==1, and for UseSSE>=2 only when UseXmmI2F is off (convXI2X_reg
        // handles the UseXmmI2F case). The && group is parenthesized so the
        // intended grouping is explicit in the generated predicate code.
12090   predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
12091   match(Set dst (ConvI2F src));
12092   format %{ "CVTSI2SS $dst, $src" %}
12093 
12094   opcode(0xF3, 0x0F, 0x2A);  /* F3 0F 2A /r */
12095   ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
12096   ins_pipe( pipe_slow );
12097 %}
12098 
12099  instruct convXI2X_reg(regX dst, eRegI src)
12100 %{
        // Int-to-float rule for UseSSE>=2 when UseXmmI2F is on: the int is
        // moved into the XMM destination and converted in place.
12101   predicate( UseSSE>=2 && UseXmmI2F );
12102   match(Set dst (ConvI2F src));
12103 
12104   format %{ "MOVD  $dst,$src\n\t"
12105             "CVTDQ2PS $dst,$dst\t# i2f" %}
12106   ins_encode %{
          // movdl loads dst from the general register; cvtdq2ps then uses dst
          // as both source and destination.
12107     __ movdl($dst$$XMMRegister, $src$$Register);
12108     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
12109   %}
12110   ins_pipe(pipe_slow); // XXX
12111 %}
12112 
12113 instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
        // Sign-extending int-to-long rule with no predicate. Per the listing
        // text, src is copied into both halves and the high half is shifted
        // right arithmetically by 31; the flags are marked KILL.
12114   match(Set dst (ConvI2L src));
12115   effect(KILL cr);
12116   ins_cost(375);
12117   format %{ "MOV    $dst.lo,$src\n\t"
12118             "MOV    $dst.hi,$src\n\t"
12119             "SAR    $dst.hi,31" %}
12120   ins_encode(convert_int_long(dst,src));
12121   ins_pipe( ialu_reg_reg_long );
12122 %}
12123 
12124 // Zero-extend convert int to long
12125 instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
        // Matches (AndL (ConvI2L src) mask) with mask an immL_32bits operand and
        // replaces the convert-and-mask pair by a copy of src into the low half
        // plus an XOR that clears the high half. Cheaper (250) than convI2L_reg
        // (375).
12126   match(Set dst (AndL (ConvI2L src) mask) );
12127   effect( KILL flags );
12128   ins_cost(250);
12129   format %{ "MOV    $dst.lo,$src\n\t"
12130             "XOR    $dst.hi,$dst.hi" %}
12131   opcode(0x33); // XOR
12132   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
12133   ins_pipe( ialu_reg_reg_long );
12134 %}
12135 
12136 // Zero-extend long
12137 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
        // Matches (AndL src mask) with mask an immL_32bits operand: the low half
        // of src is copied and the high half of dst is cleared with XOR, using
        // the same encoding as convI2L_reg_zex. Flags are marked KILL.
12138   match(Set dst (AndL src mask) );
12139   effect( KILL flags );
12140   ins_cost(250);
        // The listing text ends on the XOR line; no trailing line break, so it
        // prints like the other two-instruction rules in this section.
12141   format %{ "MOV    $dst.lo,$src.lo\n\t"
12142             "XOR    $dst.hi,$dst.hi" %}
12143   opcode(0x33); // XOR
12144   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
12145   ins_pipe( ialu_reg_reg_long );
12146 %}
12147 
12148 instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
        // Long-to-double rule for UseSSE<=1. Per the listing text, both halves
        // of src are pushed, loaded with FILD, and the result is stored to a
        // double stack slot; flags are marked KILL.
12149   predicate (UseSSE<=1);
12150   match(Set dst (ConvL2D src));
12151   effect( KILL cr );
12152   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
12153             "PUSH   $src.lo\n\t"
12154             "FILD   ST,[ESP + #0]\n\t"
12155             "ADD    ESP,8\n\t"
12156             "FSTP_D $dst\t# D-round" %}
12157   opcode(0xDF, 0x5);  /* DF /5 */
12158   ins_encode(convert_long_double(src), Pop_Mem_D(dst));
12159   ins_pipe( pipe_slow );
12160 %}
12161 
12162 instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
        // Long-to-double rule for UseSSE>=2. Per the listing text, the value is
        // converted via FILD_D/FSTP_D through the stack and then loaded into the
        // regXD destination with MOVSD; flags are marked KILL.
12163   predicate (UseSSE>=2);
12164   match(Set dst (ConvL2D src));
12165   effect( KILL cr );
12166   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
12167             "PUSH   $src.lo\n\t"
12168             "FILD_D [ESP]\n\t"
12169             "FSTP_D [ESP]\n\t"
12170             "MOVSD  $dst,[ESP]\n\t"
12171             "ADD    ESP,8" %}
12172   opcode(0xDF, 0x5);  /* DF /5 */
12173   ins_encode(convert_long_double2(src), Push_ResultXD(dst));
12174   ins_pipe( pipe_slow );
12175 %}
12176 
12177 instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
        // Long-to-float rule for UseSSE>=1. Per the listing text, the value is
        // loaded with FILD_D, stored back as a single with FSTP_S, then moved
        // into the regX destination with MOVSS; flags are marked KILL.
12178   predicate (UseSSE>=1);
12179   match(Set dst (ConvL2F src));
12180   effect( KILL cr );
12181   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
12182             "PUSH   $src.lo\n\t"
12183             "FILD_D [ESP]\n\t"
12184             "FSTP_S [ESP]\n\t"
12185             "MOVSS  $dst,[ESP]\n\t"
12186             "ADD    ESP,8" %}
12187   opcode(0xDF, 0x5);  /* DF /5 */
        // The 0x8 argument presumably is the stack adjustment shown as
        // "ADD ESP,8" in the listing text - TODO confirm against Push_ResultX.
12188   ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
12189   ins_pipe( pipe_slow );
12190 %}
12191 
12192 instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
        // Long-to-float rule whose result is a float stack slot. It carries no
        // predicate; convL2X_reg matches the same ideal node for UseSSE>=1 with
        // a regX result instead.
12193   match(Set dst (ConvL2F src));
12194   effect( KILL cr );
12195   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
12196             "PUSH   $src.lo\n\t"
12197             "FILD   ST,[ESP + #0]\n\t"
12198             "ADD    ESP,8\n\t"
12199             "FSTP_S $dst\t# F-round" %}
12200   opcode(0xDF, 0x5);  /* DF /5 */
12201   ins_encode(convert_long_double(src), Pop_Mem_F(dst));
12202   ins_pipe( pipe_slow );
12203 %}
12204 
12205 instruct convL2I_reg( eRegI dst, eRegL src ) %{
        // Long-to-int rule: per the listing text, a single MOV of the low half
        // of src into dst. No flags operand is declared.
12206   match(Set dst (ConvL2I src));
12207   effect( DEF dst, USE src );
12208   format %{ "MOV    $dst,$src.lo" %}
12209   ins_encode(enc_CopyL_Lo(dst,src));
12210   ins_pipe( ialu_reg_reg );
12211 %}
12212 
12213 
12214 instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
        // MoveF2I from a float stack slot into an integer register using a plain
        // MOV load (opcode 0x8B). No predicate.
12215   match(Set dst (MoveF2I src));
12216   effect( DEF dst, USE src );
12217   ins_cost(100);
12218   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
12219   opcode(0x8B);
12220   ins_encode( OpcP, RegMem(dst,src));
12221   ins_pipe( ialu_reg_mem );
12222 %}
12223 
12224 instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
        // MoveF2I for UseSSE==0: the regF source is stored to an int stack slot
        // (FST_S in the listing text).
12225   predicate(UseSSE==0);
12226   match(Set dst (MoveF2I src));
12227   effect( DEF dst, USE src );
12228 
12229   ins_cost(125);
12230   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
12231   ins_encode( Pop_Mem_Reg_F(dst, src) );
12232   ins_pipe( fpu_mem_reg );
12233 %}
12234 
12235 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
        // MoveF2I for UseSSE>=1: the regX source is stored to an int stack slot
        // with MOVSS (bytes F3 0F 11).
12236   predicate(UseSSE>=1);
12237   match(Set dst (MoveF2I src));
12238   effect( DEF dst, USE src );
12239 
12240   ins_cost(95);
12241   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
        // RegMem takes the register first, so src precedes dst for this store.
12242   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
12243   ins_pipe( pipe_slow );
12244 %}
12245 
12246 instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
        // MoveF2I for UseSSE>=2 done register-to-register with MOVD. It has the
        // lowest cost (85) of the MoveF2I rules in this section.
12247   predicate(UseSSE>=2);
12248   match(Set dst (MoveF2I src));
12249   effect( DEF dst, USE src );
12250   ins_cost(85);
12251   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
12252   ins_encode( MovX2I_reg(dst, src));
12253   ins_pipe( pipe_slow );
12254 %}
12255 
12256 instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
        // MoveI2F from an integer register into a float stack slot using a plain
        // MOV store (opcode 0x89). No predicate.
12257   match(Set dst (MoveI2F src));
12258   effect( DEF dst, USE src );
12259 
12260   ins_cost(100);
12261   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
12262   opcode(0x89);
12263   ins_encode( OpcPRegSS( dst, src ) );
12264   ins_pipe( ialu_mem_reg );
12265 %}
12266 
12267 
12268 instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
        // MoveI2F for UseSSE==0: the int stack slot is loaded as a 32-bit real
        // (FLD_S) and stored to the regF destination.
12269   predicate(UseSSE==0);
12270   match(Set dst (MoveI2F src));
12271   effect(DEF dst, USE src);
12272 
12273   ins_cost(125);
12274   format %{ "FLD_S  $src\n\t"
12275             "FSTP   $dst\t# MoveI2F_stack_reg" %}
12276   opcode(0xD9);               /* D9 /0, FLD m32real */
12277   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12278               Pop_Reg_F(dst) );
12279   ins_pipe( fpu_reg_mem );
12280 %}
12281 
12282 instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
        // MoveI2F for UseSSE>=1: the int stack slot is loaded into a regX
        // destination with MOVSS (bytes F3 0F 10).
12283   predicate(UseSSE>=1);
12284   match(Set dst (MoveI2F src));
12285   effect( DEF dst, USE src );
12286 
12287   ins_cost(95);
12288   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
12289   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
12290   ins_pipe( pipe_slow );
12291 %}
12292 
12293 instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
        // MoveI2F for UseSSE>=2 done register-to-register with MOVD. It has the
        // lowest cost (85) of the MoveI2F rules in this section.
12294   predicate(UseSSE>=2);
12295   match(Set dst (MoveI2F src));
12296   effect( DEF dst, USE src );
12297 
12298   ins_cost(85);
12299   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
12300   ins_encode( MovI2X_reg(dst, src) );
12301   ins_pipe( pipe_slow );
12302 %}
12303 
12304 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
        // MoveD2L from a double stack slot into a long register pair: two MOV
        // loads (opcode 0x8B for both), the second one for the high half at
        // $src+4 per the listing text. No predicate.
12305   match(Set dst (MoveD2L src));
12306   effect(DEF dst, USE src);
12307 
12308   ins_cost(250);
12309   format %{ "MOV    $dst.lo,$src\n\t"
12310             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
12311   opcode(0x8B, 0x8B);
12312   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
12313   ins_pipe( ialu_mem_long_reg );
12314 %}
12315 
12316 instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
        // MoveD2L for UseSSE<=1: the regD source is stored to a long stack slot
        // (FST_D in the listing text).
12317   predicate(UseSSE<=1);
12318   match(Set dst (MoveD2L src));
12319   effect(DEF dst, USE src);
12320 
12321   ins_cost(125);
12322   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
12323   ins_encode( Pop_Mem_Reg_D(dst, src) );
12324   ins_pipe( fpu_mem_reg );
12325 %}
12326 
12327 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
        // MoveD2L for UseSSE>=2: the regXD source is stored to a long stack slot
        // with MOVSD (bytes F2 0F 11).
12328   predicate(UseSSE>=2);
12329   match(Set dst (MoveD2L src));
12330   effect(DEF dst, USE src);
12331   ins_cost(95);
12332 
12333   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
        // RegMem takes the register first, so src precedes dst for this store.
12334   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
12335   ins_pipe( pipe_slow );
12336 %}
12337 
12338 instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
        // MoveD2L for UseSSE>=2 done without memory. Per the listing text, the
        // low half is moved with MOVD, then PSHUFLW writes a shuffled copy of src
        // into the TEMP register tmp so the high half can be moved with a second
        // MOVD.
12339   predicate(UseSSE>=2);
12340   match(Set dst (MoveD2L src));
12341   effect(DEF dst, USE src, TEMP tmp);
12342   ins_cost(85);
12343   format %{ "MOVD   $dst.lo,$src\n\t"
12344             "PSHUFLW $tmp,$src,0x4E\n\t"
12345             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
12346   ins_encode( MovXD2L_reg(dst, src, tmp) );
12347   ins_pipe( pipe_slow );
12348 %}
12349 
12350 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
        // MoveL2D from a long register pair into a double stack slot: two MOV
        // stores (opcode 0x89 for both), the second one for the high half at
        // $dst+4 per the listing text. No predicate.
12351   match(Set dst (MoveL2D src));
12352   effect(DEF dst, USE src);
12353 
12354   ins_cost(200);
12355   format %{ "MOV    $dst,$src.lo\n\t"
12356             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
12357   opcode(0x89, 0x89);
12358   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
12359   ins_pipe( ialu_mem_long_reg );
12360 %}
12361 
12362 
12363 instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
        // MoveL2D for UseSSE<=1: the long stack slot is loaded as a 64-bit real
        // (FLD_D) and stored to the regD destination.
12364   predicate(UseSSE<=1);
12365   match(Set dst (MoveL2D src));
12366   effect(DEF dst, USE src);
12367   ins_cost(125);
12368 
12369   format %{ "FLD_D  $src\n\t"
12370             "FSTP   $dst\t# MoveL2D_stack_reg" %}
12371   opcode(0xDD);               /* DD /0, FLD m64real */
12372   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
12373               Pop_Reg_D(dst) );
12374   ins_pipe( fpu_reg_mem );
12375 %}
12376 
12377 
12378 instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
        // MoveL2D for UseSSE>=2 when UseXmmLoadAndClearUpper is on: the long
        // stack slot is loaded with MOVSD (bytes F2 0F 10).
        // MoveL2D_stack_reg_sse_partial covers the flag-off case.
12379   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
12380   match(Set dst (MoveL2D src));
12381   effect(DEF dst, USE src);
12382 
12383   ins_cost(95);
12384   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
12385   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
12386   ins_pipe( pipe_slow );
12387 %}
12388 
12389 instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
        // MoveL2D for UseSSE>=2 when UseXmmLoadAndClearUpper is off: the long
        // stack slot is loaded with MOVLPD (bytes 66 0F 12) instead of the MOVSD
        // used by MoveL2D_stack_reg_sse.
12390   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
12391   match(Set dst (MoveL2D src));
12392   effect(DEF dst, USE src);
12393 
12394   ins_cost(95);
        // The listing tag carries this rule's own name so it can be told apart
        // from MoveL2D_stack_reg_sse in printed assembly.
12395   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
12396   ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
12397   ins_pipe( pipe_slow );
12398 %}
12399 
12400 instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
        // MoveL2D for UseSSE>=2 done without memory. Per the listing text, the
        // two halves are moved into dst and tmp with MOVD and then combined with
        // PUNPCKLDQ. Note dst is declared TEMP here rather than DEF.
12401   predicate(UseSSE>=2);
12402   match(Set dst (MoveL2D src));
12403   effect(TEMP dst, USE src, TEMP tmp);
12404   ins_cost(85);
12405   format %{ "MOVD   $dst,$src.lo\n\t"
12406             "MOVD   $tmp,$src.hi\n\t"
12407             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
12408   ins_encode( MovL2XD_reg(dst, src, tmp) );
12409   ins_pipe( pipe_slow );
12410 %}
12411 
12412 // Replicate scalar to packed byte (1 byte) values in xmm
12413 instruct Repl8B_reg(regXD dst, regXD src) %{
        // Replicate8B with the scalar already in an XMM register (UseSSE>=2).
        // Per the listing text: copy src, PUNPCKLBW dst with itself, then
        // PSHUFLW with selector 0x00.
12414   predicate(UseSSE>=2);
12415   match(Set dst (Replicate8B src));
12416   format %{ "MOVDQA  $dst,$src\n\t"
12417             "PUNPCKLBW $dst,$dst\n\t"
12418             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12419   ins_encode( pshufd_8x8(dst, src));
12420   ins_pipe( pipe_slow );
12421 %}
12422 
12423 // Replicate scalar to packed byte (1 byte) values in xmm
12424 instruct Repl8B_eRegI(regXD dst, eRegI src) %{
        // Replicate8B with the scalar in a general register (UseSSE>=2): the
        // value is first moved into dst (mov_i2x), then the same pshufd_8x8
        // encoding as Repl8B_reg is applied with dst as its own source.
12425   predicate(UseSSE>=2);
12426   match(Set dst (Replicate8B src));
12427   format %{ "MOVD    $dst,$src\n\t"
12428             "PUNPCKLBW $dst,$dst\n\t"
12429             "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
12430   ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
12431   ins_pipe( pipe_slow );
12432 %}
12433 
12434 // Replicate scalar zero to packed byte (1 byte) values in xmm
12435 instruct Repl8B_immI0(regXD dst, immI0 zero) %{
        // Replicate8B of the immI0 constant (UseSSE>=2): dst is simply cleared
        // by XORing it with itself.
12436   predicate(UseSSE>=2);
12437   match(Set dst (Replicate8B zero));
12438   format %{ "PXOR  $dst,$dst\t! replicate8B" %}
12439   ins_encode( pxor(dst, dst));
12440   ins_pipe( fpu_reg_reg );
12441 %}
12442 
12443 // Replicate scalar to packed short (2 byte) values in xmm
12444 instruct Repl4S_reg(regXD dst, regXD src) %{
        // Replicate4S with the scalar already in an XMM register (UseSSE>=2):
        // a single PSHUFLW with selector 0x00 per the listing text.
12445   predicate(UseSSE>=2);
12446   match(Set dst (Replicate4S src));
12447   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
12448   ins_encode( pshufd_4x16(dst, src));
12449   ins_pipe( fpu_reg_reg );
12450 %}
12451 
12452 // Replicate scalar to packed short (2 byte) values in xmm
12453 instruct Repl4S_eRegI(regXD dst, eRegI src) %{
        // Replicate4S with the scalar in a general register (UseSSE>=2): move
        // it into dst (mov_i2x), then apply pshufd_4x16 with dst as its own
        // source.
12454   predicate(UseSSE>=2);
12455   match(Set dst (Replicate4S src));
12456   format %{ "MOVD    $dst,$src\n\t"
12457             "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
12458   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12459   ins_pipe( fpu_reg_reg );
12460 %}
12461 
12462 // Replicate scalar zero to packed short (2 byte) values in xmm
12463 instruct Repl4S_immI0(regXD dst, immI0 zero) %{
        // Replicate4S of the immI0 constant (UseSSE>=2): dst is cleared by
        // XORing it with itself.
12464   predicate(UseSSE>=2);
12465   match(Set dst (Replicate4S zero));
12466   format %{ "PXOR  $dst,$dst\t! replicate4S" %}
12467   ins_encode( pxor(dst, dst));
12468   ins_pipe( fpu_reg_reg );
12469 %}
12470 
12471 // Replicate scalar to packed char (2 byte) values in xmm
12472 instruct Repl4C_reg(regXD dst, regXD src) %{
        // Replicate4C with the scalar already in an XMM register (UseSSE>=2).
        // Uses the same pshufd_4x16 encoding as Repl4S_reg.
12473   predicate(UseSSE>=2);
12474   match(Set dst (Replicate4C src));
12475   format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
12476   ins_encode( pshufd_4x16(dst, src));
12477   ins_pipe( fpu_reg_reg );
12478 %}
12479 
12480 // Replicate scalar to packed char (2 byte) values in xmm
12481 instruct Repl4C_eRegI(regXD dst, eRegI src) %{
        // Replicate4C with the scalar in a general register (UseSSE>=2): move
        // it into dst (mov_i2x), then apply pshufd_4x16 with dst as its own
        // source.
12482   predicate(UseSSE>=2);
12483   match(Set dst (Replicate4C src));
12484   format %{ "MOVD    $dst,$src\n\t"
12485             "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
12486   ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
12487   ins_pipe( fpu_reg_reg );
12488 %}
12489 
12490 // Replicate scalar zero to packed char (2 byte) values in xmm
12491 instruct Repl4C_immI0(regXD dst, immI0 zero) %{
        // Replicate4C of the immI0 constant (UseSSE>=2): dst is cleared by
        // XORing it with itself.
12492   predicate(UseSSE>=2);
12493   match(Set dst (Replicate4C zero));
12494   format %{ "PXOR  $dst,$dst\t! replicate4C" %}
12495   ins_encode( pxor(dst, dst));
12496   ins_pipe( fpu_reg_reg );
12497 %}
12498 
12499 // Replicate scalar to packed integer (4 byte) values in xmm
12500 instruct Repl2I_reg(regXD dst, regXD src) %{
        // Replicate2I with the scalar already in an XMM register (UseSSE>=2):
        // a single PSHUFD with selector 0x00.
12501   predicate(UseSSE>=2);
12502   match(Set dst (Replicate2I src));
12503   format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
12504   ins_encode( pshufd(dst, src, 0x00));
12505   ins_pipe( fpu_reg_reg );
12506 %}
12507 
12508 // Replicate scalar to packed integer (4 byte) values in xmm
12509 instruct Repl2I_eRegI(regXD dst, eRegI src) %{
        // Replicate2I with the scalar in a general register (UseSSE>=2): move
        // it into dst (mov_i2x), then PSHUFD dst onto itself with selector 0x00.
12510   predicate(UseSSE>=2);
12511   match(Set dst (Replicate2I src));
12512   format %{ "MOVD   $dst,$src\n\t"
12513             "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
12514   ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
12515   ins_pipe( fpu_reg_reg );
12516 %}
12517 
12518 // Replicate scalar zero to packed integer (4 byte) values in xmm
12519 instruct Repl2I_immI0(regXD dst, immI0 zero) %{
        // Replicate2I of the immI0 constant (UseSSE>=2): dst is cleared by
        // XORing it with itself.
12520   predicate(UseSSE>=2);
12521   match(Set dst (Replicate2I zero));
12522   format %{ "PXOR  $dst,$dst\t! replicate2I" %}
12523   ins_encode( pxor(dst, dst));
12524   ins_pipe( fpu_reg_reg );
12525 %}
12526 
12527 // Replicate scalar to packed single precision floating point values in xmm
12528 instruct Repl2F_reg(regXD dst, regXD src) %{
        // Replicate2F with a regXD source (UseSSE>=2): a single PSHUFD with
        // selector 0xe0. Repl2F_regX is the same rule for a regX source.
12529   predicate(UseSSE>=2);
12530   match(Set dst (Replicate2F src));
12531   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12532   ins_encode( pshufd(dst, src, 0xe0));
12533   ins_pipe( fpu_reg_reg );
12534 %}
12535 
12536 // Replicate scalar to packed single precision floating point values in xmm
12537 instruct Repl2F_regX(regXD dst, regX src) %{
        // Replicate2F with a regX source (UseSSE>=2). Identical to Repl2F_reg
        // apart from the source operand class.
12538   predicate(UseSSE>=2);
12539   match(Set dst (Replicate2F src));
12540   format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
12541   ins_encode( pshufd(dst, src, 0xe0));
12542   ins_pipe( fpu_reg_reg );
12543 %}
12544 
12545 // Replicate scalar zero to packed single precision floating point values in xmm
12546 instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
        // Replicate2F of the immXF0 constant (UseSSE>=2): dst is cleared by
        // XORing it with itself.
12547   predicate(UseSSE>=2);
12548   match(Set dst (Replicate2F zero));
12549   format %{ "PXOR  $dst,$dst\t! replicate2F" %}
12550   ins_encode( pxor(dst, dst));
12551   ins_pipe( fpu_reg_reg );
12552 %}
12553 
12554 // =======================================================================
12555 // fast clearing of an array
12556 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
        // ClearArray rule. cnt and base are both consumed and destroyed
        // (USE_KILL); the zero operand and the flags are KILLed. The result is
        // the Universe placeholder operand dummy.
12557   match(Set dummy (ClearArray cnt base));
12558   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
        // Listing text: double the count in ECX, zero EAX, then REP STOS.
12559   format %{ "SHL    ECX,1\t# Convert doublewords to words\n\t"
12560             "XOR    EAX,EAX\n\t"
12561             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
        // The secondary opcode 0x4 is presumably consumed by RegOpc as the /4
        // extension that makes 0xD1 a shift-left-by-one - TODO confirm.
12562   opcode(0,0x4);
12563   ins_encode( Opcode(0xD1), RegOpc(ECX),
12564               OpcRegReg(0x33,EAX,EAX),
12565               Opcode(0xF3), Opcode(0xAB) );
12566   ins_pipe( pipe_slow );
12567 %}
12568 
12569 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eBXRegI cnt2,
12570                         eAXRegI result, regXD tmp1, regXD tmp2, eFlagsReg cr) %{
        // StrComp rule with no predicate. All four inputs are USE_KILL, the two
        // XMM operands are TEMPs and the flags are KILLed.
12571   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12572   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12573 
12574   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1, $tmp2" %}
12575   ins_encode %{
          // The whole sequence is produced by the assembler's string_compare
          // routine; note the argument order is (str1, str2, cnt1, cnt2, ...).
12576     __ string_compare($str1$$Register, $str2$$Register,
12577                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
12578                       $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12579   %}
12580   ins_pipe( pipe_slow );
12581 %}
12582 
12583 // fast string equals
12584 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
12585                        regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
        // StrEquals rule with no predicate. str1, str2 and cnt are USE_KILL;
        // tmp1/tmp2 are XMM TEMPs; tmp3 and the flags are KILLed.
12586   match(Set result (StrEquals (Binary str1 str2) cnt));
12587   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
12588 
12589   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
12590   ins_encode %{
          // Shares char_arrays_equals with array_equals; the leading 'false'
          // distinguishes this caller from array_equals, which passes 'true'.
12591     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
12592                           $cnt$$Register, $result$$Register, $tmp3$$Register,
12593                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12594   %}
12595   ins_pipe( pipe_slow );
12596 %}
12597 
12598 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12599                         eBXRegI result, regXD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
        // StrIndexOf rule, available only when UseSSE42Intrinsics is set. All
        // four inputs are USE_KILL; tmp1 is an XMM TEMP; tmp2 and the flags are
        // KILLed.
12600   predicate(UseSSE42Intrinsics);
12601   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12602   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp2, KILL cr);
12603 
12604   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp2, $tmp1" %}
12605   ins_encode %{
          // The whole sequence is produced by the assembler's string_indexof
          // routine; note the argument order is (str1, str2, cnt1, cnt2, ...).
12606     __ string_indexof($str1$$Register, $str2$$Register,
12607                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
12608                       $tmp1$$XMMRegister, $tmp2$$Register);
12609   %}
12610   ins_pipe( pipe_slow );
12611 %}
12612 
12613 // fast array equals
12614 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12615                       regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12616 %{
        // AryEq rule with no predicate. Both array operands are USE_KILL;
        // tmp1/tmp2 are XMM TEMPs; tmp3, tmp4 and the flags are KILLed.
12617   match(Set result (AryEq ary1 ary2));
12618   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12619   //ins_cost(300);
12620 
12621   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12622   ins_encode %{
          // Same helper as string_equals, called with 'true'. There is no count
          // input here: tmp3 is passed in the position where string_equals
          // passes its cnt operand.
12623     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
12624                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
12625                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12626   %}
12627   ins_pipe( pipe_slow );
12628 %}
12629 
12630 //----------Control Flow Instructions------------------------------------------
12631 // Signed compare Instructions
12632 instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
        // Signed int compare of two registers; the result is the eFlagsReg
        // operand cr. Encoded as opcode 0x3B with a register-register operand.
12633   match(Set cr (CmpI op1 op2));
12634   effect( DEF cr, USE op1, USE op2 );
12635   format %{ "CMP    $op1,$op2" %}
12636   opcode(0x3B);  /* Opcode 3B /r */
12637   ins_encode( OpcP, RegReg( op1, op2) );
12638   ins_pipe( ialu_cr_reg_reg );
12639 %}
12640 
12641 instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
        // Signed int compare of a register against an immI constant.
12642   match(Set cr (CmpI op1 op2));
12643   effect( DEF cr, USE op1 );
12644   format %{ "CMP    $op1,$op2" %}
12645   opcode(0x81,0x07);  /* Opcode 81 /7 */
12646   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
        // Con8or32 presumably picks an 8- or 32-bit immediate to match what
        // OpcSErm emitted - TODO confirm against the encoding classes.
12647   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12648   ins_pipe( ialu_cr_reg_imm );
12649 %}
12650 
12651 // Cisc-spilled version of compI_eReg
12652 instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
        // Signed int compare of a register against a loaded value, with the
        // LoadI folded into the CMP memory operand. Carries an explicit cost of
        // 500, unlike the register and immediate forms.
12653   match(Set cr (CmpI op1 (LoadI op2)));
12654 
12655   format %{ "CMP    $op1,$op2" %}
12656   ins_cost(500);
12657   opcode(0x3B);  /* Opcode 3B /r */
12658   ins_encode( OpcP, RegMem( op1, op2) );
12659   ins_pipe( ialu_cr_reg_mem );
12660 %}
12661 
12662 instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
        // Signed int compare against the immI0 constant, implemented as TEST of
        // the register with itself (opcode 0x85), so no immediate is encoded.
12663   match(Set cr (CmpI src zero));
12664   effect( DEF cr, USE src );
12665 
12666   format %{ "TEST   $src,$src" %}
12667   opcode(0x85);
12668   ins_encode( OpcP, RegReg( src, src ) );
12669   ins_pipe( ialu_cr_reg_imm );
12670 %}
12671 
12672 instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
        // Folds (AndI src con) compared against zero into one TEST of the
        // register with a 32-bit immediate (opcode 0xF7 /0, Con32).
12673   match(Set cr (CmpI (AndI src con) zero));
12674 
12675   format %{ "TEST   $src,$con" %}
12676   opcode(0xF7,0x00);
12677   ins_encode( OpcP, RegOpc(src), Con32(con) );
12678   ins_pipe( ialu_cr_reg_imm );
12679 %}
12680 
12681 instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
        // Folds (AndI src mem) compared against zero into one TEST of the
        // register with a memory operand (opcode 0x85).
        // NOTE(review): the other memory-operand rules in this section wrap the
        // operand in a load, e.g. (LoadI op2) in compI_eReg_mem; this pattern
        // uses mem directly - confirm that is intended.
12682   match(Set cr (CmpI (AndI src mem) zero));
12683 
12684   format %{ "TEST   $src,$mem" %}
12685   opcode(0x85);
12686   ins_encode( OpcP, RegMem( src, mem ) );
12687   ins_pipe( ialu_cr_reg_mem );
12688 %}
12689 
12690 // Unsigned compare Instructions; really, same as signed except they
12691 // produce an eFlagsRegU instead of eFlagsReg.
12692 instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
        // Unsigned int compare of two registers. Same opcode (0x3B) and
        // encoding as compI_eReg; only the ideal node (CmpU) and the flags
        // operand class (eFlagsRegU) differ.
12693   match(Set cr (CmpU op1 op2));
12694 
12695   format %{ "CMPu   $op1,$op2" %}
12696   opcode(0x3B);  /* Opcode 3B /r */
12697   ins_encode( OpcP, RegReg( op1, op2) );
12698   ins_pipe( ialu_cr_reg_reg );
12699 %}
12700 
12701 instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
        // Unsigned int compare of a register against an immI constant. Same
        // opcode and encoding as compI_eReg_imm, producing eFlagsRegU.
12702   match(Set cr (CmpU op1 op2));
12703 
12704   format %{ "CMPu   $op1,$op2" %}
12705   opcode(0x81,0x07);  /* Opcode 81 /7 */
12706   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12707   ins_pipe( ialu_cr_reg_imm );
12708 %}
12709 
12710 // Cisc-spilled version of compU_eReg
12711 instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
        // Unsigned int compare of a register against a loaded value, with the
        // LoadI folded into the memory operand. Explicit cost of 500.
12712   match(Set cr (CmpU op1 (LoadI op2)));
12713 
12714   format %{ "CMPu   $op1,$op2" %}
12715   ins_cost(500);
12716   opcode(0x3B);  /* Opcode 3B /r */
12717   ins_encode( OpcP, RegMem( op1, op2) );
12718   ins_pipe( ialu_cr_reg_mem );
12719 %}
12720 
12721 // // Cisc-spilled version of cmpU_eReg
12722 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
12723 //  match(Set cr (CmpU (LoadI op1) op2));
12724 //
12725 //  format %{ "CMPu   $op1,$op2" %}
12726 //  ins_cost(500);
12727 //  opcode(0x39);  /* Opcode 39 /r */
12728 //  ins_encode( OpcP, RegMem( op1, op2) );
12729 //%}
12730 
12731 instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
        // Unsigned int compare against the immI0 constant, implemented as TEST
        // of the register with itself (opcode 0x85), producing eFlagsRegU.
12732   match(Set cr (CmpU src zero));
12733 
12734   format %{ "TESTu  $src,$src" %}
12735   opcode(0x85);
12736   ins_encode( OpcP, RegReg( src, src ) );
12737   ins_pipe( ialu_cr_reg_imm );
12738 %}
12739 
12740 // Unsigned pointer compare Instructions
12741 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
        // Pointer compare of two registers, producing the unsigned flags
        // operand class eFlagsRegU. Opcode 0x3B, register-register operand.
12742   match(Set cr (CmpP op1 op2));
12743 
12744   format %{ "CMPu   $op1,$op2" %}
12745   opcode(0x3B);  /* Opcode 3B /r */
12746   ins_encode( OpcP, RegReg( op1, op2) );
12747   ins_pipe( ialu_cr_reg_reg );
12748 %}
12749 
12750 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
        // Pointer compare of a register against an immP constant, using the
        // same 0x81 /7 encoding as the integer immediate compares.
12751   match(Set cr (CmpP op1 op2));
12752 
12753   format %{ "CMPu   $op1,$op2" %}
12754   opcode(0x81,0x07);  /* Opcode 81 /7 */
12755   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12756   ins_pipe( ialu_cr_reg_imm );
12757 %}
12758 
12759 // Cisc-spilled version of compP_eReg
12760 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
        // Pointer compare of a register against a loaded pointer, with the
        // LoadP folded into the memory operand. No predicate; explicit cost of
        // 500. compP_mem_eReg matches the same pattern under a predicate and
        // without the explicit cost.
12761   match(Set cr (CmpP op1 (LoadP op2)));
12762 
12763   format %{ "CMPu   $op1,$op2" %}
12764   ins_cost(500);
12765   opcode(0x3B);  /* Opcode 3B /r */
12766   ins_encode( OpcP, RegMem( op1, op2) );
12767   ins_pipe( ialu_cr_reg_mem );
12768 %}
12769 
12770 // // Cisc-spilled version of cmpP_eReg
12771 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12772 //  match(Set cr (CmpP (LoadP op1) op2));
12773 //
12774 //  format %{ "CMPu   $op1,$op2" %}
12775 //  ins_cost(500);
12776 //  opcode(0x39);  /* Opcode 39 /r */
12777 //  ins_encode( OpcP, RegMem( op1, op2) );
12778 //%}
12779 
12780 // Compare raw pointer (used in out-of-heap check).
12781 // Only works because non-oop pointers must be raw pointers
12782 // and raw pointers have no anti-dependencies.
      // Same match tree and encoding as compP_eReg_mem, but without an explicit
      // ins_cost and guarded by a predicate: the node reached through
      // n->in(2)->in(2) must not have an oop-pointer bottom type.
      // NOTE(review): n->in(2)->in(2) presumably reaches the address input of
      // the LoadP -- confirm against the ideal-graph input layout.
12783 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12784   predicate( !n->in(2)->in(2)->bottom_type()->isa_oop_ptr() );
12785   match(Set cr (CmpP op1 (LoadP op2)));
12786 
12787   format %{ "CMPu   $op1,$op2" %}
12788   opcode(0x3B);  /* Opcode 3B /r */
12789   ins_encode( OpcP, RegMem( op1, op2) );
12790   ins_pipe( ialu_cr_reg_mem );
12791 %}
12792 
12793 //
12794 // This will generate a signed flags result. This should be ok
12795 // since any compare to a zero should be eq/neq.
      // Pointer null test: CmpP(src, zero) is emitted as TEST src,src (opcode
      // 0x85).  Note that the result operand is eFlagsReg, not eFlagsRegU.
12796 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12797   match(Set cr (CmpP src zero));
12798 
12799   format %{ "TEST   $src,$src" %}
12800   opcode(0x85);
12801   ins_encode( OpcP, RegReg( src, src ) );
12802   ins_pipe( ialu_cr_reg_imm );
12803 %}
12804 
12805 // Cisc-spilled version of testP_reg
12806 // This will generate a signed flags result. This should be ok
12807 // since any compare to a zero should be eq/neq.
      // The pointer is tested in memory: opcode 0xF7 /0 on the memory operand
      // followed by the 32-bit immediate 0xFFFFFFFF, so no register is needed.
      // NOTE(review): 'zero' is declared immI0 although it is the right-hand
      // operand of a CmpP (testP_reg uses immP0) -- confirm this is intended.
12808 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12809   match(Set cr (CmpP (LoadP op) zero));
12810 
12811   format %{ "TEST   $op,0xFFFFFFFF" %}
12812   ins_cost(500);
12813   opcode(0xF7);               /* Opcode F7 /0 */
12814   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12815   ins_pipe( ialu_cr_reg_imm );
12816 %}
12817 
12818 // Yanked all unsigned pointer compare operations.
12819 // Pointer compares are done with CmpP which is already unsigned.
12820 
12821 //----------Max and Min--------------------------------------------------------
12822 // Min Instructions
12823 ////
12824 //   *** Min and Max using the conditional move are slower than the
12825 //   *** branch version on a Pentium III.
12826 // // Conditional move for min
12827 //instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12828 //  effect( USE_DEF op2, USE op1, USE cr );
12829 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12830 //  opcode(0x4C,0x0F);
12831 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12832 //  ins_pipe( pipe_cmov_reg );
12833 //%}
12834 //
12835 //// Min Register with Register (P6 version)
12836 //instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
12837 //  predicate(VM_Version::supports_cmov() );
12838 //  match(Set op2 (MinI op1 op2));
12839 //  ins_cost(200);
12840 //  expand %{
12841 //    eFlagsReg cr;
12842 //    compI_eReg(cr,op1,op2);
12843 //    cmovI_reg_lt(op2,op1,cr);
12844 //  %}
12845 //%}
12846 
12847 // Min Register with Register (generic version)
      // Two-address form: dst is both the first MinI input and the result.
      // The flags register is declared as killed.  The emitted instruction
      // sequence is supplied by the min_enc encoding class defined elsewhere.
12848 instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12849   match(Set dst (MinI dst src));
12850   effect(KILL flags);
12851   ins_cost(300);
12852 
12853   format %{ "MIN    $dst,$src" %}
12854   opcode(0xCC);
12855   ins_encode( min_enc(dst,src) );
12856   ins_pipe( pipe_slow );
12857 %}
12858 
12859 // Max Register with Register
12860 //   *** Min and Max using the conditional move are slower than the
12861 //   *** branch version on a Pentium III.
12862 // // Conditional move for max
12863 //instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12864 //  effect( USE_DEF op2, USE op1, USE cr );
12865 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12866 //  opcode(0x4F,0x0F);
12867 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12868 //  ins_pipe( pipe_cmov_reg );
12869 //%}
12870 //
12871 // // Max Register with Register (P6 version)
12872 //instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
12873 //  predicate(VM_Version::supports_cmov() );
12874 //  match(Set op2 (MaxI op1 op2));
12875 //  ins_cost(200);
12876 //  expand %{
12877 //    eFlagsReg cr;
12878 //    compI_eReg(cr,op1,op2);
12879 //    cmovI_reg_gt(op2,op1,cr);
12880 //  %}
12881 //%}
12882 
12883 // Max Register with Register (generic version)
      // Two-address form: dst is both the first MaxI input and the result.
      // The flags register is declared as killed.  The emitted instruction
      // sequence is supplied by the max_enc encoding class defined elsewhere.
12884 instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12885   match(Set dst (MaxI dst src));
12886   effect(KILL flags);
12887   ins_cost(300);
12888 
12889   format %{ "MAX    $dst,$src" %}
12890   opcode(0xCC);
12891   ins_encode( max_enc(dst,src) );
12892   ins_pipe( pipe_slow );
12893 %}
12894 
12895 // ============================================================================
12896 // Branch Instructions
12897 // Jump Table
      // Implements the ideal Jump node: an indirect jump through a table of
      // code addresses that is emitted as a constant from _index2label.
      // switch_val is used as the index register with scale times_1.
12898 instruct jumpXtnd(eRegI switch_val) %{
12899   match(Jump switch_val);
12900   ins_cost(350);
12901 
12902   format %{  "JMP    [table_base](,$switch_val,1)\n\t" %}
12903 
12904   ins_encode %{
          // Materialize the jump table as an address constant and remember
          // where it starts.
12905     address table_base  = __ address_table_constant(_index2label);
12906 
12907     // Jump to Address(table_base + switch_reg)
12908     InternalAddress table(table_base);
          // No base register (noreg); the index register is used unscaled.
12909     Address index(noreg, $switch_val$$Register, Address::times_1);
12910     __ jump(ArrayAddress(table, index));
12911   %}
12912   ins_pc_relative(1);
12913   ins_pipe(pipe_jmp);
12914 %}
12915 
12916 // Jump Direct - Label defines a relative address from JMP+1
      // Long-form unconditional branch for the ideal Goto node: opcode 0xE9
      // followed by the label displacement (Lbl encoding); declared size 5 bytes.
12917 instruct jmpDir(label labl) %{
12918   match(Goto);
12919   effect(USE labl);
12920 
12921   ins_cost(300);
12922   format %{ "JMP    $labl" %}
12923   size(5);
12924   opcode(0xE9);
12925   ins_encode( OpcP, Lbl( labl ) );
12926   ins_pipe( pipe_jmp );
12927   ins_pc_relative(1);
12928 %}
12929 
12930 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Long-form conditional branch for the ideal If node on signed flags
      // (cmpOp / eFlagsReg).  Two-byte opcode 0x0F 0x80 handled by the Jcc
      // encoding class; declared size 6 bytes.
12931 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12932   match(If cop cr);
12933   effect(USE labl);
12934 
12935   ins_cost(300);
12936   format %{ "J$cop    $labl" %}
12937   size(6);
12938   opcode(0x0F, 0x80);
12939   ins_encode( Jcc( cop, labl) );
12940   ins_pipe( pipe_jcc );
12941   ins_pc_relative(1);
12942 %}
12943 
12944 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Same encoding as jmpCon, but matches the ideal CountedLoopEnd node
      // (signed flags).  The format string tags it as "Loop end".
12945 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12946   match(CountedLoopEnd cop cr);
12947   effect(USE labl);
12948 
12949   ins_cost(300);
12950   format %{ "J$cop    $labl\t# Loop end" %}
12951   size(6);
12952   opcode(0x0F, 0x80);
12953   ins_encode( Jcc( cop, labl) );
12954   ins_pipe( pipe_jcc );
12955   ins_pc_relative(1);
12956 %}
12957 
12958 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // CountedLoopEnd branch on unsigned flags (cmpOpU / eFlagsRegU); otherwise
      // identical in encoding and size to jmpLoopEnd.
12959 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12960   match(CountedLoopEnd cop cmp);
12961   effect(USE labl);
12962 
12963   ins_cost(300);
12964   format %{ "J$cop,u  $labl\t# Loop end" %}
12965   size(6);
12966   opcode(0x0F, 0x80);
12967   ins_encode( Jcc( cop, labl) );
12968   ins_pipe( pipe_jcc );
12969   ins_pc_relative(1);
12970 %}
12971 
12972 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // CountedLoopEnd branch for the cmpOpUCF / eFlagsRegUCF operand pair.
        // Same Jcc encoding as jmpLoopEndU but with a lower ins_cost (200).
12973   match(CountedLoopEnd cop cmp);
12974   effect(USE labl);
12975 
12976   ins_cost(200);
12977   format %{ "J$cop,u  $labl\t# Loop end" %}
12978   size(6);
12979   opcode(0x0F, 0x80);
12980   ins_encode( Jcc( cop, labl) );
12981   ins_pipe( pipe_jcc );
12982   ins_pc_relative(1);
12983 %}
12984 
12985 // Jump Direct Conditional - using unsigned comparison
      // Long-form If branch on unsigned flags (cmpOpU / eFlagsRegU); opcode
      // 0x0F 0x80 via the Jcc encoding class, declared size 6 bytes.
12986 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12987   match(If cop cmp);
12988   effect(USE labl);
12989 
12990   ins_cost(300);
12991   format %{ "J$cop,u  $labl" %}
12992   size(6);
12993   opcode(0x0F, 0x80);
12994   ins_encode(Jcc(cop, labl));
12995   ins_pipe(pipe_jcc);
12996   ins_pc_relative(1);
12997 %}
12998 
12999 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // If branch for the cmpOpUCF / eFlagsRegUCF operand pair.  Same single
        // Jcc encoding as jmpConU but with a lower ins_cost (200).
13000   match(If cop cmp);
13001   effect(USE labl);
13002 
13003   ins_cost(200);
13004   format %{ "J$cop,u  $labl" %}
13005   size(6);
13006   opcode(0x0F, 0x80);
13007   ins_encode(Jcc(cop, labl));
13008   ins_pipe(pipe_jcc);
13009   ins_pc_relative(1);
13010 %}
13011 
13012 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
        // If branch for cmpOpUCF2 conditions (only notEqual and equal are
        // accepted by the encoding).  Two long-form jumps are emitted, a JP
        // (parity) followed by the requested Jcc, for a declared size of 12:
        //   notEqual: both jumps target $labl.
        //   equal:    the JP skips over the Jcc ("done" in the format string).
13013   match(If cop cmp);
13014   effect(USE labl);
13015 
13016   ins_cost(200);
13017   format %{ $$template
13018     if ($cop$$cmpcode == Assembler::notEqual) {
13019       $$emit$$"JP,u   $labl\n\t"
13020       $$emit$$"J$cop,u   $labl"
13021     } else {
13022       $$emit$$"JP,u   done\n\t"
13023       $$emit$$"J$cop,u   $labl\n\t"
13024       $$emit$$"done:"
13025     }
13026   %}
13027   size(12);
13028   opcode(0x0F, 0x80);
13029   ins_encode %{
13030     Label* l = $labl$$label;
          // First jump: primary opcode byte, then the parity condition code.
13031     $$$emit8$primary;
13032     emit_cc(cbuf, $secondary, Assembler::parity);
13033     int parity_disp = -1;
13035     if ($cop$$cmpcode == Assembler::notEqual) {
13036        // JP goes to the label itself; the displacement is taken relative
             // to the end of the 4-byte displacement field about to be emitted.
13037        parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
13038     } else if ($cop$$cmpcode == Assembler::equal) {
             // JP skips the second jump (size(12) covers two 6-byte jumps).
13039        parity_disp = 6;
13041     } else {
13042        ShouldNotReachHere();
13043     }
13044     emit_d32(cbuf, parity_disp);
          // Second jump: the requested condition, always targeting the label.
13045     $$$emit8$primary;
13046     emit_cc(cbuf, $secondary, $cop$$cmpcode);
13047     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
13048     emit_d32(cbuf, disp);
13049   %}
13050   ins_pipe(pipe_jcc);
13051   ins_pc_relative(1);
13052 %}
13053 
13054 // ============================================================================
13055 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
13056 // array for an instance of the superklass.  Set a hidden internal cache on a
13057 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
13058 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
      // Operands are pinned to fixed registers by their operand classes
      // (eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx); rcx and the
      // flags are declared as killed.  This variant produces the result in a
      // register, so opcode(0x1) asks the shared enc_PartialSubtypeCheck
      // encoding to XOR EDI on a hit.
13059 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
13060   match(Set result (PartialSubtypeCheck sub super));
13061   effect( KILL rcx, KILL cr );
13062 
13063   ins_cost(1100);  // slightly larger than the next version
13064   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
13065             "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
13066             "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
13067             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
13068             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
13069             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
13070             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
13071      "miss:\t" %}
13072 
13073   opcode(0x1); // Force a XOR of EDI
13074   ins_encode( enc_PartialSubtypeCheck() );
13075   ins_pipe( pipe_slow );
13076 %}
13077 
13078 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
        // Variant of partialSubtypeCheck for when the check result is only
        // compared against null: the flags left by the scan are the output, so
        // 'result' is merely killed and opcode(0x0) skips the XOR of EDI.
        // Its ins_cost (1000) is lower than the register-result version (1100).
13079   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
13080   effect( KILL rcx, KILL result );
13081 
13082   ins_cost(1000);
13083   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
13084             "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
13085             "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
13086             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
13087             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
13088             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
13089      "miss:\t" %}
13090 
13091   opcode(0x0);  // No need to XOR EDI
13092   ins_encode( enc_PartialSubtypeCheck() );
13093   ins_pipe( pipe_slow );
13094 %}
13095 
13096 // ============================================================================
13097 // Branch Instructions -- short offset versions
13098 //
13099 // These instructions are used to replace jumps of a long offset (the default
13100 // match) with jumps of a shorter offset.  These instructions are all tagged
13101 // with the ins_short_branch attribute, which causes the ADLC to suppress the
13102 // match rules in general matching.  Instead, the ADLC generates a conversion
13103 // method in the MachNode which can be used to do in-place replacement of the
13104 // long variant with the shorter variant.  The compiler will determine if a
13105 // branch can be taken by the is_short_branch_offset() predicate in the machine
13106 // specific code section of the file.
13107 
13108 // Jump Direct - Label defines a relative address from JMP+1
      // Short-offset replacement for jmpDir: opcode 0xEB with the LblShort
      // encoding, declared size 2 bytes, tagged with ins_short_branch(1).
13109 instruct jmpDir_short(label labl) %{
13110   match(Goto);
13111   effect(USE labl);
13112 
13113   ins_cost(300);
13114   format %{ "JMP,s  $labl" %}
13115   size(2);
13116   opcode(0xEB);
13117   ins_encode( OpcP, LblShort( labl ) );
13118   ins_pipe( pipe_jmp );
13119   ins_pc_relative(1);
13120   ins_short_branch(1);
13121 %}
13122 
13123 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short-offset replacement for jmpCon (signed flags): base opcode 0x70
      // via the JccShort encoding class, declared size 2 bytes.
13124 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
13125   match(If cop cr);
13126   effect(USE labl);
13127 
13128   ins_cost(300);
13129   format %{ "J$cop,s  $labl" %}
13130   size(2);
13131   opcode(0x70);
13132   ins_encode( JccShort( cop, labl) );
13133   ins_pipe( pipe_jcc );
13134   ins_pc_relative(1);
13135   ins_short_branch(1);
13136 %}
13137 
13138 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short-offset replacement for jmpLoopEnd (CountedLoopEnd, signed flags):
      // base opcode 0x70 via JccShort, declared size 2 bytes.
13139 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
13140   match(CountedLoopEnd cop cr);
13141   effect(USE labl);
13142 
13143   ins_cost(300);
13144   format %{ "J$cop,s  $labl\t# Loop end" %}
13145   size(2);
13146   opcode(0x70);
13147   ins_encode( JccShort( cop, labl) );
13148   ins_pipe( pipe_jcc );
13149   ins_pc_relative(1);
13150   ins_short_branch(1);
13151 %}
13152 
13153 // Jump Direct Conditional - Label defines a relative address from Jcc+1
      // Short-offset replacement for jmpLoopEndU (CountedLoopEnd, unsigned
      // flags): base opcode 0x70 via JccShort, declared size 2 bytes.
13154 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13155   match(CountedLoopEnd cop cmp);
13156   effect(USE labl);
13157 
13158   ins_cost(300);
13159   format %{ "J$cop,us $labl\t# Loop end" %}
13160   size(2);
13161   opcode(0x70);
13162   ins_encode( JccShort( cop, labl) );
13163   ins_pipe( pipe_jcc );
13164   ins_pc_relative(1);
13165   ins_short_branch(1);
13166 %}
13167 
13168 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // Short-offset replacement for jmpLoopEndUCF: base opcode 0x70 via
        // JccShort, declared size 2 bytes.
        // NOTE(review): ins_cost here is 300 while the long form uses 200 --
        // confirm the difference is intentional.
13169   match(CountedLoopEnd cop cmp);
13170   effect(USE labl);
13171 
13172   ins_cost(300);
13173   format %{ "J$cop,us $labl\t# Loop end" %}
13174   size(2);
13175   opcode(0x70);
13176   ins_encode( JccShort( cop, labl) );
13177   ins_pipe( pipe_jcc );
13178   ins_pc_relative(1);
13179   ins_short_branch(1);
13180 %}
13181 
13182 // Jump Direct Conditional - using unsigned comparison
      // Short-offset replacement for jmpConU: base opcode 0x70 via JccShort,
      // declared size 2 bytes, tagged with ins_short_branch(1).
13183 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13184   match(If cop cmp);
13185   effect(USE labl);
13186 
13187   ins_cost(300);
13188   format %{ "J$cop,us $labl" %}
13189   size(2);
13190   opcode(0x70);
13191   ins_encode( JccShort( cop, labl) );
13192   ins_pipe( pipe_jcc );
13193   ins_pc_relative(1);
13194   ins_short_branch(1);
13195 %}
13196 
13197 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
        // Short-offset replacement for jmpConUCF: base opcode 0x70 via
        // JccShort, declared size 2 bytes.
        // NOTE(review): ins_cost here is 300 while the long form uses 200 --
        // confirm the difference is intentional.
13198   match(If cop cmp);
13199   effect(USE labl);
13200 
13201   ins_cost(300);
13202   format %{ "J$cop,us $labl" %}
13203   size(2);
13204   opcode(0x70);
13205   ins_encode( JccShort( cop, labl) );
13206   ins_pipe( pipe_jcc );
13207   ins_pc_relative(1);
13208   ins_short_branch(1);
13209 %}
13210 
13211 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
        // Short-offset replacement for jmpConUCF2.  Emits two 2-byte jumps
        // (declared size 4): a JP (parity) followed by the requested Jcc.
        //   notEqual: both jumps target $labl.
        //   equal:    the JP skips over the Jcc ("done" in the format string).
        // Any other condition hits ShouldNotReachHere().
13212   match(If cop cmp);
13213   effect(USE labl);
13214 
13215   ins_cost(300);
13216   format %{ $$template
13217     if ($cop$$cmpcode == Assembler::notEqual) {
13218       $$emit$$"JP,u,s   $labl\n\t"
13219       $$emit$$"J$cop,u,s   $labl"
13220     } else {
13221       $$emit$$"JP,u,s   done\n\t"
13222       $$emit$$"J$cop,u,s  $labl\n\t"
13223       $$emit$$"done:"
13224     }
13225   %}
13226   size(4);
13227   opcode(0x70);
13228   ins_encode %{
13229     Label* l = $labl$$label;
          // First jump: parity condition on the short-jump base opcode.
13230     emit_cc(cbuf, $primary, Assembler::parity);
13231     int parity_disp = -1;
13232     if ($cop$$cmpcode == Assembler::notEqual) {
            // Displacement is relative to the end of the 1-byte field that
            // is about to be emitted.
13233       parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
13234     } else if ($cop$$cmpcode == Assembler::equal) {
            // Skip the following 2-byte jump.
13235       parity_disp = 2;
13236     } else {
13237       ShouldNotReachHere();
13238     }
13239     emit_d8(cbuf, parity_disp);
          // Second jump: the requested condition, always targeting the label.
13240     emit_cc(cbuf, $primary, $cop$$cmpcode);
13241     int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
13242     emit_d8(cbuf, disp);
          // Range checks run after both displacements have been emitted.
13243     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
13244     assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
13245   %}
13246   ins_pipe(pipe_jcc);
13247   ins_pc_relative(1);
13248   ins_short_branch(1);
13249 %}
13250 
13251 // ============================================================================
13252 // Long Compare
13253 //
13254 // Currently we hold longs in 2 registers.  Comparing such values efficiently
13255 // is tricky.  The flavor of compare used depends on whether we are testing
13256 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
13257 // The GE test is the negated LT test.  The LE test can be had by commuting
13258 // the operands (yielding a GE test) and then negating; negate again for the
13259 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
13260 // NE test is negated from that.
13261 
13262 // Due to a shortcoming in the ADLC, it mixes up expressions like:
13263 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
13264 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
13265 // are collapsed internally in the ADLC's dfa-gen code.  The match for
13266 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
13267 // foo match ends up with the wrong leaf.  One fix is to not match both
13268 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
13269 // both forms beat the trinary form of long-compare and both are very useful
13270 // on Intel which has so few registers.
13271 
13272 // Manifest a CmpL result in an integer register.  Very painful.
13273 // This is the test to avoid.
      // Implements CmpL3: dst is zeroed, then decremented or incremented
      // depending on which branch is taken (m_one / p_one), or left at zero
      // when both halves compare equal.  The high halves are compared with
      // signed conditions (less/greater), the low halves with the unsigned
      // 'below'.
      // The flags register is declared as killed.
13274 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
13275   match(Set dst (CmpL3 src1 src2));
13276   effect( KILL flags );
13277   ins_cost(1000);
13278   format %{ "XOR    $dst,$dst\n\t"
13279             "CMP    $src1.hi,$src2.hi\n\t"
13280             "JLT,s  m_one\n\t"
13281             "JGT,s  p_one\n\t"
13282             "CMP    $src1.lo,$src2.lo\n\t"
13283             "JB,s   m_one\n\t"
13284             "JEQ,s  done\n"
13285     "p_one:\tINC    $dst\n\t"
13286             "JMP,s  done\n"
13287     "m_one:\tDEC    $dst\n"
13288      "done:" %}
13289   ins_encode %{
13290     Label p_one, m_one, done;
          // Start from 0 (the "equal" answer).
13291     __ xorptr($dst$$Register, $dst$$Register);
          // High words first; HIGH_FROM_LOW presumably maps the low register
          // of a long pair to its high register (macro defined elsewhere).
13292     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13293     __ jccb(Assembler::less,    m_one);
13294     __ jccb(Assembler::greater, p_one);
          // High words equal: decide on the low words, compared as unsigned.
13295     __ cmpl($src1$$Register, $src2$$Register);
13296     __ jccb(Assembler::below,   m_one);
13297     __ jccb(Assembler::equal,   done);
          // Falls through to p_one when the low word of src1 is above src2's.
13298     __ bind(p_one);
13299     __ incrementl($dst$$Register);
13300     __ jmpb(done);
13301     __ bind(m_one);
13302     __ decrementl($dst$$Register);
13303     __ bind(done);
13304   %}
13305   ins_pipe( pipe_slow );
13306 %}
13307 
13308 //======
13309 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13310 // compares.  Can be used for LE or GT compares by reversing arguments.
13311 // NOT GOOD FOR EQ/NE tests.
      // Compare-with-zero form: only the high word is tested (TEST hi,hi via
      // opcode 0x85 and the RegReg_Hi2 encoding), producing flagsReg_long_LTGE.
13312 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13313   match( Set flags (CmpL src zero ));
13314   ins_cost(100);
13315   format %{ "TEST   $src.hi,$src.hi" %}
13316   opcode(0x85);
13317   ins_encode( OpcP, RegReg_Hi2( src, src ) );
13318   ins_pipe( ialu_cr_reg_reg );
13319 %}
13320 
13321 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13322 // compares.  Can be used for LE or GT compares by reversing arguments.
13323 // NOT GOOD FOR EQ/NE tests.
      // Register-register form.  Per the format string: CMP of the low words,
      // then the high word of src1 is copied to tmp and SBB'd with src2's high
      // word.  tmp is declared TEMP; the code comes from long_cmp_flags2.
13324 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13325   match( Set flags (CmpL src1 src2 ));
13326   effect( TEMP tmp );
13327   ins_cost(300);
13328   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13329             "MOV    $tmp,$src1.hi\n\t"
13330             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13331   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13332   ins_pipe( ialu_cr_reg_reg );
13333 %}
13334 
13335 // Long compares reg < zero/reg OR reg >= zero/reg.
13336 // Just a wrapper for a normal branch, plus the predicate test.
      // The predicate accepts the If only when the Bool test of its first kid
      // is lt or ge; the instruction then expands to a plain jmpCon.
13337 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13338   match(If cmp flags);
13339   effect(USE labl);
13340   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13341   expand %{
13342     jmpCon(cmp,flags,labl);    // JLT or JGE...
13343   %}
13344 %}
13345 
13346 // Compare 2 longs and CMOVE longs.
      // Register source.  Requires CMOV support and a Bool test of lt or ge.
      // Two CMOVs are encoded, one for the low halves (RegReg_Lo2) and one for
      // the high halves (RegReg_Hi2), both under the same condition.
13347 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
13348   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13349   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13350   ins_cost(400);
13351   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13352             "CMOV$cmp $dst.hi,$src.hi" %}
13353   opcode(0x0F,0x40);
13354   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13355   ins_pipe( pipe_cmov_reg_long );
13356 %}
13357 
13358 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
        // Memory-source variant of cmovLL_reg_LTGE: the source is a LoadL and
        // the two CMOVs use the RegMem / RegMem_Hi encodings.  Requires CMOV
        // support and a Bool test of lt or ge.
13359   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13360   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13361   ins_cost(500);
13362   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13363             "CMOV$cmp $dst.hi,$src.hi" %}
13364   opcode(0x0F,0x40);
13365   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13366   ins_pipe( pipe_cmov_reg_long );
13367 %}
13368 
13369 // Compare 2 longs and CMOVE ints.
      // Register source; a single CMOV (opcode 0x0F 0x40 with the condition
      // supplied by enc_cmov).  Requires CMOV support and a Bool test of lt
      // or ge.
13370 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
13371   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13372   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13373   ins_cost(200);
13374   format %{ "CMOV$cmp $dst,$src" %}
13375   opcode(0x0F,0x40);
13376   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13377   ins_pipe( pipe_cmov_reg );
13378 %}
13379 
13380 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
        // Memory-source variant of cmovII_reg_LTGE: the source is a LoadI
        // folded into the CMOV (RegMem encoding).  Requires CMOV support and
        // a Bool test of lt or ge.
13381   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13382   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13383   ins_cost(250);
13384   format %{ "CMOV$cmp $dst,$src" %}
13385   opcode(0x0F,0x40);
13386   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13387   ins_pipe( pipe_cmov_mem );
13388 %}
13389 
13390 // Compare 2 longs and CMOVE pointers.
      // Register source; same single-CMOV encoding as cmovII_reg_LTGE but
      // matching CMoveP on eRegP operands.  Requires CMOV support and a Bool
      // test of lt or ge.
13391 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
13392   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
13393   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13394   ins_cost(200);
13395   format %{ "CMOV$cmp $dst,$src" %}
13396   opcode(0x0F,0x40);
13397   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13398   ins_pipe( pipe_cmov_reg );
13399 %}
13400 
13401 // Compare 2 longs and CMOVE doubles
      // regD variant, selected only when UseSSE<=1 and the Bool test is lt or
      // ge.  Expands to fcmovD_regS.
13402 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
        // The lt/ge alternatives must be parenthesized: && binds tighter than
        // ||, so without the parentheses a ge test would satisfy the predicate
        // regardless of UseSSE.
13403   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13404   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13405   ins_cost(200);
13406   expand %{
13407     fcmovD_regS(cmp,flags,dst,src);
13408   %}
13409 %}
13410 
13411 // Compare 2 longs and CMOVE doubles
      // regXD variant, selected only when UseSSE>=2 and the Bool test is lt or
      // ge.  Expands to fcmovXD_regS.
13412 instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
        // The lt/ge alternatives must be parenthesized: && binds tighter than
        // ||, so without the parentheses a ge test would satisfy the predicate
        // regardless of UseSSE.
13413   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13414   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13415   ins_cost(200);
13416   expand %{
13417     fcmovXD_regS(cmp,flags,dst,src);
13418   %}
13419 %}
13420 
13421 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
        // Compare 2 longs and CMOVE floats: regF variant, selected only when
        // UseSSE==0 and the Bool test is lt or ge.  Expands to fcmovF_regS.
        // The lt/ge alternatives must be parenthesized: && binds tighter than
        // ||, so without the parentheses a ge test would satisfy the predicate
        // regardless of UseSSE.
13422   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13423   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13424   ins_cost(200);
13425   expand %{
13426     fcmovF_regS(cmp,flags,dst,src);
13427   %}
13428 %}
13429 
13430 instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
        // Compare 2 longs and CMOVE floats: regX variant, selected only when
        // UseSSE>=1 and the Bool test is lt or ge.  Expands to fcmovX_regS.
        // The lt/ge alternatives must be parenthesized: && binds tighter than
        // ||, so without the parentheses a ge test would satisfy the predicate
        // regardless of UseSSE.
13431   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
13432   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13433   ins_cost(200);
13434   expand %{
13435     fcmovX_regS(cmp,flags,dst,src);
13436   %}
13437 %}
13438 
13439 //======
13440 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
      // Compare-with-zero form.  Per the format string the low word is copied
      // to tmp and OR'd with the high word, so the result is zero exactly when
      // both halves are zero.  tmp is declared TEMP; code is in long_cmp_flags0.
13441 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
13442   match( Set flags (CmpL src zero ));
13443   effect(TEMP tmp);
13444   ins_cost(200);
13445   format %{ "MOV    $tmp,$src.lo\n\t"
13446             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
13447   ins_encode( long_cmp_flags0( src, tmp ) );
13448   ins_pipe( ialu_reg_reg_long );
13449 %}
13450 
13451 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
      // Register-register form.  Per the format string the low words are
      // compared first and the high-word compare is skipped when they already
      // differ.  No temp register is needed; code is in long_cmp_flags1.
13452 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
13453   match( Set flags (CmpL src1 src2 ));
13454   ins_cost(200+300);
13455   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13456             "JNE,s  skip\n\t"
13457             "CMP    $src1.hi,$src2.hi\n\t"
13458      "skip:\t" %}
13459   ins_encode( long_cmp_flags1( src1, src2 ) );
13460   ins_pipe( ialu_cr_reg_reg );
13461 %}
13462 
13463 // Long compare reg == zero/reg OR reg != zero/reg
13464 // Just a wrapper for a normal branch, plus the predicate test.
      // The predicate accepts the If only when the Bool test of its first kid
      // is eq or ne; the instruction then expands to a plain jmpCon.
13465 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13466   match(If cmp flags);
13467   effect(USE labl);
13468   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13469   expand %{
13470     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13471   %}
13472 %}
13473 
13474 // Compare 2 longs and CMOVE longs.
      // Register source.  Requires CMOV support and a Bool test of eq or ne.
      // Two CMOVs are encoded, one for the low halves (RegReg_Lo2) and one for
      // the high halves (RegReg_Hi2), both under the same condition.
13475 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13476   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13477   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13478   ins_cost(400);
13479   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13480             "CMOV$cmp $dst.hi,$src.hi" %}
13481   opcode(0x0F,0x40);
13482   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13483   ins_pipe( pipe_cmov_reg_long );
13484 %}
13485 
13486 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
        // Memory-source variant of cmovLL_reg_EQNE: the source is a LoadL and
        // the two CMOVs use the RegMem / RegMem_Hi encodings.  Requires CMOV
        // support and a Bool test of eq or ne.
13487   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13488   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13489   ins_cost(500);
13490   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13491             "CMOV$cmp $dst.hi,$src.hi" %}
13492   opcode(0x0F,0x40);
13493   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13494   ins_pipe( pipe_cmov_reg_long );
13495 %}
13496 
13497 // Compare 2 longs and CMOVE ints.
      // Register source; a single CMOV (opcode 0x0F 0x40 with the condition
      // supplied by enc_cmov).  Requires CMOV support and a Bool test of eq
      // or ne.
13498 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
13499   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13500   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13501   ins_cost(200);
13502   format %{ "CMOV$cmp $dst,$src" %}
13503   opcode(0x0F,0x40);
13504   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13505   ins_pipe( pipe_cmov_reg );
13506 %}
13507 
13508 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
        // Memory-source variant of cmovII_reg_EQNE: the source is a LoadI
        // folded into the CMOV (RegMem encoding).  Requires CMOV support and
        // a Bool test of eq or ne.
13509   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13510   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13511   ins_cost(250);
13512   format %{ "CMOV$cmp $dst,$src" %}
13513   opcode(0x0F,0x40);
13514   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13515   ins_pipe( pipe_cmov_mem );
13516 %}
13517 
13518 // Compare 2 longs and CMOVE pointers.
      // Register source; same single-CMOV encoding as cmovII_reg_EQNE but
      // matching CMoveP on eRegP operands.  Requires CMOV support and a Bool
      // test of eq or ne.
13519 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13520   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13521   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13522   ins_cost(200);
13523   format %{ "CMOV$cmp $dst,$src" %}
13524   opcode(0x0F,0x40);
13525   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13526   ins_pipe( pipe_cmov_reg );
13527 %}
13528 
13529 // Compare 2 longs and CMOVE doubles
      // regD variant, selected only when UseSSE<=1 and the Bool test is eq or
      // ne.  Expands to fcmovD_regS.
13530 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
        // The eq/ne alternatives must be parenthesized: && binds tighter than
        // ||, so without the parentheses an ne test would satisfy the predicate
        // regardless of UseSSE.
13531   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13532   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13533   ins_cost(200);
13534   expand %{
13535     fcmovD_regS(cmp,flags,dst,src);
13536   %}
13537 %}
13538 
13539 // Compare 2 longs and CMOVE doubles
      // regXD variant, selected only when UseSSE>=2 and the Bool test is eq or
      // ne.  Expands to fcmovXD_regS.
13540 instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
        // The eq/ne alternatives must be parenthesized: && binds tighter than
        // ||, so without the parentheses an ne test would satisfy the predicate
        // regardless of UseSSE.
13541   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13542   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13543   ins_cost(200);
13544   expand %{
13545     fcmovXD_regS(cmp,flags,dst,src);
13546   %}
13547 %}
13548 
13549 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
        // Compare 2 longs and CMOVE floats: regF variant, selected only when
        // UseSSE==0 and the Bool test is eq or ne.  Expands to fcmovF_regS.
        // The eq/ne alternatives must be parenthesized: && binds tighter than
        // ||, so without the parentheses an ne test would satisfy the predicate
        // regardless of UseSSE.
13550   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13551   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13552   ins_cost(200);
13553   expand %{
13554     fcmovF_regS(cmp,flags,dst,src);
13555   %}
13556 %}
13557 // Compare 2 longs and CMOVE floats (regX operands).  Applies only when UseSSE>=1 and the Bool test is EQ or NE.
13558 instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
13559   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );  // parenthesized: && binds tighter than ||, so the UseSSE guard must wrap both tests
13560   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13561   ins_cost(200);
13562   expand %{
13563     fcmovX_regS(cmp,flags,dst,src);
13564   %}
13565 %}
13566 
13567 //======
13568 // Manifest a CmpL-against-zero result in the normal flags.  Only good for LE or GT compares.
13569 // Same as cmpL_reg_flags_LEGT except src is negated (computed as 0 - src), so consumers must use a commuted test.
13570 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
13571   match( Set flags (CmpL src zero ));
13572   effect( TEMP tmp );  // tmp is scratch: per the format it is zeroed, then used as the left operand of CMP/SBB
13573   ins_cost(300);
13574   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13575             "CMP    $tmp,$src.lo\n\t"
13576             "SBB    $tmp,$src.hi\n\t" %}
13577   ins_encode( long_cmp_flags3(src, tmp) );
13578   ins_pipe( ialu_reg_reg_long );
13579 %}
13580 
13581 // Manifest a CmpL result (two long registers) in the normal flags.  Only good for LE or GT compares.
13582 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13583 // requires a commuted test to get the same result (consumers take a cmpOp_commute operand).
13584 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13585   match( Set flags (CmpL src1 src2 ));
13586   effect( TEMP tmp );  // tmp is scratch: per the format it receives src2.hi before the SBB
13587   ins_cost(300);
13588   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13589             "MOV    $tmp,$src2.hi\n\t"
13590             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13591   ins_encode( long_cmp_flags2( src2, src1, tmp ) );  // note the swapped order: src2 first, then src1
13592   ins_pipe( ialu_cr_reg_reg );
13593 %}
13594 
13595 // Long compares reg <= zero/reg OR reg > zero/reg.
13596 // Just a wrapper for a normal branch, plus the predicate test (Bool test must be GT or LE).
13597 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13598   match(If cmp flags);
13599   effect(USE labl);
13600   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13601   ins_cost(300);
13602   expand %{
13603     jmpCon(cmp,flags,labl);    // JGT or JLE...
13604   %}
13605 %}
13606 
13607 // Compare 2 longs and CMOVE longs (register source).  Applies only when CMOV is supported and the Bool test is LE or GT.
13608 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13609   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13610   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13611   ins_cost(400);
13612   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13613             "CMOV$cmp $dst.hi,$src.hi" %}
13614   opcode(0x0F,0x40);
13615   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );  // two CMOVs: low half, then high half
13616   ins_pipe( pipe_cmov_reg_long );
13617 %}
13618 // Compare 2 longs and CMOVE longs (source loaded from memory via LoadL).  Applies only when CMOV is supported and the Bool test is LE or GT.
13619 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13620   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13621   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13622   ins_cost(500);
13623   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13624             "CMOV$cmp $dst.hi,$src.hi+4" %}
13625   opcode(0x0F,0x40);
13626   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );  // two CMOVs: low word, then high word
13627   ins_pipe( pipe_cmov_reg_long );
13628 %}
13629 
13630 // Compare 2 longs and CMOVE ints (register source).  Applies only when CMOV is supported and the Bool test is LE or GT.
13631 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
13632   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13633   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13634   ins_cost(200);
13635   format %{ "CMOV$cmp $dst,$src" %}
13636   opcode(0x0F,0x40);
13637   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13638   ins_pipe( pipe_cmov_reg );
13639 %}
13640 // Compare 2 longs and CMOVE ints (source loaded from memory via LoadI).  Applies only when CMOV is supported and the Bool test is LE or GT.
13641 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
13642   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13643   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13644   ins_cost(250);
13645   format %{ "CMOV$cmp $dst,$src" %}
13646   opcode(0x0F,0x40);
13647   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13648   ins_pipe( pipe_cmov_mem );
13649 %}
13650 
13651 // Compare 2 longs and CMOVE ptrs (register source).  Applies only when CMOV is supported and the Bool test is LE or GT.
13652 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13653   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13654   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13655   ins_cost(200);
13656   format %{ "CMOV$cmp $dst,$src" %}
13657   opcode(0x0F,0x40);
13658   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13659   ins_pipe( pipe_cmov_reg );
13660 %}
13661 
13662 // Compare 2 longs and CMOVE doubles (regD operands).  Applies only when UseSSE<=1 and the Bool test is LE or GT.
13663 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13664   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );  // parenthesized: && binds tighter than ||, so the UseSSE guard must wrap both tests
13665   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13666   ins_cost(200);
13667   expand %{
13668     fcmovD_regS(cmp,flags,dst,src);
13669   %}
13670 %}
13671 
13672 // Compare 2 longs and CMOVE doubles (regXD operands).  Applies only when UseSSE>=2 and the Bool test is LE or GT.
13673 instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
13674   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );  // parenthesized: && binds tighter than ||, so the UseSSE guard must wrap both tests
13675   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13676   ins_cost(200);
13677   expand %{
13678     fcmovXD_regS(cmp,flags,dst,src);
13679   %}
13680 %}
13681 // Compare 2 longs and CMOVE floats (regF operands).  Applies only when UseSSE==0 and the Bool test is LE or GT.
13682 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13683   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );  // parenthesized: && binds tighter than ||, so the UseSSE guard must wrap both tests
13684   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13685   ins_cost(200);
13686   expand %{
13687     fcmovF_regS(cmp,flags,dst,src);
13688   %}
13689 %}
13690 
13691 // Compare 2 longs and CMOVE floats (regX operands).  Applies only when UseSSE>=1 and the Bool test is LE or GT.
13692 instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
13693   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );  // parenthesized: && binds tighter than ||, so the UseSSE guard must wrap both tests
13694   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13695   ins_cost(200);
13696   expand %{
13697     fcmovX_regS(cmp,flags,dst,src);
13698   %}
13699 %}
13700 
13701 
13702 // ============================================================================
13703 // Procedure Call/Return Instructions
13704 // Call Java Static Instruction (selected when the call node is NOT a method handle invoke)
13705 // Note: If this code changes, the corresponding ret_addr_offset() and
13706 //       compute_padding() functions will have to be adjusted.
13707 instruct CallStaticJavaDirect(method meth) %{
13708   match(CallStaticJava);
13709   predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
13710   effect(USE meth);
13711   // The encoding brackets the call with pre_call_FPU / post_call_FPU and runs call_epilog right after the call.
13712   ins_cost(300);
13713   format %{ "CALL,static " %}
13714   opcode(0xE8); /* E8 cd */
13715   ins_encode( pre_call_FPU,
13716               Java_Static_Call( meth ),
13717               call_epilog,
13718               post_call_FPU );
13719   ins_pipe( pipe_slow );
13720   ins_pc_relative(1);
13721   ins_alignment(4);
13722 %}
13723 
13724 // Call Java Static Instruction (method handle version: selected when the call node IS a method handle invoke)
13725 // Note: If this code changes, the corresponding ret_addr_offset() and
13726 //       compute_padding() functions will have to be adjusted.
13727 instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
13728   match(CallStaticJava);
13729   predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
13730   effect(USE meth);
13731   // EBP is saved by all callees (for interpreter stack correction).
13732   // We use it here for a similar purpose, in {preserve,restore}_SP.
13733   // Same encoding sequence as CallStaticJavaDirect, with preserve_SP / restore_SP wrapped around the call.
13734   ins_cost(300);
13735   format %{ "CALL,static/MethodHandle " %}
13736   opcode(0xE8); /* E8 cd */
13737   ins_encode( pre_call_FPU,
13738               preserve_SP,
13739               Java_Static_Call( meth ),
13740               restore_SP,
13741               call_epilog,
13742               post_call_FPU );
13743   ins_pipe( pipe_slow );
13744   ins_pc_relative(1);
13745   ins_alignment(4);
13746 %}
13747 
13748 // Call Java Dynamic Instruction (matches CallDynamicJava nodes)
13749 // Note: If this code changes, the corresponding ret_addr_offset() and
13750 //       compute_padding() functions will have to be adjusted.
13751 instruct CallDynamicJavaDirect(method meth) %{
13752   match(CallDynamicJava);
13753   effect(USE meth);
13754   // Per the format below, a MOV of (oop)-1 into EAX precedes the call itself.
13755   ins_cost(300);
13756   format %{ "MOV    EAX,(oop)-1\n\t"
13757             "CALL,dynamic" %}
13758   opcode(0xE8); /* E8 cd */
13759   ins_encode( pre_call_FPU,
13760               Java_Dynamic_Call( meth ),
13761               call_epilog,
13762               post_call_FPU );
13763   ins_pipe( pipe_slow );
13764   ins_pc_relative(1);
13765   ins_alignment(4);
13766 %}
13767 
13768 // Call Runtime Instruction (matches CallRuntime nodes)
13769 instruct CallRuntimeDirect(method meth) %{
13770   match(CallRuntime );
13771   effect(USE meth);
13772   // Unlike the Java call rules, this encoding has no call_epilog step and declares no ins_alignment.
13773   ins_cost(300);
13774   format %{ "CALL,runtime " %}
13775   opcode(0xE8); /* E8 cd */
13776   // Use FFREEs to clear entries in float stack
13777   ins_encode( pre_call_FPU,
13778               FFree_Float_Stack_All,
13779               Java_To_Runtime( meth ),
13780               post_call_FPU );
13781   ins_pipe( pipe_slow );
13782   ins_pc_relative(1);
13783 %}
13784 
13785 // Call runtime without safepoint (matches CallLeaf nodes)
13786 instruct CallLeafDirect(method meth) %{
13787   match(CallLeaf);
13788   effect(USE meth);
13789   // Same sequence as CallRuntimeDirect plus a Verify_FPU_For_Leaf step after the call.
13790   ins_cost(300);
13791   format %{ "CALL_LEAF,runtime " %}
13792   opcode(0xE8); /* E8 cd */
13793   ins_encode( pre_call_FPU,
13794               FFree_Float_Stack_All,
13795               Java_To_Runtime( meth ),
13796               Verify_FPU_For_Leaf, post_call_FPU );
13797   ins_pipe( pipe_slow );
13798   ins_pc_relative(1);
13799 %}
13800 // Leaf runtime call for CallLeafNoFP nodes.
13801 instruct CallLeafNoFPDirect(method meth) %{
13802   match(CallLeafNoFP);
13803   effect(USE meth);
13804   // The encoding is the bare Java_To_Runtime call: no pre/post_call_FPU and no FFree_Float_Stack_All steps.
13805   ins_cost(300);
13806   format %{ "CALL_LEAF_NOFP,runtime " %}
13807   opcode(0xE8); /* E8 cd */
13808   ins_encode(Java_To_Runtime(meth));
13809   ins_pipe( pipe_slow );
13810   ins_pc_relative(1);
13811 %}
13812 
13813 
13814 // Return Instruction (matches Return nodes)
13815 // Remove the return address & jump to it.
13816 instruct Ret() %{
13817   match(Return);
13818   format %{ "RET" %}
13819   opcode(0xC3);  // opcode byte for the RET shown in the format
13820   ins_encode(OpcP);
13821   ins_pipe( pipe_jmp );
13822 %}
13823 
13824 // Tail Call; Jump from runtime stub to Java code.
13825 // Also known as an 'interprocedural jump'.
13826 // Target of jump will eventually return to caller.
13827 // TailJump below removes the return address; this rule leaves it in place.
13828 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13829   match(TailCall jump_target method_oop );
13830   ins_cost(300);
13831   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13832   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13833   ins_encode( OpcP, RegOpc(jump_target) );  // indirect jump through the jump_target register
13834   ins_pipe( pipe_jmp );
13835 %}
13836 
13837 
13838 // Tail Jump; remove the return address; jump to target.
13839 // TailCall above leaves the return address around; here it is popped (into EDX, per the format) before the jump.
13840 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13841   match( TailJump jump_target ex_oop );
13842   ins_cost(300);
13843   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13844             "JMP    $jump_target " %}
13845   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13846   ins_encode( enc_pop_rdx,
13847               OpcP, RegOpc(jump_target) );
13848   ins_pipe( pipe_jmp );
13849 %}
13850 
13851 // Create exception oop: created by stack-crawling runtime code.
13852 // Created exception is now available to this handler, and is setup
13853 // just prior to jumping to this handler.  No code emitted.
13854 instruct CreateException( eAXRegP ex_oop )
13855 %{
13856   match(Set ex_oop (CreateEx));
13857   // Zero-size rule with an empty encoding: it only binds the CreateEx result to the ex_oop operand.
13858   size(0);
13859   // use the following format syntax
13860   format %{ "# exception oop is in EAX; no code emitted" %}
13861   ins_encode();
13862   ins_pipe( empty );
13863 %}
13864 
13865 
13866 // Rethrow exception (matches Rethrow nodes):
13867 // The exception oop will come in the first argument position.
13868 // Then JUMP (not call) to the rethrow stub code.
13869 instruct RethrowException()
13870 %{
13871   match(Rethrow);
13872   // No operands are declared; the jump is produced by the enc_rethrow encoding.
13873   // use the following format syntax
13874   format %{ "JMP    rethrow_stub" %}
13875   ins_encode(enc_rethrow);
13876   ins_pipe( pipe_jmp );
13877 %}
13878 
13879 // inlined locking and unlocking
13880 
13881 // Inlined lock: sets flags cr from (FastLock object box); tmp and scr are scratch registers (TEMP).
13882 instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
13883   match( Set cr (FastLock object box) );
13884   effect( TEMP tmp, TEMP scr );
13885   ins_cost(300);
13886   format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
13887   ins_encode( Fast_Lock(object,box,tmp,scr) );
13888   ins_pipe( pipe_slow );
13889   ins_pc_relative(1);
13890 %}
13891 // Inlined unlock: sets flags cr from (FastUnlock object box); tmp is a scratch register (TEMP).
13892 instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13893   match( Set cr (FastUnlock object box) );
13894   effect( TEMP tmp );
13895   ins_cost(300);
13896   format %{ "FASTUNLOCK $object, $box, $tmp" %}
13897   ins_encode( Fast_Unlock(object,box,tmp) );
13898   ins_pipe( pipe_slow );
13899   ins_pc_relative(1);
13900 %}
13901 
13902 
13903 
13904 // ============================================================================
13905 // Safepoint Instruction: matches SafePoint nodes; declared as a fixed 6-byte instruction that kills the flags.
13906 instruct safePoint_poll(eFlagsReg cr) %{
13907   match(SafePoint);
13908   effect(KILL cr);
13909   // The poll is shown in the format below as a TSTL against the polling address.
13910   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13911   // On SPARC that might be acceptable as we can generate the address with
13912   // just a sethi, saving an or.  By polling at offset 0 we can end up
13913   // putting additional pressure on the index-0 in the D$.  Because of
13914   // alignment (just like the situation at hand) the lower indices tend
13915   // to see more traffic.  It'd be better to change the polling address
13916   // to offset 0 of the last $line in the polling page.
13917   // NOTE(review): size(6) below must stay in sync with what Safepoint_Poll() emits -- confirm if the encoding changes.
13918   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13919   ins_cost(125);
13920   size(6) ;
13921   ins_encode( Safepoint_Poll() );
13922   ins_pipe( ialu_reg_mem );
13923 %}
13924 
13925 //----------PEEPHOLE RULES-----------------------------------------------------
13926 // These must follow all instruction definitions as they use the names
13927 // defined in the instructions definitions.
13928 //
13929 // peepmatch ( root_instr_name [preceding_instruction]* );
13930 //
13931 // peepconstraint %{
13932 // (instruction_number.operand_name relational_op instruction_number.operand_name
13933 //  [, ...] );
13934 // // instruction numbers are zero-based using left to right order in peepmatch
13935 //
13936 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13937 // // provide an instruction_number.operand_name for each operand that appears
13938 // // in the replacement instruction's match rule
13939 //
13940 // ---------VM FLAGS---------------------------------------------------------
13941 //
13942 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13943 //
13944 // Each peephole rule is given an identifying number starting with zero and
13945 // increasing by one in the order seen by the parser.  An individual peephole
13946 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13947 // on the command-line.
13948 //
13949 // ---------CURRENT LIMITATIONS----------------------------------------------
13950 //
13951 // Only match adjacent instructions in same basic block
13952 // Only equality constraints
13953 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13954 // Only one replacement instruction
13955 //
13956 // ---------EXAMPLE----------------------------------------------------------
13957 //
13958 // // pertinent parts of existing instructions in architecture description
13959 // instruct movI(eRegI dst, eRegI src) %{
13960 //   match(Set dst (CopyI src));
13961 // %}
13962 //
13963 // instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
13964 //   match(Set dst (AddI dst src));
13965 //   effect(KILL cr);
13966 // %}
13967 //
13968 // // Change (inc mov) to lea
13969 // peephole %{
13970 //   // increment preceded by register-register move
13971 //   peepmatch ( incI_eReg movI );
13972 //   // require that the destination register of the increment
13973 //   // match the destination register of the move
13974 //   peepconstraint ( 0.dst == 1.dst );
13975 //   // construct a replacement instruction that sets
13976 //   // the destination to ( move's source register + one )
13977 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13978 // %}
13979 //
13980 // Implementation no longer uses movX instructions since
13981 // machine-independent system no longer uses CopyX nodes.
13982 //
13983 // peephole %{
13984 //   peepmatch ( incI_eReg movI );
13985 //   peepconstraint ( 0.dst == 1.dst );
13986 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13987 // %}
13988 //
13989 // peephole %{
13990 //   peepmatch ( decI_eReg movI );
13991 //   peepconstraint ( 0.dst == 1.dst );
13992 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13993 // %}
13994 //
13995 // peephole %{
13996 //   peepmatch ( addI_eReg_imm movI );
13997 //   peepconstraint ( 0.dst == 1.dst );
13998 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13999 // %}
14000 //
14001 // peephole %{
14002 //   peepmatch ( addP_eReg_imm movP );
14003 //   peepconstraint ( 0.dst == 1.dst );
14004 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
14005 // %}
14006 
14007 // // Change load of spilled value to only a spill
14008 // instruct storeI(memory mem, eRegI src) %{
14009 //   match(Set mem (StoreI mem src));
14010 // %}
14011 //
14012 // instruct loadI(eRegI dst, memory mem) %{
14013 //   match(Set dst (LoadI mem));
14014 // %}
14015 // Rule: a loadI (instruction 0, the root) preceded by a storeI (instruction 1) of the same register to the same memory is replaced by the storeI alone.
14016 peephole %{
14017   peepmatch ( loadI storeI );
14018   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
14019   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
14020 %}
14021 
14022 //----------SMARTSPILL RULES---------------------------------------------------
14023 // These must follow all instruction definitions as they use the names
14024 // defined in the instructions definitions.