1 //
   2 // Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

// The encodings below are the hardware register numbers
// (EAX=0, ECX=1, EDX=2, EBX=3, ESP=4, EBP=5, ESI=6, EDI=7).
reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Special Registers
reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
// Each FPR has an L (low) and H (high) half so a register pair can hold
// an 80/64-bit double; the L halves alone are used for floats.
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());

// XMM registers.  128-bit registers or 4 words each, labeled a-d.
// Word a in each register holds a Float, words ab hold a Double.
// We currently do not use the SIMD capabilities, so registers cd
// are unused at the moment.
reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H );

alloc_class chunk1( XMM0a, XMM0b,
                    XMM1a, XMM1b,
                    XMM2a, XMM2b,
                    XMM3a, XMM3b,
                    XMM4a, XMM4b,
                    XMM5a, XMM5b,
                    XMM6a, XMM6b,
                    XMM7a, XMM7b, EFLAGS);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for all registers
reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for general registers
reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers which may be used for implicit null checks on win95
// Also safe for use by tailjump. We don't want to allocate in rbp,
reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
// Class of "X" registers
reg_class x_reg(EBX, ECX, EDX, EAX);
// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);
// Class for general registers not including ECX
reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers not including EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
// Class for general registers not including EAX or EBX.
reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);
// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);
// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);
// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);
// Class of EDI (for synchronization)
reg_class edi_reg(EDI);
// Class of ESI (for synchronization)
reg_class esi_reg(ESI);
// Singleton class for interpreter's stack pointer
reg_class ebp_reg(EBP);
// Singleton class for stack pointer
reg_class sp_reg(ESP);
// Singleton class for instruction pointer
// reg_class ip_reg(EIP);
// Singleton class for condition codes
reg_class int_flags(EFLAGS);
// Class of integer register pairs
reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
// Not AX or DX, used in divides
reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

// make a register class for SSE registers
reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);

// make a double register class for SSE2 registers
reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
                  XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );

reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                   FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                   FPR7L,FPR7H );

reg_class flt_reg0( FPR1L );
reg_class dbl_reg0( FPR1L,FPR1H );
reg_class dbl_reg1( FPR2L,FPR2H );
reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                       FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

// XMM6 and XMM7 could be used as temporary registers for long, float and
// double values for SSE2.
reg_class xdb_reg6( XMM6a,XMM6b );
reg_class xdb_reg7( XMM7a,XMM7b );
%}
 233 
 234 
//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
// Declaration only; the definition lives elsewhere in this AD file's sources.
extern bool is_operand_hi32_zero(Node* n);
%}
 242 
source %{
// Shorthand relocation formats for 32-bit immediates and displacements.
#define   RELOC_IMM32    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Conventional HotSpot assembler shorthand: "__ insn(...)" emits via _masm.
#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define   HIGH_FROM_LOW(x) ((x)+2)
 251 
 252 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 253 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 254 // fast versions of NegF/NegD and AbsF/AbsD.
 255 
 256 // Note: 'double' and 'long long' have 32-bits alignment on x86.
 257 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 258   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
 259   // of 128-bits operands for SSE instructions.
 260   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 261   // Store the value to a 128-bits operand.
 262   operand[0] = lo;
 263   operand[1] = hi;
 264   return operand;
 265 }
 266 
// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pool entry is a 16-byte-aligned 128-bit constant carved out of
// fp_signmask_pool by double_quadword():
//   signmask: clears the sign bit(s)  -> AbsF/AbsD
//   signflip: toggles the sign bit(s) -> NegF/NegD
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
 275 
 276 // Offset hacking within calls.
 277 static int pre_call_FPU_size() {
 278   if (Compile::current()->in_24_bit_fp_mode())
 279     return 6; // fldcw
 280   return 0;
 281 }
 282 
// Size in bytes of the "mov rbp, rsp" used to preserve SP across a
// method-handle invoke: optional REX prefix (64-bit only) + opcode + ModRM.
static int preserve_SP_size() {
  return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
}
 286 
 287 // !!!!! Special hack to get all type of calls to specify the byte offset
 288 //       from the start of the call to the point where the return address
 289 //       will point.
 290 int MachCallStaticJavaNode::ret_addr_offset() {
 291   int offset = 5 + pre_call_FPU_size();  // 5 bytes from start of call to where return address points
 292   if (_method_handle_invoke)
 293     offset += preserve_SP_size();
 294   return offset;
 295 }
 296 
// Dynamic call: 5-byte MOV imm32 (inline-cache setup, see
// CallDynamicJavaDirectNode::compute_padding) + 5-byte call rel32.
int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_FPU_size();  // 10 bytes from start of call to where return address points
}

// Byte size of the FFree_Float_Stack_All sequence; set once when it is
// first emitted (hence the -1 sentinel until then).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  // FPU-stack flush + 5-byte call rel32 + optional fldcw.
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_FPU_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}
 313 
 314 //
 315 // Compute padding required for nodes which need alignment
 316 //
 317 
 318 // The address of the call instruction needs to be 4-byte aligned to
 319 // ensure that it does not span a cache line so that it can be patched.
 320 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 321   current_offset += pre_call_FPU_size();  // skip fldcw, if any
 322   current_offset += 1;      // skip call opcode byte
 323   return round_to(current_offset, alignment_required()) - current_offset;
 324 }
 325 
 326 // The address of the call instruction needs to be 4-byte aligned to
 327 // ensure that it does not span a cache line so that it can be patched.
 328 int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
 329   current_offset += pre_call_FPU_size();  // skip fldcw, if any
 330   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 331   current_offset += 1;      // skip call opcode byte
 332   return round_to(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_FPU_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return round_to(current_offset, alignment_required()) - current_offset;
 342 }
 343 
#ifndef PRODUCT
// Debug-only disassembly text for a breakpoint node (INT3 instruction).
void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const {
  st->print("INT3");
}
#endif
 349 
 350 // EMIT_RM()
 351 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 352   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_CC()
 357 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 358   unsigned char c = (unsigned char)( f1 | f2 );
 359   cbuf.insts()->emit_int8(c);
 360 }
 361 
// EMIT_OPCODE()
// Emit a single opcode byte into the code buffer.
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
// 'offset' shifts the relocation address relative to the instruction mark
// (e.g. to account for bytes already emitted before this opcode).
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
// Emit an 8-bit immediate or displacement.
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
// Emit a 16-bit immediate or displacement.
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
// Emit a 32-bit immediate or displacement.
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Debug builds verify that any embedded oop is a real, non-scavengable
  // oop (NULL and the non-oop sentinel are exempt).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oop(d32)->is_oop() && (ScavengeRootsInCode || !oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}
 406 
 407 // Access stack slot for load or store
 408 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 409   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 410   if( -128 <= disp && disp <= 127 ) {
 411     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 412     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 413     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 414   } else {
 415     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 416     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 417     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 418   }
 419 }
 420 
   // eRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModRM (and, when needed, SIB) byte plus displacement for a
// reg/mem operand pair: [base + index*scale + displace].
// An index of 0x4 means "no index" (ESP cannot be an index register), and a
// base of -1 selects the absolute-address form.  An oop displacement always
// forces the 32-bit form so the relocation has a full word to patch.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    // (mod=00 with base EBP means absolute address, so EBP needs a disp).
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && !(displace_is_oop) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( displace_is_oop ) {
            emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( displace_is_oop ) {
            emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && !(displace_is_oop) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( displace_is_oop ) {
          emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}
 487 
 488 
 489 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 490   if( dst_encoding == src_encoding ) {
 491     // reg-reg copy, use an empty encoding
 492   } else {
 493     emit_opcode( cbuf, 0x8B );
 494     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 495   }
 496 }
 497 
 498 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 499   if( dst_encoding == src_encoding ) {
 500     // reg-reg copy, use an empty encoding
 501   } else {
 502     MacroAssembler _masm(&cbuf);
 503 
 504     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 505   }
 506 }
 507 
 508 
//=============================================================================
// x86-32 addresses the constant table absolutely, so the constant-base node
// needs no register and emits no code.
const bool Matcher::constant_table_absolute_addressing = true;
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

// No bytes emitted, so the node's size is zero.
uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif
 526 
 527 
//=============================================================================
#ifndef PRODUCT
// Debug-only disassembly text for the method prolog.  Must mirror the byte
// sequence produced by MachPrologNode::emit() below.
void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile* C = ra_->C;
  if( C->in_24_bit_fp_mode() ) {
    st->print("FLDCW  24 bit fpu control word");
    st->print_cr(""); st->print("\t");
  }

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them.  But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack.  But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (C->need_stack_bang(framesize)) {
    st->print_cr("# stack bang"); st->print("\t");
  }
  st->print_cr("PUSHL  EBP"); st->print("\t");

  if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
    st->print("PUSH   0xBADB100D\t# Majik cookie for stack depth check");
    st->print_cr(""); st->print("\t");
    framesize -= wordSize;   // the cookie occupies one word of the frame
  }

  // Same imm8/imm32 choice as emit(): small frames may use the short SUB.
  if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
    if (framesize) {
      st->print("SUB    ESP,%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB    ESP,%d\t# Create frame",framesize);
  }
}
#endif
 567 

// Emit the method prolog: optional fldcw, optional stack bang, push EBP,
// optional stack-depth cookie, then frame allocation.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  if (UseSSE >= 2 && VerifyFPU) {
    MacroAssembler masm(&cbuf);
    masm.verify_FPU(0, "FPU stack must be clean on entry");
  }

  // WARNING: Initial instruction MUST be 5 bytes or longer so that
  // NativeJump::patch_verified_entry will be able to patch out the entry
  // code safely. The fldcw is ok at 6 bytes, the push to verify stack
  // depth is ok at 5 bytes, the frame allocation can be either 3 or
  // 6 bytes. So if we don't do the fldcw or the push then we must
  // use the 6 byte frame allocation even if we have no frame. :-(
  // If method sets FPU control word do it now
  if( C->in_24_bit_fp_mode() ) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
  }

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them.  But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack.  But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (C->need_stack_bang(framesize)) {
    MacroAssembler masm(&cbuf);
    masm.generate_stack_overflow_check(framesize);
  }

  // We always push rbp, so that on return to interpreter rbp, will be
  // restored correctly and we can correct the stack.
  emit_opcode(cbuf, 0x50 | EBP_enc);   // pushl EBP

  if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
    emit_opcode(cbuf, 0x68); // push 0xbadb100d
    emit_d32(cbuf, 0xbadb100d);
    framesize -= wordSize;   // the cookie occupies one word of the frame
  }

  // Allocate the rest of the frame.  The short (imm8) SUB form is only
  // safe when an fldcw or cookie push already satisfied the 5-byte
  // patchable-entry requirement above.
  if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
    if (framesize) {
      emit_opcode(cbuf, 0x83);   // sub  SP,#framesize (imm8)
      emit_rm(cbuf, 0x3, 0x05, ESP_enc);
      emit_d8(cbuf, framesize);
    }
  } else {
    emit_opcode(cbuf, 0x81);   // sub  SP,#framesize (imm32)
    emit_rm(cbuf, 0x3, 0x05, ESP_enc);
    emit_d32(cbuf, framesize);
  }
  C->set_frame_complete(cbuf.insts_size());

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    // Debug check that ESP obeys the platform stack-alignment invariant.
    Label L;
    MacroAssembler masm(&cbuf);
    masm.push(rax);
    masm.mov(rax, rsp);
    masm.andptr(rax, StackAlignmentInBytes-1);
    masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
    masm.pop(rax);
    masm.jcc(Assembler::equal, L);
    masm.stop("Stack is not properly aligned!");
    masm.bind(L);
  }
#endif

}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}
 651 
//=============================================================================
#ifndef PRODUCT
// Debug-only disassembly text for the method epilog.  Must mirror the byte
// sequence produced by MachEpilogNode::emit() below.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if( C->in_24_bit_fp_mode() ) {
    st->print("FLDCW  standard control word");
    st->cr(); st->print("\t");
  }
  if( framesize ) {
    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL   EBP"); st->print("\t");
  if( do_polling() && C->is_method_compilation() ) {
    st->print("TEST   PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif
 675 #endif
 676 
 677 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 678   Compile *C = ra_->C;
 679 
 680   // If method set FPU control word, restore to standard control word
 681   if( C->in_24_bit_fp_mode() ) {
 682     MacroAssembler masm(&cbuf);
 683     masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 684   }
 685 
 686   int framesize = C->frame_slots() << LogBytesPerInt;
 687   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 688   // Remove two words for return addr and rbp,
 689   framesize -= 2*wordSize;
 690 
 691   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 692 
 693   if( framesize >= 128 ) {
 694     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 695     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 696     emit_d32(cbuf, framesize);
 697   }
 698   else if( framesize ) {
 699     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 700     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 701     emit_d8(cbuf, framesize);
 702   }
 703 
 704   emit_opcode(cbuf, 0x58 | EBP_enc);
 705 
 706   if( do_polling() && C->is_method_compilation() ) {
 707     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 708     emit_opcode(cbuf,0x85);
 709     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 710     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 711   }
 712 }
 713 
 714 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 715   Compile *C = ra_->C;
 716   // If method set FPU control word, restore to standard control word
 717   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 718   if( do_polling() && C->is_method_compilation() ) size += 6;
 719 
 720   int framesize = C->frame_slots() << LogBytesPerInt;
 721   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 722   // Remove two words for return addr and rbp,
 723   framesize -= 2*wordSize;
 724 
 725   size++; // popl rbp,
 726 
 727   if( framesize >= 128 ) {
 728     size += 6;
 729   } else {
 730     size += framesize ? 3 : 0;
 731   }
 732   return size;
 733 }
 734 
// Worst-case count of relocation entries the epilog may need.
int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}
 738 
// Epilog has no special scheduling needs; use the generic pipeline class.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
 742 
 743 int MachEpilogNode::safepoint_offset() const { return 0; }
 744 
 745 //=============================================================================
 746 
 747 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 748 static enum RC rc_class( OptoReg::Name reg ) {
 749 
 750   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 751   if (OptoReg::is_stack(reg)) return rc_stack;
 752 
 753   VMReg r = OptoReg::as_VMReg(reg);
 754   if (r->is_Register()) return rc_int;
 755   if (r->is_FloatRegister()) {
 756     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 757     return rc_float;
 758   }
 759   assert(r->is_XMMRegister(), "must be");
 760   return rc_xmm;
 761 }
 762 
// Emit (cbuf != NULL) or pretty-print (cbuf == NULL, !do_size) a single
// [ESP+offset] memory-form instruction -- MOV load/store, FLD/FST, PUSH/POP.
// 'reg' selects the /r (or /digit) field, 'opcode' the instruction byte.
// Returns the running byte count: size + opcode(1) + modrm/SIB(2) + disp.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    // ESP-based addressing always needs a SIB byte (index 0x4 = none).
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // Displacement encodes in 0, 1 (disp8) or 4 (disp32) bytes.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}
 782 
 783 // Helper for XMM registers.  Extra opcode bits, limited syntax.
// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Moves an XMM register to/from an [ESP+offset] stack slot.  A double move
// (reg_lo+1 == reg_hi) uses MOVSD (F2 0F 10/11) -- or MOVLPD (66 0F 12) for
// loads when UseXmmLoadAndClearUpper is off; a single move uses MOVSS
// (F3 0F 10/11).  Returns size + 5 opcode/modrm/SIB bytes + displacement.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                         int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  if( cbuf ) {
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load && !UseXmmLoadAndClearUpper )
        emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
      else
        emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
    } else {
      emit_opcode(*cbuf, 0xF3 );
    }
    emit_opcode(*cbuf, 0x0F );
    if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
      emit_opcode(*cbuf, 0x12 );   // use 'movlpd' for load
    else
      emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
    // ESP base requires a SIB byte (index 0x4 = none).
    encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load ) st->print("%s %s,[ESP + #%d]",
                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                               Matcher::regName[reg_lo], offset);
      else          st->print("MOVSD  [ESP + #%d],%s",
                               offset, Matcher::regName[reg_lo]);
    } else {
      if( is_load ) st->print("MOVSS  %s,[ESP + #%d]",
                               Matcher::regName[reg_lo], offset);
      else          st->print("MOVSS  [ESP + #%d],%s",
                               offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // Displacement encodes in 0, 1 (disp8) or 4 (disp32) bytes.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+5+offset_size;
}
 821 
 822 
// XMM-to-XMM register copy.  With UseXmmRegToRegMoveAll the full-register
// MOVAPS/MOVAPD (0F 28, 66-prefixed for doubles) is used; otherwise the
// scalar MOVSS/MOVSD (F3/F2 0F 10) copies just the low lanes.  Returns the
// running byte count.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
    if( cbuf ) {
      if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
        emit_opcode(*cbuf, 0x66 );  // 0x66 prefix selects MOVAPD
      }
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x28 );
      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    // MOVAPD is 4 bytes (extra prefix); MOVAPS is 3.
    return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
  } else {
    if( cbuf ) {
      emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x10 );
      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    // Prefix + 0F + opcode + modrm = 4 bytes either way.
    return size+4;
  }
}
 863 
// Copy a 32-bit general-purpose register into an XMM register using
// MOVD (66 0F 6E /r).  src_hi/dst_hi are unused on this 32-bit path.
// NOTE(review): unlike the other helpers this returns a flat 4 (not size+4)
// and skips the "\n\t" separator when printing -- callers currently invoke
// it only with size == 0, so behavior matches; verify before reusing.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    emit_opcode(*cbuf, 0x66);
    emit_opcode(*cbuf, 0x0F);
    emit_opcode(*cbuf, 0x6E);
    emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4;
}
 879 
 880 
// Copy an XMM register into a 32-bit general-purpose register using
// MOVD (66 0F 7E /r); note the ModRM operand order is reversed relative to
// impl_movgpr2x_helper because 0F 7E stores from the XMM register.
// NOTE(review): returns a flat 4 rather than size+4 -- callers currently
// pass size == 0 so this matches; verify before reusing with nonzero size.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    emit_opcode(*cbuf, 0x66);
    emit_opcode(*cbuf, 0x0F);
    emit_opcode(*cbuf, 0x7E);
    emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4;
}
 896 
// Integer register-to-register copy: MOV r32,r32 (8B /r), 2 bytes.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}
 909 
// Store an x87 FP register to an [ESP+offset] stack slot.  If the source is
// not already at the top of the FP stack, FLD pushes a copy first and the
// store then pops (FSTP); otherwise a non-popping FST is used.  Double vs.
// single stores are distinguished by adjacent hi/lo register pairs.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      // Encoding compensates for the 1-based FPR numbering (ST(i) = C0+i).
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD    %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // The "register" argument to impl_helper selects the ModRM /digit field:
  // EBX_num encodes /3 (FSTP, store & pop), EDX_num encodes /2 (FST, no pop).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
 939 
// Emit (cbuf != NULL), pretty-print (cbuf == NULL and !do_size) or just
// measure (do_size) a register-allocator spill copy.  Returns the cumulative
// byte size of the emitted instructions.  A copy may involve a low and a
// high 32-bit half (64-bit values on this 32-bit target), each of which can
// independently live in an integer register, x87 register, XMM register or
// stack slot; the cases below are dispatched on those register classes.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    // Copy the high half first when the ranges overlap so the low-half
    // PUSH below does not read an already-overwritten slot.
    if( src_second == dst_first ) { // overlapping stack copy ranges
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
     } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
     }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP is two 2-byte instructions; lone FST is one.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // FLD (3 + disp bytes) followed by a 2-byte FSTP.
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Spill the x87 value into the 8-byte scratch area just created...
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    // ...then release the scratch area, again via LEA to keep flags intact.
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);

  // Unknown register-class combination for the second word.
  Unimplemented();
}
1146 
#ifndef PRODUCT
// Pretty-print the spill copy; a NULL CodeBuffer selects the print path
// inside the shared implementation() routine.
void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  implementation( NULL, ra_, false, st );
}
#endif
1152 
// Emit the spill-copy instructions into the code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}
1156 
// Byte size of the spill copy; do_size=true makes implementation() only count.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}
1160 
1161 //=============================================================================
#ifndef PRODUCT
// Pretty-print a NOP padding node with its byte count.
void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const {
  st->print("NOP \t# %d bytes pad for loops and calls", _count);
}
#endif
1167 
// Emit _count bytes of NOP padding via the macro assembler.
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}
1172 
// Padding size in bytes equals the NOP count.
uint MachNopNode::size(PhaseRegAlloc *) const {
  return _count;
}
1176 
1177 
1178 //=============================================================================
1179 #ifndef PRODUCT
1180 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1181   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1182   int reg = ra_->get_reg_first(this);
1183   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1184 }
1185 #endif
1186 
1187 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1188   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1189   int reg = ra_->get_encode(this);
1190   if( offset >= 128 ) {
1191     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1192     emit_rm(cbuf, 0x2, reg, 0x04);
1193     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1194     emit_d32(cbuf, offset);
1195   }
1196   else {
1197     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1198     emit_rm(cbuf, 0x1, reg, 0x04);
1199     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1200     emit_d8(cbuf, offset);
1201   }
1202 }
1203 
1204 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1205   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1206   if( offset >= 128 ) {
1207     return 7;
1208   }
1209   else {
1210     return 4;
1211   }
1212 }
1213 
1214 //=============================================================================
1215 
1216 // emit call stub, compiled java to interpreter
// emit call stub, compiled java to interpreter
void emit_java_to_interp(CodeBuffer &cbuf ) {
  // Stub is fixed up when the corresponding call is converted from calling
  // compiled code to calling interpreted code.
  // mov rbx,0
  // jmp -1

  address mark = cbuf.insts_mark();  // get mark within main instrs section

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a stub.
  MacroAssembler _masm(&cbuf);

  address base =
  __ start_a_stub(Compile::MAX_stubs_size);
  if (base == NULL)  return;  // CodeBuffer::expand failed
  // static stub relocation stores the instruction address of the call
  __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
  // static stub relocation also tags the methodOop in the code-stream.
  __ movoop(rbx, (jobject)NULL);  // method is zapped till fixup time
  // This is recognized as unresolved by relocs/nativeInst/ic code
  // (a jump whose target is its own next instruction).
  __ jump(RuntimeAddress(__ pc()));

  __ end_a_stub();
  // Update current stubs pointer and restore insts_end.
}
1242 // size of call stub, compiled java to interpretor
// size of call stub, compiled java to interpretor:
// 5-byte movl of the methodOop plus a 5-byte jmp.
uint size_java_to_interp() {
  return 10;  // movl; jmp
}
1246 // relocation entries for call stub, compiled java to interpretor
// relocation entries for call stub, compiled java to interpretor
uint reloc_java_to_interp() {
  return 4;  // 3 in emit_java_to_interp + 1 in Java_Static_Call
}
1250 
1251 //=============================================================================
1252 #ifndef PRODUCT
// Pretty-print the unverified entry point: inline-cache klass check
// followed by alignment NOPs (one fewer when OptoBreakpoint reserves
// space for an int3).  Must mirror MachUEPNode::emit().
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
1261 #endif
1262 
// Emit the unverified entry point: compare the cached klass (in EAX) against
// the receiver's klass (ECX) and branch to the inline-cache miss stub on
// mismatch, then pad with NOPs so the verified entry point is patchable.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
     nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}
1280 
1281 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1282   return OptoBreakpoint ? 11 : 12;
1283 }
1284 
1285 
1286 //=============================================================================
uint size_exception_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // exception handler starts out as jump and can be patched to
  // a call be deoptimization.  (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return NativeJump::instruction_size;
}
1295 
1296 // Emit exception handler code.  Stuff framesize into a register
1297 // and call a VM stub routine.
// Emit exception handler code.  Stuff framesize into a register
// and call a VM stub routine.  Returns the offset of the handler
// within the stubs section, or 0 if stub space could not be allocated.
int emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base =
  __ start_a_stub(size_exception_handler());
  if (base == NULL)  return 0;  // CodeBuffer::expand failed
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1312 
uint size_deopt_handler() {
  // The deopt handler (see emit_deopt_handler) is a 5-byte pushl of the
  // handler's own address followed by a jump to the deopt blob's unpack
  // entry (NativeJump-sized).
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return 5 + NativeJump::instruction_size; // pushl(); jmp;
}
1321 
1322 // Emit deopt handler code.
1323 int emit_deopt_handler(CodeBuffer& cbuf) {
1324 
1325   // Note that the code buffer's insts_mark is always relative to insts.
1326   // That's why we must use the macroassembler to generate a handler.
1327   MacroAssembler _masm(&cbuf);
1328   address base =
1329   __ start_a_stub(size_exception_handler());
1330   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1331   int offset = __ offset();
1332   InternalAddress here(__ pc());
1333   __ pushptr(here.addr());
1334 
1335   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1336   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1337   __ end_a_stub();
1338   return offset;
1339 }
1340 
1341 
1342 const bool Matcher::match_rule_supported(int opcode) {
1343   if (!has_match_rule(opcode))
1344     return false;
1345 
1346   return true;  // Per default match rules are supported.
1347 }
1348 
// Map an allocator register number to an FPU-stack offset.
int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}
1352 
// This is UltraSparc specific, true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
1357 
1358 // Vector width in bytes
1359 const uint Matcher::vector_width_in_bytes(void) {
1360   return UseSSE >= 2 ? 8 : 0;
1361 }
1362 
// Vector ideal reg: vectors are carried in double registers.
const uint Matcher::vector_ideal_reg(void) {
  return Op_RegD;
}
1367 
1368 // Is this branch offset short enough that a short branch can be used?
1369 //
1370 // NOTE: If the platform does not provide any short branch variants, then
1371 //       this method should return false for offset 0.
1372 bool Matcher::is_short_branch_offset(int rule, int offset) {
1373   // the short version of jmpConUCF2 contains multiple branches,
1374   // making the reach slightly less
1375   if (rule == jmpConUCF2_rule)
1376     return (-126 <= offset && offset <= 125);
1377   return (-128 <= offset && offset <= 127);
1378 }
1379 
// Can a 64-bit constant be stored in one instruction on this platform?
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}
1384 
// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray: below this, clearing is expanded inline.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;
1399 
// Narrow oops are a 64-bit-only concept; this must never be called on x86-32.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}
1404 
1405 
// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
1417 
1418 
// Rewrite the memory operand of a load that serves as an implicit null
// check so that it is safe on Win95/Win98 (which cannot recover from
// certain EBP-based faults -- see the *_win95_safe operand classes).
// 'idx' is the input-edge index of the faulting address; the loop below
// walks the node's operands to find which operand owns that edge.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt     = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                          // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  // Replace the operand with its win95-safe equivalent where one exists.
  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new (C) indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}
1468 
// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when FPU is used for floats.
bool Matcher::float_in_double() { return (UseSSE == 0); }

// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;
1479 
1480 // Return whether or not this register is ever used as an argument.  This
1481 // function is used on startup to build the trampoline stubs in generateOptoStub.
1482 // Registers not mentioned will be killed by the VM call in the trampoline, and
1483 // arguments in those registers not be available to the callee.
1484 bool Matcher::can_be_java_arg( int reg ) {
1485   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1486   if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
1487   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1488   return false;
1489 }
1490 
// A register may hold a spilled argument exactly when it can carry a
// Java argument (see can_be_java_arg above).
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}
1494 
1495 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1496   // Use hardware integer DIV instruction when
1497   // it is faster than a code which use multiply.
1498   // Only when constant divisor fits into 32 bit
1499   // (min_jint is excluded to get only correct
1500   // positive 32 bit values from negative).
1501   return VM_Version::has_fast_idiv() &&
1502          (divisor == (int)divisor && divisor != min_jint);
1503 }
1504 
// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask;   // x86 IDIV leaves the quotient in EAX
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask;   // x86 IDIV leaves the remainder in EDX
}

// Register for DIVL projection of divmodL
// Not supported on x86-32 (longs live in register pairs), so this must
// never be called.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
// Not supported on x86-32; must never be called.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// EBP is used to save the caller's SP across method handle invokes
// (see the preserve_SP/restore_SP encodings).
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return EBP_REG_mask;
}
1530 
1531 // Returns true if the high 32 bits of the value is known to be zero.
1532 bool is_operand_hi32_zero(Node* n) {
1533   int opc = n->Opcode();
1534   if (opc == Op_LoadUI2L) {
1535     return true;
1536   }
1537   if (opc == Op_AndL) {
1538     Node* o2 = n->in(2);
1539     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1540       return true;
1541     }
1542   }
1543   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1544     return true;
1545   }
1546   return false;
1547 }
1548 
1549 %}
1550 
1551 //----------ENCODING BLOCK-----------------------------------------------------
1552 // This block specifies the encoding classes used by the compiler to output
1553 // byte streams.  Encoding classes generate functions which are called by
1554 // Machine Instruction Nodes in order to generate the bit encoding of the
1555 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently four supported interfaces:
1557 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1558 // operand to generate a function which returns its register number when
1559 // queried.   CONST_INTER causes an operand to generate a function which
1560 // returns the value of the constant when queried.  MEMORY_INTER causes an
1561 // operand to generate four functions which return the Base Register, the
1562 // Index Register, the Scale Value, and the Offset Value of the operand when
1563 // queried.  COND_INTER causes an operand to generate six functions which
1564 // return the encoding code (ie - encoding bits for the instruction)
1565 // associated with each basic boolean condition for a conditional instruction.
1566 // Instructions specify two basic values for encoding.  They use the
1567 // ins_encode keyword to specify their encoding class (which must be one of
1568 // the class names specified in the encoding block), and they use the
1569 // opcode keyword to specify, in order, their primary, secondary, and
1570 // tertiary opcode.  Only the opcode sections which a particular instruction
1571 // needs for encoding need to be specified.
1572 encode %{
1573   // Build emit functions for each basic byte or larger field in the intel
1574   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1575   // code in the enc_class source block.  Emit functions will live in the
1576   // main source block for now.  In future, we can generalize this by
1577   // adding a syntax that specifies the sizes of fields in an order,
1578   // so that the adlc can build the emit functions automagically
1579 
  // Emit primary opcode
  enc_class OpcP %{
    emit_opcode(cbuf, $primary);
  %}

  // Emit secondary opcode
  enc_class OpcS %{
    emit_opcode(cbuf, $secondary);
  %}

  // Emit opcode directly
  enc_class Opcode(immI d8) %{
    emit_opcode(cbuf, $d8$$constant);
  %}

  // Emit the 0x66 operand-size override prefix (selects 16-bit operands)
  enc_class SizePrefix %{
    emit_opcode(cbuf,0x66);
  %}

  // Emit a mod/rm byte for a register-to-register form (mod == 0x3)
  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Emit an explicit opcode byte followed by a register-register mod/rm byte
  enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{    // OpcRegReg(Many)
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Load immediate zero into a 32-bit register: MOV r32, 0
  enc_class mov_r32_imm0( eRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
    emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
  %}
1612 
  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    // NOTE: the jne/je displacements below are hand-computed byte counts
    // over the fixed-size sequence above; keep them in sync if it changes.
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}
1653 
  // Dense encoding for older common ops: opcode byte has the register
  // number folded into its low bits (e.g. 0xB8+rd for MOV r32,imm32).
  enc_class Opc_plus(immI opcode, eRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  // Like OpcSE but also emits the mod/rm byte carrying the secondary opcode
  enc_class OpcSErm (eRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  // Emit the immediate itself as 8 or 32 bits, matching OpcSE's choice
  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}

  // ALU op on the low 32-bit half of a long immediate:
  // opcode, mod/rm (secondary opcode), then imm8 or imm32.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // ALU op on the high 32-bit half of a long immediate; uses the tertiary
  // opcode in the mod/rm byte and the high register of the pair.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}
1715 
  // Emit a 32-bit pc-relative displacement to the label (long-form jump)
  enc_class Lbl (label labl) %{ // GOTO
    Label *l = $labl$$label;
    emit_d32(cbuf, (l->loc_pos() - (cbuf.insts_size()+4)));
  %}

  // Emit an 8-bit pc-relative displacement to the label (short-form jump)
  enc_class LblShort (label labl) %{ // GOTO
    Label *l = $labl$$label;
    int disp = l->loc_pos() - (cbuf.insts_size()+1);
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    emit_d8(cbuf, disp);
  %}

  // Opcode byte formed from the secondary opcode plus the register number
  enc_class OpcSReg (eRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit register pair: bswap both halves, then exchange them
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  // mod/rm byte carrying the secondary opcode as the reg field
  enc_class RegOpc (eRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}

  // Conditional jump with 32-bit displacement (0x0F 0x8x cc form)
  enc_class Jcc (cmpOp cop, label labl) %{    // JCC
    Label *l = $labl$$label;
    assert(l != NULL, "need Label");
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
    emit_d32(cbuf, (l->loc_pos() - (cbuf.insts_size()+4)));
  %}

  // Conditional jump with 8-bit displacement (short form)
  enc_class JccShort (cmpOp cop, label labl) %{    // JCC
    Label *l = $labl$$label;
    assert(l != NULL, "need Label");
    emit_cc(cbuf, $primary, $cop$$cmpcode);
    int disp = l->loc_pos() - (cbuf.insts_size()+1);
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    emit_d8(cbuf, disp);
  %}
1766 
  // Integer CMOVcc: primary opcode byte then condition-coded opcode + mod/rm
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // x87 FCMOV: the two-byte opcode is built from the condition code and
  // the source FP stack slot ($src$$reg - 1 relative to ST(0)).
  enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}

  // Slow-path partial subtype check; on a hit EDI is zeroed when $primary
  // is set, otherwise control falls through to the miss label with the
  // condition codes set.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}
1801 
  // Empty the x87 FPU stack before a call that expects it clean; in SSE2+
  // mode the stack should already be empty (optionally verified).
  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    // Record the emitted size on first use and check it stays constant,
    // since instruction sizing elsewhere depends on it.
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  // Optional FPU state check after a runtime leaf call (VerifyFPU only)
  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}
1827 
  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    // C runtime calls return FP results on the x87 stack; in SSE2+ mode
    // move (or discard) the result so it ends up where compiled code
    // expects it (XMM0).
    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // Bounce the float through the stack: fstp to memory, movflt to XMM0.
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp,  4));
      } else if (rt == T_DOUBLE) {
        // Same dance for doubles, using 8 bytes.
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp,  8));
      }
    }
  %}
1859 
1860 
  enc_class pre_call_FPU %{
    // If method sets FPU control word restore it here
    // (switch back to the standard control word before calling out)
    debug_only(int off0 = cbuf.insts_size());
    if( Compile::current()->in_24_bit_fp_mode() ) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_FPU_size(), "correct size prediction");
  %}

  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    // (re-establish the 24-bit precision mode after the call returns)
    if( Compile::current()->in_24_bit_fp_mode() ) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class preserve_SP %{
    debug_only(int off0 = cbuf.insts_size());
    MacroAssembler _masm(&cbuf);
    // RBP is preserved across all calls, even compiled calls.
    // Use it to preserve RSP in places where the callee might change the SP.
    __ movptr(rbp_mh_SP_save, rsp);
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
  %}

  // Restore the SP saved by preserve_SP above
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp_mh_SP_save);
  %}
1894 
  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // Relocation type depends on whether the target method is known
    // (_method) and whether the call site is an optimized virtual call.
    if ( !_method ) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(), RELOC_IMM32 );
    } else if(_optimized_virtual) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
    } else {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     static_call_Relocation::spec(), RELOC_IMM32 );
    }
    if( _method ) {  // Emit stub for static call
      emit_java_to_interp(cbuf);
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    // !!!!!
    // Generate  "Mov EAX,0x00", placeholder instruction to load oop-info
    // emit_call_dynamic_prologue( cbuf );
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xB8 + EAX_enc);        // mov    EAX,-1
    emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
    address  virtual_call_oop_addr = cbuf.insts_mark();
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(methodOopDesc::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);             // Displacement

  %}

  // XOR reg,reg -- the canonical way to zero a register
  enc_class Xor_Reg (eRegI dst) %{
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
  %}
1947 
1948 //   Following encoding is no longer used, but may be restored if calling
1949 //   convention changes significantly.
1950 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1951 //
1952 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1953 //     // int ic_reg     = Matcher::inline_cache_reg();
1954 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1955 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1956 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1957 //
1958 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1959 //     // // so we load it immediately before the call
1960 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1961 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1962 //
1963 //     // xor rbp,ebp
1964 //     emit_opcode(cbuf, 0x33);
1965 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1966 //
1967 //     // CALL to interpreter.
1968 //     cbuf.set_insts_mark();
1969 //     $$$emit8$primary;
1970 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1971 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1972 //   %}
1973 
  // Shift by an 8-bit immediate: opcode, mod/rm (secondary opcode), imm8
  enc_class RegOpcImm (eRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (eRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Same as LdImmI but the opcode base comes from the primary opcode
  enc_class LdImmP (eRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the low 32-bit half of a long immediate; zero is strength-reduced
  // to XOR reg,reg.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the high 32-bit half of a long immediate into the high register
  // of the pair (encoding offset +2); zero becomes XOR reg,reg.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}
2023 
2024 
  // Move a 32-bit GPR into an XMM register (MOVD xmm,r32)
  enc_class MovI2X_reg(regX dst, eRegI src) %{
    emit_opcode(cbuf, 0x66 );     // MOVD dst,src
    emit_opcode(cbuf, 0x0F );
    emit_opcode(cbuf, 0x6E );
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Move an XMM register into a 32-bit GPR (MOVD r32,xmm)
  enc_class MovX2I_reg(eRegI dst, regX src) %{
    emit_opcode(cbuf, 0x66 );     // MOVD dst,src
    emit_opcode(cbuf, 0x0F );
    emit_opcode(cbuf, 0x7E );
    emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
  %}

  // Assemble a 64-bit value in an XMM register from a GPR pair:
  // MOVD both halves, then interleave with PUNPCKLDQ.
  enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
    { // MOVD $dst,$src.lo
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    }
    { // MOVD $tmp,$src.hi
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
    }
    { // PUNPCKLDQ $dst,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x62);
      emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
     }
  %}

  // Split a 64-bit XMM value into a GPR pair: MOVD the low word, shuffle
  // the high word down with PSHUFLW, then MOVD it out.
  enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
    { // MOVD $dst.lo,$src
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
    }
    { // PSHUFLW $tmp,$src,0x4E  (01001110b)
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x70);
      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
      emit_d8(cbuf, 0x4E);
    }
    { // MOVD $dst.hi,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
    }
  %}
2081 
2082 
  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( eRegI dst, eRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Copy the low half of a long register pair into an int register
  enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
  %}

  // mod/rm byte only; the opcode comes from another enc_class
  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Primary opcode + mod/rm on the low registers of two long pairs
  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Secondary opcode + mod/rm on the high registers of two long pairs
  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // mod/rm only on the low registers (opcode emitted elsewhere)
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // mod/rm only on the high registers (opcode emitted elsewhere)
  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // mod/rm pairing an int register with the high register of a long pair
  enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}
2122 
  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32F_as_bits(immF src) %{        // storeF_imm
    // Output Float immediate bits (raw IEEE-754 pattern, not a conversion)
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32XF_as_bits(immXF src) %{      // storeX_imm
    // Output Float immediate bits (raw IEEE-754 pattern, not a conversion)
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  // Emit the immediate as a raw 32-bit value
  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    // mod=00, rm=101 selects the disp32-only addressing form; the
    // displacement emitted here is zero.
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}
2156 
  // Emit the LOCK prefix (0xF0), but only on multiprocessor systems
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // 32-bit compare-and-exchange: optional LOCK prefix, then CMPXCHG [Eptr]
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}
2194 
  // Materialize a boolean from the Z flag: res = (ZF set) ? 1 : 0.
  // MOV does not affect flags, so the flag state survives the first MOV.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}

  // Register-to-memory form: emit the mod/rm/sib/displacement bytes
  // for a full memory operand (base + index*scale + disp).
  enc_class RegMem (eRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}

  // Like RegMem but addresses the high 32-bit word of a long in memory
  // (displacement + 4) using the high register of the pair.
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/);
  %}
2233 
  // Long shift by 1..31: SHLD/SHRD (0x0F $tertiary) moves bits across the
  // word boundary, then a plain shift ($primary/$secondary) finishes the
  // remaining word.  $tertiary == 0xA4 selects the SHLD operand order.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic long shift right by 32..63: copy hi into lo, shift lo by
  // (cnt-32), then fill hi with sign bits (shift by 31).
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical long shift by 32..63: move one word across, shift it by
  // (cnt-32), then zero the vacated word.  $secondary selects direction.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}
2275 
  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    bool disp_is_oop = false;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false);
  %}

  // Same as above, but the displacement may be an oop and therefore
  // needs relocation info.
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
  %}

  // LEA dst, [src0 + src1]: base register plus constant displacement,
  // no index and no scale.
  enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    bool disp_is_oop = false;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2323 
  // Signed integer MIN: keep dst when dst < src, else copy src into dst.
  enc_class min_enc (eRegI dst, eRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move (JL rel8; the MOV below is exactly 2 bytes)
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Signed integer MAX: keep dst when dst > src, else copy src into dst.
  enc_class max_enc (eRegI dst, eRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move (JG rel8; the MOV below is exactly 2 bytes)
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}
2347 
  // Store an x87 register to memory.
  enc_class enc_FP_store(memory mem, regD src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store (FST reg/opcode field)
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop (FSTP reg/opcode field)
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}

  // Two's-complement negate of an integer register: 0xF7 /3 = NEG r/m32.
  enc_class neg_reg(eRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  // Set the low byte of dst to 1 if the last compare was signed-less-than,
  // else 0: 0x0F 0x9C = SETL r/m8.
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}
2379 
  // Branch-free conditional add: p = (p - q) + ((p - q borrowed) ? y : 0).
  // SBB tmp,tmp yields all-ones when the SUB borrowed and zero otherwise,
  // so ANDing with y selects y or 0 without a branch.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp  (tmp = CF ? -1 : 0)
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Same branch-free conditional add, but the selected value comes from
  // memory instead of a register.
  enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp  (tmp = CF ? -1 : 0)
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,0x23);
    int reg_encoding = tmpReg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}
2420 
  // Variable left shift of a long by the count in ECX.  x86 shift counts
  // are taken mod 32, so for counts >= 32 the halves are first moved and
  // the low half cleared; SHLD/SHL then apply the residual (count mod 32).
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32  (shift lives in ECX by register-class constraint)
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small  (skip the 4 bytes of MOV+CLR when count < 32)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV    $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR    $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
    // SHLD   $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL    $dst.lo,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}

  // Variable logical right shift of a long by the count in ECX.
  // Mirror image of shift_left_long: move high into low and clear the
  // high half for counts >= 32, then SHRD/SHR by (count mod 32).
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small  (skip the 4 bytes of MOV+CLR when count < 32)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV    $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR    $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
    // SHRD   $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}

  // Variable arithmetic right shift of a long by the count in ECX.
  // For counts >= 32 the high half is copied down and then SAR'd by 31
  // so it holds the sign extension; SHRD/SAR finish with (count mod 32).
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small  (skip the 5 bytes of MOV+SAR-imm8 when count < 32)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV    $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR    $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
// small:
    // SHRD   $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}
2493 
2494 
  // ----------------- Encodings for floating point unit -----------------
  // May leave result in FPU-TOS or FPU reg depending on opcodes
  enc_class OpcReg_F (regF src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}

  // Pop FPU top-of-stack into ST(dst).
  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_D( regD dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Push ST(dst) onto the FPU stack (FLD duplicates it to the new TOS).
  enc_class Push_Reg_D( regD dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Multiply ST(dst) by the strict-fp scaling constant (bias1) loaded as
  // an 80-bit real from a stub-provided address.
  enc_class strictfp_bias1( regD dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Companion to strictfp_bias1: multiply by the inverse scaling constant
  // (bias2) to undo the exponent bias after the strict-fp operation.
  enc_class strictfp_bias2( regD dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}
2534 
  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  // Emits only the ModRM/SIB/disp32 for [ESP+disp]; the opcode byte has
  // already been emitted by the instruction's primary opcode.
  enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
    emit_d32(cbuf, $dst$$disp);   // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push the float in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_F( memory src ) %{    // FLD_S   [ESP+src]
    store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
  %}

  // Push the double in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_D( memory src ) %{    // FLD_D   [ESP+src]
    store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  // Pop FPU top-of-stack into ST(dst) (same bytes as Pop_Reg_D).
  enc_class Pop_Reg_F( regF dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Push ST(dst) onto the FPU stack (same bytes as Push_Reg_D).
  enc_class Push_Reg_F( regF dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}
2583 
  // Push FPU's float to a stack-slot, and pop FPU-stack.
  // If src is already FPR1 (TOS) a plain FST (/2) suffices; otherwise src
  // is first FLDed to TOS and stored with FSTP (/3) to rebalance the stack.
  enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack.
  // Same FST-vs-FLD+FSTP choice as Pop_Mem_Reg_F, with double opcodes.
  enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack.
  // 'pop' selects between FST ST(i) (0xD0 base, src already at TOS) and
  // FSTP ST(i) (0xD8 base, after an explicit FLD of src).
  enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}
2617 
2618 
  // dst = src1 * src2 + src, computed entirely on the x87 stack via the
  // MacroAssembler (load src1 to TOS, multiply, add, store-and-pop to dst).
  enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
    MacroAssembler masm(&cbuf);
    masm.fld_s(  $src1$$reg-1);   // nothing at TOS, load TOS from src1.reg
    masm.fmul(   $src2$$reg+0);   // value at TOS
    masm.fadd(   $src$$reg+0);    // value at TOS
    masm.fstp_d( $dst$$reg+0);    // value at TOS, popped off after store
  %}

  // Load dst to TOS, then (when src is not FPR1) rotate src into the FPR1
  // position using the fincstp / FXCH / fdecstp sequence so later code can
  // address it there without disturbing the rest of the stack.
  enc_class Push_Reg_Mod_D( regD dst, regD src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}
2645 
  // Move two XMM doubles (src1 then src0) onto the x87 stack by bouncing
  // each through an 8-byte scratch area at [ESP]: MOVSD to memory, FLD_D
  // back.  Afterwards src0 is at TOS with src1 beneath it.
  enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src1
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src0
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}

  // Single-precision variant of Push_ModD_encoding: bounce two XMM floats
  // through a 4-byte scratch area at [ESP] with MOVSS / FLD.
  enc_class Push_ModX_encoding( regX src0, regX src1) %{
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);

    emit_opcode  (cbuf, 0xF3 );     // MOVSS [ESP], src1
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );      // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode  (cbuf, 0xF3 );     // MOVSS [ESP], src0
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );      // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}

  // Move the x87 TOS double into an XMM register via [ESP], then release
  // the 8-byte scratch area.
  enc_class Push_ResultXD(regXD dst) %{
    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]

    // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);    // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x08);
  %}

  // Move the x87 TOS float into an XMM register via [ESP]; 'd8' is the
  // number of scratch bytes to release afterwards.
  enc_class Push_ResultX(regX dst, immI d8) %{
    store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]

    emit_opcode  (cbuf, 0xF3 );     // MOVSS dst(xmm), [ESP]
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x10 );
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);    // ADD ESP,d8 (4 or 8)
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,$d8$$constant);
  %}

  // Push a single XMM double onto the x87 stack via an 8-byte scratch
  // area at [ESP] (the scratch area is left allocated).
  enc_class Push_SrcXD(regXD src) %{
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}
2735 
  // Reserve an 8-byte scratch area on the stack.
  enc_class push_stack_temp_qword() %{
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8    (cbuf,0x08);
  %}

  // Release the 8-byte scratch area reserved by push_stack_temp_qword.
  enc_class pop_stack_temp_qword() %{
    emit_opcode(cbuf,0x83);     // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8    (cbuf,0x08);
  %}

  // Bounce an XMM double through [ESP] onto the x87 stack (scratch area
  // must already exist, e.g. via push_stack_temp_qword).
  enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], xmm_src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}
2757 
  // Compute X^Y using Intel's fast hardware instructions, if possible.
  // Otherwise return a NaN.
  // Core of the pow computation: given Q = Y*log2(X) at TOS, computes 2^Q
  // by splitting Q into integer and fractional parts (2^frac via f2xm1,
  // 2^int by constructing the double's exponent field in integer regs).
  // Uses an 8-byte scratch area at [ESP]; clobbers EAX, EBX, ECX.
  enc_class pow_exp_core_encoding %{
    // FPR1 holds Y*ln2(X).  Compute FPR1 = 2^(Y*ln2(X))
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0);  // fdup = fld st(0)          Q       Q
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC);  // frndint               int(Q)      Q
    emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9);  // fsub st(1) -= st(0);  int(Q) frac(Q)
    emit_opcode(cbuf,0xDB);                          // FISTP [ESP]           frac(Q)
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0);  // f2xm1                 2^frac(Q)-1
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8);  // fld1                  1 2^frac(Q)-1
    emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1);  // faddp                 2^frac(Q)
    emit_opcode(cbuf,0x8B);                          // mov rax,[esp+0]=int(Q)
    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
    emit_opcode(cbuf,0xC7);                          // mov rcx,0xFFFFF800 - overflow mask
    emit_rm(cbuf, 0x3, 0x0, ECX_enc);
    emit_d32(cbuf,0xFFFFF800);
    emit_opcode(cbuf,0x81);                          // add rax,1023 - the double exponent bias
    emit_rm(cbuf, 0x3, 0x0, EAX_enc);
    emit_d32(cbuf,1023);
    emit_opcode(cbuf,0x8B);                          // mov rbx,eax
    emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
    emit_opcode(cbuf,0xC1);                          // shl rax,20 - Slide to exponent position
    emit_rm(cbuf,0x3,0x4,EAX_enc);
    emit_d8(cbuf,20);
    emit_opcode(cbuf,0x85);                          // test rbx,ecx - check for overflow
    emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
    emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45);  // CMOVne rax,ecx - overflow; stuff NAN into EAX
    emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
    emit_opcode(cbuf,0x89);                          // mov [esp+4],eax - Store as part of double word
    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
    emit_opcode(cbuf,0xC7);                          // mov [esp+0],0   - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
    emit_d32(cbuf,0);
    emit_opcode(cbuf,0xDC);                          // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
    encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
  %}
2796 
//   enc_class Pop_Reg_Mod_D( regD dst, regD src)
//   was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()

  // Rotate src into the FPR1 position (fincstp / FXCH / fdecstp) so that
  // a following Pop_Reg_F / Pop_Mem_F stores the intended value.  No-op
  // when src is already FPR1.
  enc_class Push_Result_Mod_D( regD src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy the FPU status word to EFLAGS, then jump over the next 5 bytes
  // when the parity flag (unordered-compare indicator) is clear.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}
2828 
  // Loop of FPREM until the C2 status bit clears (FPREM may return a
  // partial remainder and must be re-run).  The JP rel32 of -12 jumps
  // back over the 12 bytes of fprem+wait+fnstsw+sahf+jp.
  enc_class emitModD() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp  ::loop  (JP rel32 = -12, back to fprem)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // Transfer FPU compare results into EFLAGS, forcing the "unordered"
  // (NaN) case to read as less-than by setting the carry flag.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400  (C2/parity bit => unordered comparison)
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  // Post-compare fixup for P6 (CMOV-capable) parts: if the compare was
  // unordered (P flag set by NaN), rewrite the flags as less-than.
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}
2888 
// Reference pseudo-code for CmpF_Result below:
//     fnstsw_ax();
//     sahf();
//     movl(dst, nan_result);
//     jcc(Assembler::parity, exit);
//     movl(dst, less_result);
//     jcc(Assembler::below, exit);
//     movl(dst, equal_result);
//     jcc(Assembler::equal, exit);
//     movl(dst, greater_result);

// less_result     =  1;
// greater_result  = -1;
// equal_result    = 0;
// nan_result      = -1;

  // Materialize a three-way float compare result (-1/0/1, NaN => -1) in
  // an integer register.  Each jcc skips forward over the remaining
  // MOV-imm32 (5 bytes) and jcc (2 bytes) pairs: 0x13 = 19, 0x0C = 12,
  // 0x05 = 5 bytes respectively.
  enc_class CmpF_Result(eRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}


  // XMM version of CmpF_Result. Because the XMM compare
  // instructions set the EFLAGS directly. It becomes simpler than
  // the float version above.
  enc_class CmpX_Result(eRegI dst) %{
    MacroAssembler _masm(&cbuf);
    Label nan, inc, done;

    __ jccb(Assembler::parity, nan);
    __ jccb(Assembler::equal,  done);
    __ jccb(Assembler::above,  inc);
    __ bind(nan);
    __ decrement(as_Register($dst$$reg)); // NO L qqq
    __ jmpb(done);
    __ bind(inc);
    __ increment(as_Register($dst$$reg)); // NO L qqq
    __ bind(done);
  %}
2951 
  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  // Compares the high halves, then (only on high-equal) the low halves;
  // the 2-byte JNE skips the 2-byte low compare.
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
// done:
  %}

  // Sign-extend an int into a long register pair: copy src into both
  // halves, then arithmetic-shift the high half by 31.
  enc_class convert_int_long( regL dst, eRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}

  // Convert a long to double: push both halves, FILD the 64-bit value
  // from [ESP], then pop the two pushed words back off the stack.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add  SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}
2995 
  // Widening IMUL (0xF7 /5: EDX:EAX = EAX * src1) followed by an optional
  // SAR of EDX by (cnt-32); no shift is emitted when cnt == 32 since the
  // wanted bits are already in EDX.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL   EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR    EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}

  // this version doesn't have add sp, 8
  // Same as convert_long_double but leaves the two pushed words on the
  // stack for the caller to release.
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}

  // Signed widening multiply: 0xF7 /5 = IMUL r/m32 (EDX:EAX = EAX * src).
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  // Unsigned widening multiply: 0xF7 /4 = MUL r/m32 (EDX:EAX = EAX * src).
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}
3035 
  // Full 64x64->64 multiply.  dst is constrained to EDX:EAX (eADXRegL),
  // so HIGH_FROM_LOW($dst$$reg) is EDX and $dst$$reg is EAX; the comments
  // use the fixed register names accordingly.
  enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX   (tmp = src.lo * dst.hi)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX    (EDX = src.hi * dst.lo)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX   (tmp = sum of the two cross products)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo   (unsigned widening: dst.lo * src.lo)
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp   (fold cross products into the high half)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}

  // 64-bit multiply by a small (8-bit signed) constant; same structure as
  // long_multiply with the constant in place of src.lo/src.hi products.
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src   (0x6B = IMUL r32, r/m32, imm8)
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}
3079 
  // 64-bit signed divide via a runtime call: push both operands (hi then
  // lo, so each long is little-endian in memory), call SharedRuntime::ldiv,
  // then pop the four argument words.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}

  // 64-bit signed remainder via a runtime call; identical to long_div
  // except it targets SharedRuntime::lrem.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}
3117 
  enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
    // Zero-test of a 64-bit value: tmp = src.lo | src.hi.  The OR sets
    // ZF == 1 iff the whole long is zero.  tmp is clobbered.
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}
3126 
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // 64-bit EQ/NE compare: compare the low halves; if they differ, the
    // short JNE skips the 2-byte compare of the high halves and the
    // not-equal flags stand.  Only ZF is meaningful on exit.
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);              // skip distance == size of the CMP below
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}
3138 
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
    // Signed 64-bit compare: CMP of the low halves produces the borrow,
    // then tmp = src1.hi SBB src2.hi computes the high word of src1-src2
    // including that borrow, leaving flags valid for signed tests.
    // tmp is clobbered.
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB   $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}
3150 
  enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
    // Signed compare of zero against a 64-bit value (computes 0 - src):
    // tmp is zeroed, then CMP against src.lo produces the borrow that the
    // SBB against src.hi folds in.  tmp is clobbered.
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}
3162 
3163  // Sniff, sniff... smells like Gnu Superoptimizer
  enc_class neg_long( eRegL dst ) %{
    // 64-bit negate in place: NEG hi; NEG lo (sets CF iff lo was non-zero);
    // SBB hi,0 then folds the borrow from the low half into the high half.
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}
3173 
  enc_class movq_ld(regXD dst, memory mem) %{
    // MOVQ load: 64 bits from memory into an XMM register.
    MacroAssembler _masm(&cbuf);
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
3178 
  enc_class movq_st(memory mem, regXD src) %{
    // MOVQ store: low 64 bits of an XMM register to memory.
    MacroAssembler _masm(&cbuf);
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
3183 
  enc_class pshufd_8x8(regX dst, regX src) %{
    // Replicate the low byte of src across the 8 bytes of dst's low qword:
    // punpcklbw with self pairs up the low bytes, then pshuflw with control
    // 0x00 broadcasts the resulting low word across the low four words.
    MacroAssembler _masm(&cbuf);

    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
  %}
3191 
  enc_class pshufd_4x16(regX dst, regX src) %{
    // Broadcast the low 16-bit word of src across the four words of dst's
    // low qword (pshuflw with shuffle control 0x00).
    MacroAssembler _masm(&cbuf);

    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
  %}
3197 
  enc_class pshufd(regXD dst, regXD src, int mode) %{
    // Shuffle the four dwords of src into dst per the $mode control byte.
    MacroAssembler _masm(&cbuf);

    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
  %}
3203 
  enc_class pxor(regXD dst, regXD src) %{
    // Bitwise XOR of two XMM registers: dst ^= src.
    MacroAssembler _masm(&cbuf);

    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
  %}
3209 
  enc_class mov_i2x(regXD dst, eRegI src) %{
    // Move a 32-bit general-purpose register into the low dword of an XMM
    // register (movdl).
    MacroAssembler _masm(&cbuf);

    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
  %}
3215 
3216 
3217   // Because the transitions from emitted code to the runtime
3218   // monitorenter/exit helper stubs are so slow it's critical that
3219   // we inline both the stack-locking fast-path and the inflated fast path.
3220   //
3221   // See also: cmpFastLock and cmpFastUnlock.
3222   //
3223   // What follows is a specialized inline transliteration of the code
3224   // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
3225   // another option would be to emit TrySlowEnter and TrySlowExit methods
3226   // at startup-time.  These methods would accept arguments as
3227   // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
3228   // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
3229   // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
3230   // In practice, however, the # of lock sites is bounded and is usually small.
  // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
  // if the processor uses simple bimodal branch predictors keyed by EIP,
  // since the helper routines would be called from multiple synchronization
  // sites.
3235   //
3236   // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
3237   // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
3238   // to those specialized methods.  That'd give us a mostly platform-independent
3239   // implementation that the JITs could optimize and inline at their pleasure.
  // Done correctly, the only time we'd need to cross to native code would be
3241   // to park() or unpark() threads.  We'd also need a few more unsafe operators
3242   // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
3243   // (b) explicit barriers or fence operations.
3244   //
3245   // TODO:
3246   //
3247   // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
3248   //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
3249   //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
3250   //    the lock operators would typically be faster than reifying Self.
3251   //
3252   // *  Ideally I'd define the primitives as:
3253   //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
3254   //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
3255   //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
  //    Instead, we're stuck with the rather awkward and brittle register assignments below.
3257   //    Furthermore the register assignments are overconstrained, possibly resulting in
3258   //    sub-optimal code near the synchronization site.
3259   //
3260   // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
3261   //    Alternately, use a better sp-proximity test.
3262   //
3263   // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
3264   //    Either one is sufficient to uniquely identify a thread.
3265   //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
3266   //
3267   // *  Intrinsify notify() and notifyAll() for the common cases where the
3268   //    object is locked by the calling thread but the waitlist is empty.
3269   //    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
3270   //
3271   // *  use jccb and jmpb instead of jcc and jmp to improve code density.
3272   //    But beware of excessive branch density on AMD Opterons.
3273   //
3274   // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
3275   //    or failure of the fast-path.  If the fast-path fails then we pass
3276   //    control to the slow-path, typically in C.  In Fast_Lock and
3277   //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
3278   //    will emit a conditional branch immediately after the node.
3279   //    So we have branches to branches and lots of ICC.ZF games.
3280   //    Instead, it might be better to have C2 pass a "FailureLabel"
3281   //    into Fast_Lock and Fast_Unlock.  In the case of success, control
3282   //    will drop through the node.  ICC.ZF is undefined at exit.
3283   //    In the case of failure, the node will branch directly to the
3284   //    FailureLabel
3285 
3286 
3287   // obj: object to lock
3288   // box: on-stack box address (displaced header location) - KILLED
3289   // rax,: tmp -- KILLED
3290   // scr: tmp -- KILLED
  enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
    // Inline monitorenter fast-path.  On exit icc.ZFlag == 1 indicates the
    // lock was acquired; ZFlag == 0 passes control to the slow-path (see
    // the protocol comment at DONE_LABEL below).  tmp must be EAX because
    // EAX is the implicit cmpxchg comparand.  The EmitSync flag selects
    // between several emission strategies, mostly for diagnosis and tuning.

    Register objReg = as_Register($obj$$reg);
    Register boxReg = as_Register($box$$reg);
    Register tmpReg = as_Register($tmp$$reg);
    Register scrReg = as_Register($scr$$reg);

    // Ensure the register assignments are disjoint
    guarantee (objReg != boxReg, "") ;
    guarantee (objReg != tmpReg, "") ;
    guarantee (objReg != scrReg, "") ;
    guarantee (boxReg != tmpReg, "") ;
    guarantee (boxReg != scrReg, "") ;
    guarantee (tmpReg == as_Register(EAX_enc), "") ;

    MacroAssembler masm(&cbuf);

    if (_counters != NULL) {
      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
    }
    if (EmitSync & 1) {
        // set box->dhw = unused_mark (3)
        // Force all sync thru slow-path: slow_enter() and slow_exit()
        masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
        masm.cmpptr (rsp, (int32_t)0) ;     // rsp is never 0, so ZFlag == 0 => always slow-path
    } else
    if (EmitSync & 2) {
        // Emit only the classic stack-locking code.
        Label DONE_LABEL ;
        if (UseBiasedLocking) {
           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
        }

        masm.movptr(tmpReg, Address(objReg, 0)) ;          // fetch markword
        masm.orptr (tmpReg, 0x1);                          // set the "unlocked" low bit in the displaced header
        masm.movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
        if (os::is_MP()) { masm.lock();  }
        masm.cmpxchgptr(boxReg, Address(objReg, 0));          // Updates tmpReg
        masm.jcc(Assembler::equal, DONE_LABEL);
        // Recursive locking
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );        // sp-proximity test
        masm.movptr(Address(boxReg, 0), tmpReg);
        masm.bind(DONE_LABEL) ;
    } else {
      // Possible cases that we'll encounter in fast_lock
      // ------------------------------------------------
      // * Inflated
      //    -- unlocked
      //    -- Locked
      //       = by self
      //       = by other
      // * biased
      //    -- by Self
      //    -- by other
      // * neutral
      // * stack-locked
      //    -- by self
      //       = sp-proximity test hits
      //       = sp-proximity test generates false-negative
      //    -- by other
      //

      Label IsInflated, DONE_LABEL, PopDone ;

      // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
      // order to reduce the number of conditional branches in the most common cases.
      // Beware -- there's a subtle invariant that fetch of the markword
      // at [FETCH], below, will never observe a biased encoding (*101b).
      // If this invariant is not held we risk exclusion (safety) failure.
      if (UseBiasedLocking && !UseOptoBiasInlining) {
        masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
      }

      masm.movptr(tmpReg, Address(objReg, 0)) ;         // [FETCH]
      masm.testptr(tmpReg, 0x02) ;                      // Inflated v (Stack-locked or neutral)
      masm.jccb  (Assembler::notZero, IsInflated) ;

      // Attempt stack-locking ...
      masm.orptr (tmpReg, 0x1);
      masm.movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
      if (os::is_MP()) { masm.lock();  }
      masm.cmpxchgptr(boxReg, Address(objReg, 0));           // Updates tmpReg
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jccb (Assembler::equal, DONE_LABEL);

      // Recursive locking: the CAS failed, so check whether the markword
      // points into this thread's stack (sp-proximity test); if so, record
      // the recursion by storing 0 into the box's displaced header.
      masm.subptr(tmpReg, rsp);
      masm.andptr(tmpReg, 0xFFFFF003 );
      masm.movptr(Address(boxReg, 0), tmpReg);
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jmp  (DONE_LABEL) ;

      masm.bind (IsInflated) ;

      // The object is inflated.
      //
      // TODO-FIXME: eliminate the ugly use of manifest constants:
      //   Use markOopDesc::monitor_value instead of "2".
      //   use markOop::unused_mark() instead of "3".
      // The tmpReg value is an objectMonitor reference ORed with
      // markOopDesc::monitor_value (2).   We can either convert tmpReg to an
      // objectmonitor pointer by masking off the "2" bit or we can just
      // use tmpReg as an objectmonitor pointer but bias the objectmonitor
      // field offsets with "-2" to compensate for and annul the low-order tag bit.
      //
      // I use the latter as it avoids AGI stalls.
      // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
      // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
      //
      #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)

      // boxReg refers to the on-stack BasicLock in the current frame.
      // We'd like to write:
      //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
      // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
      // additional latency as we have another ST in the store buffer that must drain.

      if (EmitSync & 8192) {
         masm.movptr(Address(boxReg, 0), 3) ;            // results in ST-before-CAS penalty
         masm.get_thread (scrReg) ;
         masm.movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
         masm.movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      } else
      if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
         masm.movptr(scrReg, boxReg) ;                  // scr now holds the box address
         masm.movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form: consider XORL tmpReg,tmpReg
           masm.movptr(tmpReg, NULL_WORD) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           // Test-And-CAS instead of CAS
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Ideally, I'd manifest "Self" with get_thread and then attempt
         // to CAS the register containing Self into m->Owner.
         // But we don't have enough registers, so instead we can either try to CAS
         // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
         // we later store "Self" into m->Owner.  Transiently storing a stack address
         // (rsp or the address of the box) into  m->owner is harmless.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         if (os::is_MP()) { masm.lock();  }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
         masm.movptr(Address(scrReg, 0), 3) ;          // box->_displaced_header = 3 (mov leaves flags intact)
         masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         masm.get_thread (scrReg) ;                    // beware: clobbers ICCs
         masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
         masm.xorptr(boxReg, boxReg) ;                 // set icc.ZFlag = 1 to indicate success

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      } else {
         masm.movptr(Address(boxReg, 0), 3) ;       // results in ST-before-CAS penalty
         masm.movptr(boxReg, tmpReg) ;

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form
           masm.xorptr  (tmpReg, tmpReg) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Use either "Self" (in scr) or rsp as thread identity in _owner.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         masm.get_thread (scrReg) ;
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      }

      // DONE_LABEL is a hot target - we'd really like to place it at the
      // start of cache line by padding with NOPs.
      // See the AMD and Intel software optimization manuals for the
      // most efficient "long" NOP encodings.
      // Unfortunately none of our alignment mechanisms suffice.
      masm.bind(DONE_LABEL);

      // Avoid branch-to-branch on AMD processors
      // This appears to be superstition.
      if (EmitSync & 32) masm.nop() ;


      // At DONE_LABEL the icc ZFlag is set as follows ...
      // Fast_Unlock uses the same protocol.
      // ZFlag == 1 -> Success
      // ZFlag == 0 -> Failure - force control through the slow-path
    }
  %}
3521 
3522   // obj: object to unlock
3523   // box: box address (displaced header location), killed.  Must be EAX.
3524   // rbx,: killed tmp; cannot be obj nor box.
3525   //
3526   // Some commentary on balanced locking:
3527   //
3528   // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3529   // Methods that don't have provably balanced locking are forced to run in the
3530   // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3531   // The interpreter provides two properties:
  // I1:  At return-time the interpreter automatically and quietly unlocks any
  //      objects acquired in the current activation (frame).  Recall that the
3534   //      interpreter maintains an on-stack list of locks currently held by
3535   //      a frame.
  // I2:  If a method attempts to unlock an object that is not held by
  //      the frame, the interpreter throws IMSX.
3538   //
3539   // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3540   // B() doesn't have provably balanced locking so it runs in the interpreter.
3541   // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
3542   // is still locked by A().
3543   //
3544   // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
3545   // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3546   // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
3547   // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3548 
3549   enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
3550 
3551     Register objReg = as_Register($obj$$reg);
3552     Register boxReg = as_Register($box$$reg);
3553     Register tmpReg = as_Register($tmp$$reg);
3554 
3555     guarantee (objReg != boxReg, "") ;
3556     guarantee (objReg != tmpReg, "") ;
3557     guarantee (boxReg != tmpReg, "") ;
3558     guarantee (boxReg == as_Register(EAX_enc), "") ;
3559     MacroAssembler masm(&cbuf);
3560 
3561     if (EmitSync & 4) {
3562       // Disable - inhibit all inlining.  Force control through the slow-path
3563       masm.cmpptr (rsp, 0) ; 
3564     } else 
3565     if (EmitSync & 8) {
3566       Label DONE_LABEL ;
3567       if (UseBiasedLocking) {
3568          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3569       }
3570       // classic stack-locking code ...
3571       masm.movptr(tmpReg, Address(boxReg, 0)) ;
3572       masm.testptr(tmpReg, tmpReg) ;
3573       masm.jcc   (Assembler::zero, DONE_LABEL) ;
3574       if (os::is_MP()) { masm.lock(); }
3575       masm.cmpxchgptr(tmpReg, Address(objReg, 0));          // Uses EAX which is box
3576       masm.bind(DONE_LABEL);
3577     } else {
3578       Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3579 
3580       // Critically, the biased locking test must have precedence over
3581       // and appear before the (box->dhw == 0) recursive stack-lock test.
3582       if (UseBiasedLocking && !UseOptoBiasInlining) {
3583          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3584       }
3585       
3586       masm.cmpptr(Address(boxReg, 0), 0) ;            // Examine the displaced header
3587       masm.movptr(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
3588       masm.jccb  (Assembler::zero, DONE_LABEL) ;      // 0 indicates recursive stack-lock
3589 
3590       masm.testptr(tmpReg, 0x02) ;                     // Inflated? 
3591       masm.jccb  (Assembler::zero, Stacked) ;
3592 
3593       masm.bind  (Inflated) ;
3594       // It's inflated.
3595       // Despite our balanced locking property we still check that m->_owner == Self
3596       // as java routines or native JNI code called by this thread might
3597       // have released the lock.
3598       // Refer to the comments in synchronizer.cpp for how we might encode extra
3599       // state in _succ so we can avoid fetching EntryList|cxq.
3600       //
3601       // I'd like to add more cases in fast_lock() and fast_unlock() --
3602       // such as recursive enter and exit -- but we have to be wary of
3603       // I$ bloat, T$ effects and BP$ effects.
3604       //
3605       // If there's no contention try a 1-0 exit.  That is, exit without
3606       // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
3607       // we detect and recover from the race that the 1-0 exit admits.
3608       //
3609       // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3610       // before it STs null into _owner, releasing the lock.  Updates
3611       // to data protected by the critical section must be visible before
3612       // we drop the lock (and thus before any other thread could acquire
3613       // the lock and observe the fields protected by the lock).
3614       // IA32's memory-model is SPO, so STs are ordered with respect to
3615       // each other and there's no need for an explicit barrier (fence).
3616       // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3617 
3618       masm.get_thread (boxReg) ;
3619       if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
3620         // prefetchw [ebx + Offset(_owner)-2]
3621         masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
3622       }
3623 
3624       // Note that we could employ various encoding schemes to reduce
3625       // the number of loads below (currently 4) to just 2 or 3.
3626       // Refer to the comments in synchronizer.cpp.
3627       // In practice the chain of fetches doesn't seem to impact performance, however.
3628       if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
3629          // Attempt to reduce branch density - AMD's branch predictor.
3630          masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  
3631          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3632          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3633          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3634          masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
3635          masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
3636          masm.jmpb  (DONE_LABEL) ; 
3637       } else { 
3638          masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  
3639          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3640          masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
3641          masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3642          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3643          masm.jccb  (Assembler::notZero, CheckSucc) ; 
3644          masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
3645          masm.jmpb  (DONE_LABEL) ; 
3646       }
3647 
3648       // The Following code fragment (EmitSync & 65536) improves the performance of
3649       // contended applications and contended synchronization microbenchmarks.
3650       // Unfortunately the emission of the code - even though not executed - causes regressions
3651       // in scimark and jetstream, evidently because of $ effects.  Replacing the code
3652       // with an equal number of never-executed NOPs results in the same regression.
3653       // We leave it off by default.
3654 
3655       if ((EmitSync & 65536) != 0) {
3656          Label LSuccess, LGoSlowPath ;
3657 
3658          masm.bind  (CheckSucc) ;
3659 
3660          // Optional pre-test ... it's safe to elide this
3661          if ((EmitSync & 16) == 0) { 
3662             masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; 
3663             masm.jccb  (Assembler::zero, LGoSlowPath) ; 
3664          }
3665 
3666          // We have a classic Dekker-style idiom:
3667          //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
3668          // There are a number of ways to implement the barrier:
3669          // (1) lock:andl &m->_owner, 0
3670          //     is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
3671          //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
3672          //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
3673          // (2) If supported, an explicit MFENCE is appealing.
3674          //     In older IA32 processors MFENCE is slower than lock:add or xchg
3675          //     particularly if the write-buffer is full as might be the case if
3676          //     if stores closely precede the fence or fence-equivalent instruction.
3677          //     In more modern implementations MFENCE appears faster, however.
3678          // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
3679          //     The $lines underlying the top-of-stack should be in M-state.
3680          //     The locked add instruction is serializing, of course.
3681          // (4) Use xchg, which is serializing
3682          //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
3683          // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
3684          //     The integer condition codes will tell us if succ was 0.
3685          //     Since _succ and _owner should reside in the same $line and
3686          //     we just stored into _owner, it's likely that the $line
3687          //     remains in M-state for the lock:orl.
3688          //
3689          // We currently use (3), although it's likely that switching to (2)
3690          // is correct for the future.
3691             
3692          masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
3693          if (os::is_MP()) { 
3694             if (VM_Version::supports_sse2() && 1 == FenceInstruction) { 
3695               masm.mfence();
3696             } else { 
3697               masm.lock () ; masm.addptr(Address(rsp, 0), 0) ; 
3698             }
3699          }
3700          // Ratify _succ remains non-null
3701          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; 
3702          masm.jccb  (Assembler::notZero, LSuccess) ; 
3703 
3704          masm.xorptr(boxReg, boxReg) ;                  // box is really EAX
3705          if (os::is_MP()) { masm.lock(); }
3706          masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3707          masm.jccb  (Assembler::notEqual, LSuccess) ;
3708          // Since we're low on registers we installed rsp as a placeholding in _owner.
3709          // Now install Self over rsp.  This is safe as we're transitioning from
3710          // non-null to non=null
3711          masm.get_thread (boxReg) ;
3712          masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
3713          // Intentional fall-through into LGoSlowPath ...
3714 
3715          masm.bind  (LGoSlowPath) ; 
3716          masm.orptr(boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3717          masm.jmpb  (DONE_LABEL) ; 
3718 
3719          masm.bind  (LSuccess) ; 
3720          masm.xorptr(boxReg, boxReg) ;                 // set ICC.ZF=1 to indicate success
3721          masm.jmpb  (DONE_LABEL) ; 
3722       }
3723 
3724       masm.bind (Stacked) ;
3725       // It's not inflated and it's not recursively stack-locked and it's not biased.
3726       // It must be stack-locked.
3727       // Try to reset the header to displaced header.
3728       // The "box" value on the stack is stable, so we can reload
3729       // and be assured we observe the same value as above.
3730       masm.movptr(tmpReg, Address(boxReg, 0)) ;
3731       if (os::is_MP()) {   masm.lock();    }
3732       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
      // Intentional fall-thru into DONE_LABEL
3734 
3735 
3736       // DONE_LABEL is a hot target - we'd really like to place it at the
3737       // start of cache line by padding with NOPs.
3738       // See the AMD and Intel software optimization manuals for the
3739       // most efficient "long" NOP encodings.
3740       // Unfortunately none of our alignment mechanisms suffice.
3741       if ((EmitSync & 65536) == 0) {
3742          masm.bind (CheckSucc) ;
3743       }
3744       masm.bind(DONE_LABEL);
3745 
3746       // Avoid branch to branch on AMD processors
3747       if (EmitSync & 32768) { masm.nop() ; }
3748     }
3749   %}
3750 
3751 
  // Emit a one-byte POP EDX (0x5A), discarding/consuming the top stack word.
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);
  %}
3755 
  // Rethrow an exception: emit a 32-bit pc-relative JMP to the shared rethrow
  // stub, with a runtime-call relocation so the target survives code movement.
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();          // mark jump start so the relocation covers it
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}
3762 
3763 
3764   // Convert a double to an int.  Java semantics require we do complex
3765   // manglelations in the corner cases.  So we set the rounding mode to
3766   // 'zero', store the darned double down as an int, and reset the
3767   // rounding mode to 'nearest'.  The hardware throws an exception which
3768   // patches up the correct value directly to the stack.
  enc_class D2I_encoding( regD src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    // 0x80000000 is the "integer indefinite" FISTP stores for NaN and
    // out-of-range inputs; only that value takes the slow path.
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}
3811 
  // Convert a double (in FPR0) to a long.  Truncate via a 64-bit FISTP;
  // the 0x8000000000000000 "indefinite" result (NaN/overflow) is detected
  // by testing EDX:EAX and falls into the d2l runtime wrapper.
  enc_class D2L_encoding( regD src ) %{
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words for the 64-bit result
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted long; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x5A);       // POP EDX
    // Slow path only when the result is exactly 0x8000000000000000,
    // i.e. EDX == 0x80000000 and EAX == 0.
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);     // Size of slow_call
    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}
3853 
  // Convert a float (XMM) to a long.  The value is bounced through memory
  // onto the x87 stack (FLD_S widens losslessly to double), truncated with
  // a 64-bit FISTP, and the 0x8000000000000000 "indefinite" result falls
  // into the shared d2l runtime wrapper.
  enc_class X2L_encoding( regX src ) %{
    // Allocate two words for the 64-bit result
    emit_opcode(cbuf,0x83);      // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF3 );  // MOVSS [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );     // FLD_S [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9);      // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());

    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);      // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);

    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);      // FLDCW   std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted long; adjust CPU stack
    emit_opcode(cbuf,0x58);      // POP EAX

    emit_opcode(cbuf,0x5A);      // POP EDX

    // Slow path only for EDX:EAX == 0x8000000000000000
    emit_opcode(cbuf,0x81);      // CMP EDX,imm
    emit_d8    (cbuf,0xFA);      // rdx
    emit_d32   (cbuf,0x80000000);//         0x80000000

    emit_opcode(cbuf,0x75);      // JNE around_slow_call
    emit_d8    (cbuf,0x13+4);    // Size of slow_call

    emit_opcode(cbuf,0x85);      // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);      // 2/rax,/rax,

    emit_opcode(cbuf,0x75);      // JNE around_slow_call
    emit_d8    (cbuf,0x13);      // Size of slow_call

    // Slow path: reload the float onto the x87 stack for the wrapper
    // Allocate a word
    emit_opcode(cbuf,0x83);      // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);

    emit_opcode  (cbuf, 0xF3 );  // MOVSS [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );     // FLD_S [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);      // ADD ESP,4
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x04);

    // CALL directly to the runtime (the float is now a double in FPR0,
    // so the d2l wrapper applies)
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}
3926 
  // Convert a double (XMM) to a long.  The value is bounced through memory
  // onto the x87 stack, truncated with a 64-bit FISTP, and the
  // 0x8000000000000000 "indefinite" result falls into the d2l wrapper.
  enc_class XD2L_encoding( regXD src ) %{
    // Allocate two words for the 64-bit result
    emit_opcode(cbuf,0x83);      // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 );  // MOVSD [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );     // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9);      // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());

    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);      // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);

    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);      // FLDCW   std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted long; adjust CPU stack
    emit_opcode(cbuf,0x58);      // POP EAX

    emit_opcode(cbuf,0x5A);      // POP EDX

    // Slow path only for EDX:EAX == 0x8000000000000000
    emit_opcode(cbuf,0x81);      // CMP EDX,imm
    emit_d8    (cbuf,0xFA);      // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000

    emit_opcode(cbuf,0x75);      // JNE around_slow_call
    emit_d8    (cbuf,0x13+4);    // Size of slow_call

    emit_opcode(cbuf,0x85);      // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);      // 2/rax,/rax,

    emit_opcode(cbuf,0x75);      // JNE around_slow_call
    emit_d8    (cbuf,0x13);      // Size of slow_call

    // Push src onto stack slow-path
    // Allocate two words
    emit_opcode(cbuf,0x83);      // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 );  // MOVSD [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );     // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);      // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x08);

    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);      // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}
4000 
  // Convert an x87 double to a single-precision XMM value: store it to the
  // stack as a float (with rounding) and reload it with MOVSS.
  enc_class D2X_encoding( regX dst, regD src ) %{
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      // src is not at the FPU top-of-stack: push a copy first, then use
      // the popping store (FSTP) to discard that copy.
      emit_opcode( cbuf, 0xD9 );       // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S  [ESP]

    emit_opcode  (cbuf, 0xF3 );        // MOVSS dst(xmm), [ESP]
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x10 );
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);            // ADD ESP,4
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x04);
    // Carry on here...
  %}
4024 
  // Finish an SSE float/double-to-int truncating convert and handle the
  // 0x80000000 "indefinite" result via the d2i runtime wrapper.
  // NOTE(review): the CVTT opcode bytes themselves appear to be emitted by
  // the enclosing instruct's format ($primary selects the double variant;
  // only the ModRM byte is emitted here) -- confirm against the users.
  enc_class FX2I_encoding( regX src, eRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);

    // Compare the result to see if we need to go to the slow path
    emit_opcode(cbuf,0x81);       // CMP dst,imm
    emit_rm    (cbuf,0x3,0x7,$dst$$reg);
    emit_d32   (cbuf,0x80000000); //         0x80000000

    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x13);       // Size of slow_call
    // Store xmm to a temp memory
    // location and push it onto stack.

    emit_opcode(cbuf,0x83);  // SUB ESP,4 (8 for the double variant)
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf, $primary ? 0x8 : 0x4);

    emit_opcode  (cbuf, $primary ? 0xF2 : 0xF3 );   // MOVSD/MOVSS [ESP], xmm
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf, $primary ? 0xDD : 0xD9 );      // FLD_D/FLD_S [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);    // ADD ESP,4 (8 for the double variant)
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf, $primary ? 0x8 : 0x4);

    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );

    // Carry on here...
  %}
4061 
  // Convert an XMM float to an x87 double: spill with MOVSS, reload with
  // FLD_S, which widens losslessly to the x87 double format.
  enc_class X2D_encoding( regD dst, regX src ) %{
    // Allocate a word
    emit_opcode(cbuf,0x83);     // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);

    emit_opcode  (cbuf, 0xF3 ); // MOVSS [ESP], xmm
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );    // FLD_S [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);     // ADD ESP,4
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x04);

    // Carry on here...
  %}
4082 
  // Absolute value of an XMM float: clear the sign bit by ANDing with the
  // sign-mask constant pool.
  enc_class AbsXF_encoding(regX dst) %{
    address signmask_address=(address)float_signmask_pool;
    // ANDPS  $dst,[signmask]
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x54);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // mod=00,rm=101: disp32 absolute
    emit_d32(cbuf, (int)signmask_address);
  %}
4091 
  // Absolute value of an XMM double: clear the sign bit by ANDing with the
  // double sign-mask constant pool.
  enc_class AbsXD_encoding(regXD dst) %{
    address signmask_address=(address)double_signmask_pool;
    // ANDPD  $dst,[signmask]
    emit_opcode(cbuf, 0x66);
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x54);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // mod=00,rm=101: disp32 absolute
    emit_d32(cbuf, (int)signmask_address);
  %}
4101 
  // Negate an XMM float: flip the sign bit by XORing with the sign-flip
  // constant pool.
  enc_class NegXF_encoding(regX dst) %{
    address signmask_address=(address)float_signflip_pool;
    // XORPS  $dst,[signflip]
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x57);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // mod=00,rm=101: disp32 absolute
    emit_d32(cbuf, (int)signmask_address);
  %}
4110 
  // Negate an XMM double: flip the sign bit by XORing with the double
  // sign-flip constant pool.
  enc_class NegXD_encoding(regXD dst) %{
    address signmask_address=(address)double_signflip_pool;
    // XORPD  $dst,[signflip]
    emit_opcode(cbuf, 0x66);
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x57);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // mod=00,rm=101: disp32 absolute
    emit_d32(cbuf, (int)signmask_address);
  %}
4120 
  // Multiply the x87 top-of-stack by another FP stack register.
  enc_class FMul_ST_reg( eRegF src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}
4127 
  // Add another FP stack register into the x87 top-of-stack.
  enc_class FAdd_ST_reg( eRegF src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */  (non-popping; FADDP would be DE C0+i)
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}
4134 
  // Add the x87 top-of-stack into src2 and pop the FPU stack.
  enc_class FAddP_reg_ST( eRegF src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}
4140 
  // Fused subtract-then-divide on the x87 top-of-stack:
  // ST = (ST - src1) / src2; result is left in ST.
  enc_class subF_divF_encode( eRegF src1, eRegF src2) %{
    // Operand has been loaded into fp ST (stack top)
      // FSUB   ST,$src1  /* D8 E0+i */
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xE0 + $src1$$reg);

      // FDIV   ST,$src2  /* D8 F0+i */
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}
4151 
  // Fused add-then-multiply on the x87 top-of-stack:
  // ST = (ST + src1) * src2; result is left in ST.
  enc_class MulFAddF (eRegF src1, eRegF src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C*+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}
4162 
4163 
  // Fused add-then-multiply with the result stored to src2 and the FPU
  // stack popped: src2 = (ST + src1) * src2.
  enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}
4174 
4175   // Atomically load the volatile long
4176   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
4177     emit_opcode(cbuf,0xDF);
4178     int rm_byte_opcode = 0x05;
4179     int base     = $mem$$base;
4180     int index    = $mem$$index;
4181     int scale    = $mem$$scale;
4182     int displace = $mem$$disp;
4183     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4184     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4185     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
4186   %}
4187 
4188   enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
4189     { // Atomic long load
4190       // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4191       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4192       emit_opcode(cbuf,0x0F);
4193       emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4194       int base     = $mem$$base;
4195       int index    = $mem$$index;
4196       int scale    = $mem$$scale;
4197       int displace = $mem$$disp;
4198       bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4199       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4200     }
4201     { // MOVSD $dst,$tmp ! atomic long store
4202       emit_opcode(cbuf,0xF2);
4203       emit_opcode(cbuf,0x0F);
4204       emit_opcode(cbuf,0x11);
4205       int base     = $dst$$base;
4206       int index    = $dst$$index;
4207       int scale    = $dst$$scale;
4208       int displace = $dst$$disp;
4209       bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
4210       encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4211     }
4212   %}
4213 
  // Atomically load a volatile long from memory into an integer register
  // pair: one 64-bit XMM load, then split into $dst.lo / $dst.hi with
  // MOVD and a 32-bit right shift of the temp.
  enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
    { // Atomic long load
      // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
      int base     = $mem$$base;
      int index    = $mem$$index;
      int scale    = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
    { // MOVD $dst.lo,$tmp -- low 32 bits
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
    }
    { // PSRLQ $tmp,32 -- bring the high half down
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x73);
      emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
      emit_d8(cbuf, 0x20);
    }
    { // MOVD $dst.hi,$tmp -- high 32 bits
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
    }
  %}
4247 
4248   // Volatile Store Long.  Must be atomic, so move it into
4249   // the FP TOS and then do a 64-bit FIST.  Has to probe the
4250   // target address before the store (for null-ptr checks)
4251   // so the memory operand is used twice in the encoding.
4252   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
4253     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
4254     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
4255     emit_opcode(cbuf,0xDF);
4256     int rm_byte_opcode = 0x07;
4257     int base     = $mem$$base;
4258     int index    = $mem$$index;
4259     int scale    = $mem$$scale;
4260     int displace = $mem$$disp;
4261     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4262     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4263   %}
4264 
  // Atomically copy a long from a stack slot to a volatile memory location
  // using a 64-bit XMM load/store pair.
  enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
    { // Atomic long load
      // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
      int base     = $src$$base;
      int index    = $src$$index;
      int scale    = $src$$scale;
      int displace = $src$$disp;
      bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
    cbuf.set_insts_mark();            // Mark start of MOVSD in case $mem has an oop
    { // MOVSD $mem,$tmp ! atomic long store
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x11);
      int base     = $mem$$base;
      int index    = $mem$$index;
      int scale    = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
  %}
4291 
  // Atomically store a volatile long from an integer register pair:
  // assemble the 64-bit value in $tmp (MOVD lo, MOVD hi, PUNPCKLDQ),
  // then perform a single atomic MOVSD store to $mem.
  enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
    { // MOVD $tmp,$src.lo
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    }
    { // MOVD $tmp2,$src.hi
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
    }
    { // PUNPCKLDQ $tmp,$tmp2 -- interleave: $tmp now holds hi:lo
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x62);
      emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
    }
    cbuf.set_insts_mark();            // Mark start of MOVSD in case $mem has an oop
    { // MOVSD $mem,$tmp ! atomic long store
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x11);
      int base     = $mem$$base;
      int index    = $mem$$index;
      int scale    = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
  %}
4324 
4325   // Safepoint Poll.  This polls the safepoint page, and causes an
4326   // exception if it is not readable. Unfortunately, it kills the condition code
4327   // in the process
  // We currently use TESTL [spp],EDI
4329   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
4330 
  enc_class Safepoint_Poll() %{
    // Record a poll-type relocation so the VM can locate (and page-protect)
    // this polling site.
    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
    emit_opcode(cbuf,0x85);                // TEST [polling_page],EDI
    emit_rm (cbuf, 0x0, 0x7, 0x5);         // mod=00,reg=EDI,rm=101: disp32 absolute
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  %}
4337 %}
4338 
4339 
4340 //----------FRAME--------------------------------------------------------------
4341 // Definition of frame structure and management information.
4342 //
4343 //  S T A C K   L A Y O U T    Allocators stack-slot number
4344 //                             |   (to get allocators register number
4345 //  G  Owned by    |        |  v    add OptoReg::stack0())
4346 //  r   CALLER     |        |
4347 //  o     |        +--------+      pad to even-align allocators stack-slot
4348 //  w     V        |  pad0  |        numbers; owned by CALLER
4349 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4350 //  h     ^        |   in   |  5
4351 //        |        |  args  |  4   Holes in incoming args owned by SELF
4352 //  |     |        |        |  3
4353 //  |     |        +--------+
4354 //  V     |        | old out|      Empty on Intel, window on Sparc
4355 //        |    old |preserve|      Must be even aligned.
4356 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4357 //        |        |   in   |  3   area for Intel ret address
4358 //     Owned by    |preserve|      Empty on Sparc.
4359 //       SELF      +--------+
4360 //        |        |  pad2  |  2   pad to align old SP
4361 //        |        +--------+  1
4362 //        |        | locks  |  0
4363 //        |        +--------+----> OptoReg::stack0(), even aligned
4364 //        |        |  pad1  | 11   pad to align new SP
4365 //        |        +--------+
4366 //        |        |        | 10
4367 //        |        | spills |  9   spills
4368 //        V        |        |  8   (pad0 slot for callee)
4369 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4370 //        ^        |  out   |  7
4371 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4372 //     Owned by    +--------+
4373 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4374 //        |    new |preserve|      Must be even-aligned.
4375 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4376 //        |        |        |
4377 //
4378 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4379 //         known from SELF's arguments and the Java calling convention.
4380 //         Region 6-7 is determined per call site.
4381 // Note 2: If the calling convention leaves holes in the incoming argument
4382 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
4387 //         varargs C calling conventions.
4388 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4389 //         even aligned with pad0 as needed.
4390 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4391 //         region 6-11 is even aligned; it may be padded out more so that
4392 //         the region from SP to FP meets the minimum stack alignment.
4393 
4394 frame %{
4395   // What direction does stack grow in (assumed to be same for C & Java)
4396   stack_direction(TOWARDS_LOW);
4397 
4398   // These three registers define part of the calling convention
4399   // between compiled code and the interpreter.
4400   inline_cache_reg(EAX);                // Inline Cache Register
4401   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
4402 
4403   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
4404   cisc_spilling_operand_name(indOffset32);
4405 
4406   // Number of stack slots consumed by locking an object
4407   sync_stack_slots(1);
4408 
4409   // Compiled code's Frame Pointer
4410   frame_pointer(ESP);
4411   // Interpreter stores its frame pointer in a register which is
4412   // stored to the stack by I2CAdaptors.
4413   // I2CAdaptors convert from interpreted java to compiled java.
4414   interpreter_frame_pointer(EBP);
4415 
4416   // Stack alignment requirement
4417   // Alignment size in bytes (128-bit -> 16 bytes)
4418   stack_alignment(StackAlignmentInBytes);
4419 
4420   // Number of stack slots between incoming argument block and the start of
4421   // a new frame.  The PROLOG must add this many slots to the stack.  The
4422   // EPILOG must remove this many slots.  Intel needs one slot for
4423   // return address and one for rbp, (must save rbp)
4424   in_preserve_stack_slots(2+VerifyStackAtCalls);
4425 
4426   // Number of outgoing stack slots killed above the out_preserve_stack_slots
4427   // for calls to C.  Supports the var-args backing area for register parms.
4428   varargs_C_out_slots_killed(0);
4429 
4430   // The after-PROLOG location of the return address.  Location of
4431   // return address specifies a type (REG or STACK) and a number
4432   // representing the register number (i.e. - use a register name) or
4433   // stack slot.
4434   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4435   // Otherwise, it is above the locks and verification slot and alignment word
4436   return_addr(STACK - 1 +
4437               round_to(1+VerifyStackAtCalls+
4438               Compile::current()->fixed_slots(),
4439               (StackAlignmentInBytes/wordSize)));
4440 
4441   // Body of function which returns an integer array locating
4442   // arguments either in registers or in stack slots.  Passed an array
4443   // of ideal registers called "sig" and a "length" count.  Stack-slot
4444   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4445   // arguments for a CALLEE.  Incoming stack arguments are
4446   // automatically biased by the preserve_stack_slots field above.
4447   calling_convention %{
4448     // No difference between ingoing/outgoing just pass false
4449     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
4450   %}
4451 
4452 
4453   // Body of function which returns an integer array locating
4454   // arguments either in registers or in stack slots.  Passed an array
4455   // of ideal registers called "sig" and a "length" count.  Stack-slot
4456   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4457   // arguments for a CALLEE.  Incoming stack arguments are
4458   // automatically biased by the preserve_stack_slots field above.
4459   c_calling_convention %{
4460     // This is obviously always outgoing
4461     (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
4462   %}
4463 
4464   // Location of C & interpreter return values
4465   c_return_value %{
4466     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
4467     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
4468     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
4469 
4470     // in SSE2+ mode we want to keep the FPU stack clean so pretend
4471     // that C functions return float and double results in XMM0.
4472     if( ideal_reg == Op_RegD && UseSSE>=2 )
4473       return OptoRegPair(XMM0b_num,XMM0a_num);
4474     if( ideal_reg == Op_RegF && UseSSE>=2 )
4475       return OptoRegPair(OptoReg::Bad,XMM0a_num);
4476 
4477     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
4478   %}
4479 
4480   // Location of return values
4481   return_value %{
4482     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
4483     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
4484     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
4485     if( ideal_reg == Op_RegD && UseSSE>=2 )
4486       return OptoRegPair(XMM0b_num,XMM0a_num);
4487     if( ideal_reg == Op_RegF && UseSSE>=1 )
4488       return OptoRegPair(OptoReg::Bad,XMM0a_num);
4489     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
4490   %}
4491 
4492 %}
4493 
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_pc_relative(0);  // Required PC Relative flag
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction
4509 
4510 //----------OPERANDS-----------------------------------------------------------
4511 // Operand definitions must precede instruction definitions for correct parsing
4512 // in the ADLC because operands constitute user defined types which are used in
4513 // instruction definitions.
4514 
//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate: any 32-bit integer constant
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes: 0..3, i.e. index*1/*2/*4/*8
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant that fits a signed 8-bit immediate field
// (enables the short-form x86 encodings)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant that fits a signed 16-bit immediate field
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4582 
// Constant for long shifts: exactly 32, i.e. a whole-word shift
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in [1,31]: long shift that stays within a 32-bit word pair
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in [32,63]: long shift that crosses the 32-bit word boundary
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant 1
operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 2
operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant 3
operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4637 
// Pointer Immediate: any constant pointer value
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: any 64-bit constant
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate minus one (all bits set)
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: the low 32-bit mask, 0x00000000FFFFFFFF
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate whose value fits in a signed 32-bit int
// (sign-extending the low word reproduces the full long)
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}
4716 
// Double Immediate zero (x87 path only: UseSSE<=1)
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one (x87 path only)
operand immD1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 path only)
operand immD() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 path: handled in XMM registers)
operand immXD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero (SSE2 path)
operand immXD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.  Comparing the raw bit pattern to 0 also excludes
  // -0.0, whose sign bit is set.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero (x87 path only: UseSSE == 0)
operand immF0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one (x87 path only)
operand immF1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 path only)
operand immF() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE path: handled in XMM registers)
operand immXF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0 (bit-pattern compare excludes
// the negative-zero encoding, whose sign bit is set)
operand immXF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4819 
// Immediates for special shifts (sign extend)

// Shift count of 16 (e.g. short sign-extension via shift pairs)
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Shift count of 24 (e.g. byte sign-extension via shift pairs)
operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4856 
// Register Operands
// Integer Register: any allocatable general-purpose register
operand eRegI() %{
  constraint(ALLOC_IN_RC(e_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register: only the byte-addressable registers
// (EAX/EBX/ECX/EDX, which have AL/BL/CL/DL sub-registers)
operand xRegI(eRegI reg) %{
  constraint(ALLOC_IN_RC(x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers: force allocation into EAX
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(eRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers: force allocation into EBX
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(eRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Force allocation into ECX
operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(eRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

// Force allocation into EDX
operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(eRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

// Force allocation into EDI
operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(eRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register excluding EAX ("nax" = no-EAX register class)
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding EAX and EDX ("nadx" = no EAX/EDX)
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register excluding ECX ("ncx" = no ECX)
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Force allocation into ESI.
// NOTE: this operand was once used by cmpFastUnlock, but conflicted with
// the 'object' register there.
operand eSIRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(esi_reg));
   match(reg);
   match(eRegI);

   format %{ "ESI" %}
   interface(REG_INTER);
%}
4980 
// Pointer Register: any register, including non-allocatable ones
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer in any allocatable general-purpose register
operand eRegP() %{
  constraint(ALLOC_IN_RC(e_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(e_reg_no_rbp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register excluding EAX and EBX
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register from the p_reg class
operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

// Pointer forced into ESI
operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Pointer forced into EBP
operand eBPRegP() %{
  constraint(ALLOC_IN_RC(ebp_reg));
  match(RegP);
  format %{ "EBP" %}
  interface(REG_INTER);
%}
5104 
// Long value in a pair of general-purpose registers
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Long forced into EDX:EAX (the x86 mul/div register pair)
operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

// Long forced into EBX:ECX
operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply: only the low half (EAX) is used
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}
5138 
// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
// (unsigned flavor: FP compares set the carry/zero flags)
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// Unsigned flags where the carry flag already encodes the unordered case;
// predicate(false) so it is only reached via explicit operand matching
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare (LT/GE flavor)
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
// Condition Code Register used by long compare (EQ/NE flavor)
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
// Condition Code Register used by long compare (LE/GT flavor)
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}
5185 
// Float register operands
// Double in an x87 stack register (pre-SSE2 path)
operand regD() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

// Double forced into x87 stack slot FPR1
operand regDPR1(regD reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// Double forced into x87 stack slot FPR2
operand regDPR2(regD reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

// Double in any x87 stack register except FPR1
operand regnotDPR1(regD reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands (SSE2 path)
operand regXD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(xdb_reg));
  match(RegD);
  match(regXD6);
  match(regXD7);
  format %{ %}
  interface(REG_INTER);
%}

// XMM6 double register operands
operand regXD6(regXD reg) %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(xdb_reg6));
  match(reg);
  format %{ "XMM6" %}
  interface(REG_INTER);
%}

// XMM7 double register operands
operand regXD7(regXD reg) %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(xdb_reg7));
  match(reg);
  format %{ "XMM7" %}
  interface(REG_INTER);
%}

// Float in an x87 stack register (pre-SSE2 path)
operand regF() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float forced into x87 stack slot FPR1
operand regFPR1(regF reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// Float in an XMM register (SSE path)
operand regX() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(xmm_reg));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}
5277 
5278 
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER encodings below, index(0x4) means "no index register"
// (0x4 is the SIB escape for no-index on x86).
// Direct Memory Operand: absolute address from a constant pointer
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand: [reg]
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(e_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand: [reg + disp8]
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand: [reg + disp32]
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand: constant pointer base plus
// an integer register (note the operands of the AddP are swapped)
operand indOffset32X(eRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(eRegP reg, eRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" with out a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}
5419 
//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use it's address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support: address pinned to ESI so it cannot overlap the
// long destination register pair
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{  %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long: [ESI]
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand: [ESI + disp32]
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Addressing modes permitted for load-long instructions
opclass load_long_memory(load_long_indirect, load_long_indOffset32);
5465 
5466 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding an integer temporary
operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a float temporary
operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a double temporary
operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a long temporary
operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5530 
//----------Memory Operands - Win95 Implicit Null Variants----------------
// These mirror the normal memory operands but exclude EBP as a base
// (eRegP_no_EBP), since EBP-based implicit null tests are unsafe on Win95.
// The high op_cost discourages their use unless required.
// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(e_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}
5622 
5623 //----------Conditional Branch Operands----------------------------------------
5624 // Comparison Op  - This is the operation of the comparison, and is limited to
5625 //                  the following set of codes:
5626 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5627 //
5628 // Other attributes of the comparison, such as unsignedness, are specified
5629 // by the comparison instruction that sets a condition code flags register.
5630 // That result is represented by a flags operand whose subtype is appropriate
5631 // to the unsignedness (etc.) of the comparison.
5632 //
5633 // Later, the instruction which matches both the Comparison Op (a Bool) and
5634 // the flags (produced by the Cmp) specifies the coding of the comparison op
5635 // by matching a specific subtype of Bool operand below, such as cmpOpU.
5636 
// Comparison Code
// Generic signed-compare condition operand.  Each entry maps an ideal
// BoolTest relation to an x86 condition-code value and the assembler
// mnemonic suffix used in formats (e.g. 0x4/"e" for equal).
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");             // signed condition codes: l/ge/le/g
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
  %}
%}
5651 
// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
// Same as cmpOp but uses the unsigned condition codes (b/nb/be/nbe),
// i.e. below/above tests on the carry flag instead of signed l/g.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");             // unsigned condition codes: b/nb/be/nbe
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}
5668 
// Floating comparisons that don't require any fixup for the unordered case.
// The predicate restricts this operand to the four ordering tests
// (lt/ge/le/gt); eq/ne need extra work and are handled by cmpOpUCF2 below.
// Condition codes are the unsigned set, as for cmpOpU.
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}
5686 
5687 
// Floating comparisons that can be fixed up with extra conditional jumps.
// Complement of cmpOpUCF: only the eq/ne tests, which cannot be expressed
// with a single unsigned condition after an unordered FP compare and so
// need additional jump(s) emitted by the matching instruction.
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}
5703 
// Comparison Code for FP conditional move.
// Unlike the operands above, each entry carries a single encoding value
// and no mnemonic suffix -- presumably the FCMOVcc opcode byte pattern;
// confirm against the FCMOV encode classes elsewhere in this file.
operand cmpOp_fcmov() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
  %}
%}
5718 
// Comparison Code used in long compares.
// Mirror image of cmpOp: each relation carries the condition code of its
// commuted counterpart (less -> "g", greater -> "l", etc.), for use when
// the operands of the underlying compare have been swapped.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
  %}
%}
5733 
5734 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
5736 // instruction definitions by not requiring the AD writer to specify separate
5737 // instructions for every form of operand when the instruction accepts
5738 // multiple operand types with the same basic encoding and format.  The classic
5739 // case of this is memory operands.
5740 
// All addressing modes accepted wherever an instruction takes a generic
// "memory" operand, so one instruct rule covers every form.
opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);
5743 
// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
// Note: identical to "memory" above except indOffset32X is omitted,
// which is what forbids the oop-as-offset form.
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);
5749 
5750 
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions (x86)
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
pipe_desc(S0, S1, S2, S3, S4, S5);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Naming convention: ialu or fpu
// Then: _reg
// Then: _reg if there is a 2nd register
// Then: _long if it's a pair of instructions implementing a long
// Then: _fat if it requires the big decoder
//   Or: _mem if it requires the big decoder and a memory unit.

// Integer ALU reg operation
pipe_class ialu_reg(eRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(eRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation
// NOTE(review): the src parameter is declared "memory" despite the reg-reg
// name/comment -- confirm against users of this class.
pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
// (load_long_memory is an opclass defined elsewhere in this file.)
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, eRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // Both mems
%}

// Integer Store to Memory (immediate source, so no register operand)
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Integer ALU0 reg-reg operation
pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation writing the flags register
pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation writing the flags register
pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation writing the flags register
pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Conditional move reg-reg (4-instruction CMPLT idiom)
pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any decoder
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Float reg operation
pipe_class fpu_reg(regD dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regD dst, regD src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg-reg operation
pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg-reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-mem-reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regD dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regD src) %{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regD dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant
pipe_class fpu_reg_reg_con(regD dst, regD src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR   : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr    : S1(read);
    BR    : S3;
%}

// Allocation idiom
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow(  ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}

// The real do-nothing guy
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}

%}
6223 
6224 //----------INSTRUCTIONS-------------------------------------------------------
6225 //
6226 // match      -- States which machine-independent subtree may be replaced
6227 //               by this instruction.
6228 // ins_cost   -- The estimated cost of this instruction is used by instruction
6229 //               selection to identify a minimum cost tree of machine
6230 //               instructions that matches a tree of machine-independent
6231 //               instructions.
6232 // format     -- A string providing the disassembly for this instruction.
6233 //               The value of an instruction's operand may be inserted
6234 //               by referring to it with a '$' prefix.
6235 // opcode     -- Three instruction opcodes may be provided.  These are referred
6236 //               to within an encode class as $primary, $secondary, and $tertiary
6237 //               respectively.  The primary opcode is commonly used to
6238 //               indicate the type of machine instruction, while secondary
6239 //               and tertiary are often used for prefix options or addressing
6240 //               modes.
6241 // ins_encode -- A list of encode classes with parameters. The encode class
6242 //               name must have been defined in an 'enc_class' specification
6243 //               in the encode section of the architecture description.
6244 
6245 //----------BSWAP-Instruction--------------------------------------------------
// Reverse the byte order of a 32-bit register in place with BSWAP.
// dst is both input and output (match binds ReverseBytesI to the same reg).
instruct bytes_reverse_int(eRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP  $dst" %}
  opcode(0x0F, 0xC8);               // 0F C8+rd = BSWAP r32
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}
6254 
// Reverse the byte order of a 64-bit value held in a register pair:
// BSWAP each half, then exchange the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP  $dst.lo\n\t"
            "BSWAP  $dst.hi\n\t"
            "XCHG   $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}
6266 
// Reverse the bytes of an unsigned 16-bit value: BSWAP moves the two
// significant bytes to the top of the register, then a logical shift
// right by 16 brings them down zero-extended.
instruct bytes_reverse_unsigned_short(eRegI dst) %{
  match(Set dst (ReverseBytesUS dst));

  format %{ "BSWAP  $dst\n\t" 
            "SHR    $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ shrl($dst$$Register, 16); 
  %}
  ins_pipe( ialu_reg );
%}
6278 
// Reverse the bytes of a signed 16-bit value: same as the unsigned
// variant but uses an arithmetic shift so the result is sign-extended.
instruct bytes_reverse_short(eRegI dst) %{
  match(Set dst (ReverseBytesS dst));

  format %{ "BSWAP  $dst\n\t" 
            "SAR    $dst,16\n\t" %}
  ins_encode %{
    __ bswapl($dst$$Register);
    __ sarl($dst$$Register, 16); 
  %}
  ins_pipe( ialu_reg );
%}
6290 
6291 
6292 //---------- Zeros Count Instructions ------------------------------------------
6293 
// Count leading zeros of an int with the LZCNT instruction (guarded by
// UseCountLeadingZerosInstruction).  LZCNT writes flags, hence KILL cr.
instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
  ins_encode %{
    __ lzcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
6305 
// Count leading zeros of an int without LZCNT, via BSR.
// BSR yields the index i of the highest set bit, so nlz = 31 - i,
// computed as NEG + ADD 31.  BSR leaves dst undefined and ZF set when
// the source is zero; the MOV dst,-1 on that path makes the final
// result 31 - (-1) = 32, the correct answer for zero input.
instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosI src));
  effect(KILL cr);

  format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
            "JNZ    skip\n\t"
            "MOV    $dst, -1\n"
      "skip:\n\t"
            "NEG    $dst\n\t"
            "ADD    $dst, 31" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label skip;
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, skip);
    __ movl(Rdst, -1);                  // zero input: force result to 32 below
    __ bind(skip);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerInt - 1);      // dst = 31 - bsr index
  %}
  ins_pipe(ialu_reg);
%}
6330 
// Count leading zeros of a long with LZCNT on a 32-bit register pair.
// LZCNT sets CF when its source is all zero, so: count the high word;
// if CF is clear the high word was nonzero and we are done, otherwise
// count the low word and add 32.  TEMP dst keeps dst disjoint from src.
instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JNC    done\n\t"
            "LZCNT  $dst, $src.lo\n\t"
            "ADD    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label done;
    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::carryClear, done);   // CF clear => high word nonzero
    __ lzcntl(Rdst, Rsrc);
    __ addl(Rdst, BitsPerInt);              // all 32 high bits were zero
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}
6353 
// Count leading zeros of a long without LZCNT, via BSR on each half.
// Computes the index of the highest set bit of the full 64-bit value
// (bsr(hi)+32 if hi != 0, else bsr(lo), else -1), then nlz = 63 - index
// via NEG + ADD 63; a zero input thus yields 63 - (-1) = 64.
instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
  predicate(!UseCountLeadingZerosInstruction);
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
            "JZ     msw_is_zero\n\t"
            "ADD    $dst, 32\n\t"
            "JMP    not_zero\n"
      "msw_is_zero:\n\t"
            "BSR    $dst, $src.lo\n\t"
            "JNZ    not_zero\n\t"
            "MOV    $dst, -1\n"
      "not_zero:\n\t"
            "NEG    $dst\n\t"
            "ADD    $dst, 63\n" %}
 ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_is_zero;
    Label not_zero;
    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::zero, msw_is_zero);
    __ addl(Rdst, BitsPerInt);          // bit index within full 64-bit value
    __ jmpb(not_zero);
    __ bind(msw_is_zero);
    __ bsrl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, not_zero);
    __ movl(Rdst, -1);                  // whole value zero: result becomes 64
    __ bind(not_zero);
    __ negl(Rdst);
    __ addl(Rdst, BitsPerLong - 1);     // dst = 63 - bit index
  %}
  ins_pipe(ialu_reg);
%}
6389 
// Count trailing zeros of an int via BSF, which returns the index of the
// lowest set bit (equal to the trailing-zero count).  BSF sets ZF and
// leaves dst undefined on zero input, so that case is patched to 32.
instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (CountTrailingZerosI src));
  effect(KILL cr);

  format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
            "JNZ    done\n\t"
            "MOV    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Label done;
    __ bsfl(Rdst, $src$$Register);
    __ jccb(Assembler::notZero, done);
    __ movl(Rdst, BitsPerInt);          // zero input: tzcnt is 32
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}
6408 
// Count trailing zeros of a long via BSF on each half of the register
// pair: scan the low word first; if it is zero, scan the high word and
// add 32 (using 32 as the base when the high word is also zero, giving
// a total of 64 for zero input).  TEMP dst keeps dst disjoint from src.
instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
            "JNZ    done\n\t"
            "BSF    $dst, $src.hi\n\t"
            "JNZ    msw_not_zero\n\t"
            "MOV    $dst, 32\n"
      "msw_not_zero:\n\t"
            "ADD    $dst, 32\n"
      "done:" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Label msw_not_zero;
    Label done;
    __ bsfl(Rdst, Rsrc);
    __ jccb(Assembler::notZero, done);
    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
    __ jccb(Assembler::notZero, msw_not_zero);
    __ movl(Rdst, BitsPerInt);          // both halves zero: 32 + 32 = 64
    __ bind(msw_not_zero);
    __ addl(Rdst, BitsPerInt);          // count continues past the low word
    __ bind(done);
  %}
  ins_pipe(ialu_reg);
%}
6437 
6438 
6439 //---------- Population Count Instructions -------------------------------------
6440 
// Population count of an int register, using the POPCNT instruction
// (guarded by UsePopCountInstruction).
instruct popCountI(eRegI dst, eRegI src) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg);
%}
6451 
// Population count of an int loaded directly from memory (folds the
// LoadI into the POPCNT memory form).
instruct popCountI_mem(eRegI dst, memory mem) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    __ popcntl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
6462 
// Note: Long.bitCount(long) returns an int.
// Population count of a long register pair: POPCNT each half and sum.
// POPCNT writes flags (KILL cr); tmp holds the high-half count.
instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    __ popcntl($dst$$Register, $src$$Register);
    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}
6479 
// Note: Long.bitCount(long) returns an int.
// Population count of a long loaded from memory: POPCNT the two 32-bit
// words at $mem and $mem+4 and sum the counts.  The raw addresses are
// rebuilt so the second access can carry the +4 displacement.
instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    //__ popcntl($dst$$Register, $mem$$Address$$first);
    //__ popcntl($tmp$$Register, $mem$$Address$$second);
    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
    __ addl($dst$$Register, $tmp$$Register);
  %}
  ins_pipe(ialu_reg);
%}
6498 
6499 
6500 //----------Load/Store/Move Instructions---------------------------------------
6501 //----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
// Sign-extending byte load (MOVSX) into an int register.
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
6515 
// Load Byte (8bit signed) into Long Register
// MOVSX loads the byte sign-extended to 32 bits, so bits 31..7 of the
// low word are all copies of the sign.  The high word is made by copying
// the low word and arithmetically shifting right by 7: since the top 25
// bits already equal the sign bit, a 7-bit shift floods the register
// with it (cheaper encoding than SAR by 31).
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,7" %}

  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}
6534 
// Load Unsigned Byte (8bit UNsigned)
// Zero-extending byte load (MOVZX) into an int register.
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
6548 
// Load Unsigned Byte (8 bit UNsigned) into Long Register
// Zero-extend the byte into the low word; the high word of an unsigned
// value is always zero, produced with a self-XOR (which is why the
// flags register is killed).
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));   // hi := 0
  %}

  ins_pipe(ialu_reg_mem);
%}
6566 
// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
// Folds an AND with an 8-bit immediate mask into the zero-extending
// byte load: load, zero the high word, then apply the mask to the
// low word only (the high word of the masked ubyte is always zero).
instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));   // hi := 0
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
6583 
// Load Short (16bit signed)
// Sign-extending 16-bit load (MOVSX) into an int register.
instruct loadS(eRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX  $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
6597 
// Load Short (16 bit signed) to Byte (8 bit signed)
// (LoadS << 24) >> 24 keeps only the low byte, sign-extended -- exactly
// what a single sign-extending byte load gives, so the shift pair is
// folded into one MOVSX8.
instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6609 
// Load Short (16bit signed) into Long Register
// Same scheme as loadB2L: after MOVSX the top 17 bits of the low word
// equal the sign bit, so copying to the high word and arithmetically
// shifting right by 15 fills it with the sign.
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}
6628 
// Load Unsigned Short/Char (16bit unsigned)
instruct loadUS(eRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// Matches (LoadUS << 24) >> 24; only the low byte survives the shifts,
// so a sign-extending byte load gives the same result.
instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  // XOR of the high half clobbers EFLAGS.
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// (ushort & 0xFF) is just the low byte; on little-endian x86 that byte is
// at the same address, so a single zero-extending byte load suffices.
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register
instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    // Apply the mask to the zero-extended low word.
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
6703 
// Load Integer
instruct loadI(eRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
// Matches (LoadI << 24) >> 24; implemented as one sign-extending byte load.
instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// Matches (LoadI & 0xFF); on little-endian x86 this is a zero-extending
// byte load from the same address -- no separate AND needed.
instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
// Matches (LoadI << 16) >> 16; implemented as one sign-extending 16-bit load.
instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// Matches (LoadI & 0xFFFF); a zero-extending 16-bit load from the same
// address (little-endian) replaces the load + AND pair.
instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6765 
// Load Integer into Long Register
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  // SAR clobbers EFLAGS.
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // Arithmetic shift by 31 fills the high word with the sign bit.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
// (int & 0xFF) is just the low byte of the int; load it directly with a
// zero-extending byte load (little-endian) instead of load + AND.
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
// Same narrowing trick as above with a 16-bit zero-extending load.
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 32-bit mask into Long Register
// General case: full 32-bit load, clear the high word, then AND with the
// mask.  The high word can be zeroed unconditionally because the AND
// result is non-negative only in the low 32 bits being kept.
instruct loadI2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV    $dst.lo,$mem\t# int & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
// Zero-extension: plain 32-bit load plus a cleared high word.
instruct loadUI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (LoadUI2L mem));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}
6848 
// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
// Non-atomic path: two 32-bit loads (low word at $mem, high word at
// $mem+4).  Only legal when the LoadL does not require atomic access.
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
            "MOV    $dst.hi,$mem+4" %}

  ins_encode %{
    // Build two addresses from the same base/index/scale: disp and disp+4.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
            "FISTp  $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic volatile long load via a single 64-bit SSE2 move, result left
// in a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD  $dst,$tmp" %}
  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// As above, but the result goes straight into a GPR pair: MOVD the low
// word out, shift the XMM temp right 32 bits, MOVD the high word out.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD   $dst.lo,$tmp\n\t"
            "PSRLQ  $tmp,32\n\t"
            "MOVD   $dst.hi,$tmp" %}
  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}
6906 
// Load Range
// Plain 32-bit MOV (opcode 0x8B = MOV r32, r/m32).
instruct loadRange(eRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}
6940 
// Load Double
// x87 path (no SSE2): push the value on the FPU stack, then pop into $dst.
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
// MOVSD (F2 0F 10) also zeroes the upper half of the XMM register;
// selected when UseXmmLoadAndClearUpper is on.
instruct loadXD(regXD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD  $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// MOVLPD (66 0F 12) variant used when clearing the upper half is not
// wanted (see !UseXmmLoadAndClearUpper).
instruct loadXD_partial(regXD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadX(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS  $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Load Float
// x87 path (UseSSE==0): FLD m32real then pop into $dst.
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}
6998 
// Load Aligned Packed Byte to XMM register
// All of the packed loads below move 64 bits via MOVQ into the low half
// of an XMM register; they differ only in the vector node they match.
instruct loadA8B(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load8B mem));
  ins_cost(125);
  format %{ "MOVQ  $dst,$mem\t! packed8B" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Short to XMM register
instruct loadA4S(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4S mem));
  ins_cost(125);
  format %{ "MOVQ  $dst,$mem\t! packed4S" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Char to XMM register
instruct loadA4C(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4C mem));
  ins_cost(125);
  format %{ "MOVQ  $dst,$mem\t! packed4C" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Integer to XMM register
instruct load2IU(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2I mem));
  ins_cost(125);
  format %{ "MOVQ  $dst,$mem\t! packed2I" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Single to XMM
instruct loadA2F(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2F mem));
  ins_cost(145);
  format %{ "MOVQ  $dst,$mem\t! packed2F" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}
7048 
// Load Effective Address
// The leaP* variants all emit LEA (opcode 0x8D); they differ only in the
// addressing-mode operand they accept (8-bit offset, 32-bit offset,
// index+offset, scaled index, scaled index+offset).
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}
7099 
// Load Constant
instruct loadConI(eRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV    $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR reg,reg is the canonical shorter/faster way to zero a register;
// it clobbers EFLAGS, hence the KILL.
instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR    $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

// Load pointer constant (MOV r32, imm32).
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV    $dst,$src" %}
  opcode(0xB8);  /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load long constant as two 32-bit immediate moves (low then high half).
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV    $dst.lo,$src.lo\n\t"
            "MOV    $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load long zero: XOR both halves (clobbers EFLAGS).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR    $dst.lo,$dst.lo\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}
7151 
// The instruction usage is guarded by predicate in operand immF().
// x87: load the float from the constant table, then pop into $dst.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF0().
// 0.0f comes from FLDZ, avoiding a constant-table entry.
instruct loadConF0(regF dst, immF0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF1().
// 1.0f comes from FLD1, avoiding a constant-table entry.
instruct loadConF1(regF dst, immF1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immXF().
// SSE: MOVSS straight from the constant table.
instruct loadConX(regX dst, immXF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immXF0().
// SSE: XORPS reg,reg produces 0.0f without a memory access.
instruct loadConX0(regX dst, immXF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS  $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
7212 
// The instruction usage is guarded by predicate in operand immD().
// x87: load the double from the constant table, then pop into $dst.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD0().
// 0.0 comes from FLDZ, avoiding a constant-table entry.
instruct loadConD0(regD dst, immD0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD1().
// 1.0 comes from FLD1, avoiding a constant-table entry.
instruct loadConD1(regD dst, immD1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immXD().
// SSE2: MOVSD straight from the constant table.
instruct loadConXD(regXD dst, immXD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immXD0().
// SSE2: XORPD reg,reg (66 0F 57) produces 0.0 without a memory access.
instruct loadConXD0(regXD dst, immXD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD  $dst,$dst\t# double 0.0" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
  ins_pipe( pipe_slow );
%}
7274 
// Load Stack Slot
// Reload a spilled int from its stack slot (MOV r32, r/m32).
instruct loadSSI(eRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV    $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Reload a spilled long: two 32-bit moves, low word then high word.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
// Reload a spilled pointer from its stack slot.
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV    $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
// Reload a spilled float via the x87 stack (FLD m32real, then pop).
instruct loadSSF(regF dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
// Reload a spilled double via the x87 stack (FLD m64real, then pop).
instruct loadSSD(regD dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}
7333 
// Prefetch instructions.
// Must be safe to execute with invalid address (cannot fault).
// For the SSE PREFETCHh forms (0F 18), the hint level is selected by the
// ModRM reg field, i.e. the first argument to RMopc_Mem: /0 = NTA,
// /1 = T0, /3 = T2 (see the per-instruct opcode comments below).

// No prefetch support at all: emit nothing.
instruct prefetchr0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch());
  match(PrefetchRead mem);
  ins_cost(0);
  size(0);
  format %{ "PREFETCHR (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// 3DNow!-style PREFETCH (0F 0D), or explicitly requested via ReadPrefetchInstr==3.
instruct prefetchr( memory mem ) %{
  predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || ReadPrefetchInstr==3);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
  opcode(0x0F, 0x0d);     /* Opcode 0F 0d /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrNTA( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==0);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrT0( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==1);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrT2( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==2);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}

// Write-prefetch variants mirror the read forms above, selected by
// AllocatePrefetchInstr instead of ReadPrefetchInstr.

instruct prefetchw0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch());
  match(PrefetchWrite mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchw( memory mem ) %{
  predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || AllocatePrefetchInstr==3);
  match( PrefetchWrite mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}
7444 
7445 //----------Store Instructions-------------------------------------------------
7446 
// Store Byte
// NOTE(review): xRegI presumably restricts src to the byte-addressable
// registers (EAX..EDX) required by the 8-bit MOV -- confirm against the
// operand definition earlier in this file.
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8   $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// 16-bit MOV: operand-size prefix 0x66 followed by opcode 0x89.
instruct storeC(memory mem, eRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16  $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
instruct storeI(memory mem, eRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Non-atomic path: two 32-bit stores (low word at $mem, high at $mem+4).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV    $mem,$src.lo\n\t"
            "MOV    $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// Truncating store: only the low 32 bits of the long are written.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV    $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  // CMP probe clobbers EFLAGS.
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD   $src\n\t"
            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 atomic volatile long store from a stack slot, via an XMM temp.
instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD  $tmp,$src\n\t"
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp));
  ins_pipe( pipe_slow );
%}

// SSE2 atomic volatile long store from a GPR pair: assemble the 64-bit
// value in an XMM register (MOVD lo, MOVD hi, PUNPCKLDQ) then store it.
instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD   $tmp,$src.lo\n\t"
            "MOVD   $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
  ins_pipe( pipe_slow );
%}
7548 
7549 // Store Pointer; for storing unknown oops and raw pointers
7550 instruct storeP(memory mem, anyRegP src) %{
7551   match(Set mem (StoreP mem src));
7552 
7553   ins_cost(125);
7554   format %{ "MOV    $mem,$src" %}
7555   opcode(0x89);  // MOV r/m32,r32
7556   ins_encode( OpcP, RegMem( src, mem ) );
7557   ins_pipe( ialu_mem_reg );
7558 %}
7559 
7560 // Store Integer Immediate
7561 instruct storeImmI(memory mem, immI src) %{
7562   match(Set mem (StoreI mem src));
7563 
7564   ins_cost(150);
7565   format %{ "MOV    $mem,$src" %}
7566   opcode(0xC7);               /* C7 /0 */
7567   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
7568   ins_pipe( ialu_mem_imm );
7569 %}
7570 
7571 // Store Short/Char Immediate
7572 instruct storeImmI16(memory mem, immI16 src) %{
7573   predicate(UseStoreImmI16);  // selection guarded by the -XX:+/-UseStoreImmI16 flag
7574   match(Set mem (StoreC mem src));
7575 
7576   ins_cost(150);
7577   format %{ "MOV16  $mem,$src" %}
7578   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
7579   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
7580   ins_pipe( ialu_mem_imm );
7581 %}
7582 
7583 // Store Pointer Immediate; null pointers or constant oops that do not
7584 // need card-mark barriers.
7585 instruct storeImmP(memory mem, immP src) %{
7586   match(Set mem (StoreP mem src));
7587 
7588   ins_cost(150);
7589   format %{ "MOV    $mem,$src" %}
7590   opcode(0xC7);               /* C7 /0 */
7591   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
7592   ins_pipe( ialu_mem_imm );
7593 %}
7594 
7595 // Store Byte Immediate
7596 instruct storeImmB(memory mem, immI8 src) %{
7597   match(Set mem (StoreB mem src));
7598 
7599   ins_cost(150);
7600   format %{ "MOV8   $mem,$src" %}
7601   opcode(0xC6);               /* C6 /0 */
7602   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
7603   ins_pipe( ialu_mem_imm );
7604 %}
7605 
7606 // Store Aligned Packed Byte XMM register to memory
7607 instruct storeA8B(memory mem, regXD src) %{
7608   predicate(UseSSE>=1);
7609   match(Set mem (Store8B mem src));
7610   ins_cost(145);
7611   format %{ "MOVQ  $mem,$src\t! packed8B" %}
7612   ins_encode( movq_st(mem, src));  // shared MOVQ (64-bit) store encoding
7613   ins_pipe( pipe_slow );
7614 %}
7615 
7616 // Store Aligned Packed Char/Short XMM register to memory
7617 instruct storeA4C(memory mem, regXD src) %{
7618   predicate(UseSSE>=1);
7619   match(Set mem (Store4C mem src));
7620   ins_cost(145);
7621   format %{ "MOVQ  $mem,$src\t! packed4C" %}
7622   ins_encode( movq_st(mem, src));
7623   ins_pipe( pipe_slow );
7624 %}
7625 
7626 // Store Aligned Packed Integer XMM register to memory
7627 instruct storeA2I(memory mem, regXD src) %{
7628   predicate(UseSSE>=1);
7629   match(Set mem (Store2I mem src));
7630   ins_cost(145);
7631   format %{ "MOVQ  $mem,$src\t! packed2I" %}
7632   ins_encode( movq_st(mem, src));
7633   ins_pipe( pipe_slow );
7634 %}
7635 
7636 // Store CMS card-mark Immediate
7637 instruct storeImmCM(memory mem, immI8 src) %{
7638   match(Set mem (StoreCM mem src));
7639 
7640   ins_cost(150);
7641   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
7642   opcode(0xC6);               /* C6 /0 */
7643   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
7644   ins_pipe( ialu_mem_imm );
7645 %}
7646 
7647 // Store Double
7648 instruct storeD( memory mem, regDPR1 src) %{
7649   predicate(UseSSE<=1);
7650   match(Set mem (StoreD mem src));
7651 
7652   ins_cost(100);
7653   format %{ "FST_D  $mem,$src" %}
7654   opcode(0xDD);       /* DD /2 */
7655   ins_encode( enc_FP_store(mem,src) );
7656   ins_pipe( fpu_mem_reg );
7657 %}
7658 
7659 // Store double does rounding on x86
7660 instruct storeD_rounded( memory mem, regDPR1 src) %{
7661   predicate(UseSSE<=1);
7662   match(Set mem (StoreD mem (RoundDouble src)));  // absorbs the RoundDouble: the x87 store rounds
7663 
7664   ins_cost(100);
7665   format %{ "FST_D  $mem,$src\t# round" %}
7666   opcode(0xDD);       /* DD /2 */
7667   ins_encode( enc_FP_store(mem,src) );
7668   ins_pipe( fpu_mem_reg );
7669 %}
7670 
7671 // Store XMM register to memory (double-precision floating points)
7672 // MOVSD instruction
7673 instruct storeXD(memory mem, regXD src) %{
7674   predicate(UseSSE>=2);
7675   match(Set mem (StoreD mem src));
7676   ins_cost(95);
7677   format %{ "MOVSD  $mem,$src" %}
7678   ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));  // F2 0F 11 = MOVSD m64,xmm
7679   ins_pipe( pipe_slow );
7680 %}
7681 
7682 // Store XMM register to memory (single-precision floating point)
7683 // MOVSS instruction
7684 instruct storeX(memory mem, regX src) %{
7685   predicate(UseSSE>=1);
7686   match(Set mem (StoreF mem src));
7687   ins_cost(95);
7688   format %{ "MOVSS  $mem,$src" %}
7689   ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));  // F3 0F 11 = MOVSS m32,xmm
7690   ins_pipe( pipe_slow );
7691 %}
7692 
7693 // Store Aligned Packed Single Float XMM register to memory
7694 instruct storeA2F(memory mem, regXD src) %{
7695   predicate(UseSSE>=1);
7696   match(Set mem (Store2F mem src));
7697   ins_cost(145);
7698   format %{ "MOVQ  $mem,$src\t! packed2F" %}
7699   ins_encode( movq_st(mem, src));
7700   ins_pipe( pipe_slow );
7701 %}
7702 
7703 // Store Float
7704 instruct storeF( memory mem, regFPR1 src) %{
7705   predicate(UseSSE==0);
7706   match(Set mem (StoreF mem src));
7707 
7708   ins_cost(100);
7709   format %{ "FST_S  $mem,$src" %}
7710   opcode(0xD9);       /* D9 /2 */
7711   ins_encode( enc_FP_store(mem,src) );
7712   ins_pipe( fpu_mem_reg );
7713 %}
7714 
7715 // Store Float does rounding on x86
7716 instruct storeF_rounded( memory mem, regFPR1 src) %{
7717   predicate(UseSSE==0);
7718   match(Set mem (StoreF mem (RoundFloat src)));  // absorbs RoundFloat: the 32-bit x87 store rounds
7719 
7720   ins_cost(100);
7721   format %{ "FST_S  $mem,$src\t# round" %}
7722   opcode(0xD9);       /* D9 /2 */
7723   ins_encode( enc_FP_store(mem,src) );
7724   ins_pipe( fpu_mem_reg );
7725 %}
7726 
7727 // Store Float does rounding on x86
7728 instruct storeF_Drounded( memory mem, regDPR1 src) %{
7729   predicate(UseSSE<=1);
7730   match(Set mem (StoreF mem (ConvD2F src)));  // absorbs the D->F conversion into the store
7731 
7732   ins_cost(100);
7733   format %{ "FST_S  $mem,$src\t# D-round" %}
7734   opcode(0xD9);       /* D9 /2 */
7735   ins_encode( enc_FP_store(mem,src) );
7736   ins_pipe( fpu_mem_reg );
7737 %}
7738 
7739 // Store immediate Float value (it is faster than store from FPU register)
7740 // The instruction usage is guarded by predicate in operand immF().
7741 instruct storeF_imm( memory mem, immF src) %{
7742   match(Set mem (StoreF mem src));
7743 
7744   ins_cost(50);
7745   format %{ "MOV    $mem,$src\t# store float" %}
7746   opcode(0xC7);               /* C7 /0 */
7747   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));  // emits the float's raw bit pattern
7748   ins_pipe( ialu_mem_imm );
7749 %}
7750 
7751 // Store immediate Float value (it is faster than store from XMM register)
7752 // The instruction usage is guarded by predicate in operand immXF().
7753 instruct storeX_imm( memory mem, immXF src) %{
7754   match(Set mem (StoreF mem src));
7755 
7756   ins_cost(50);
7757   format %{ "MOV    $mem,$src\t# store float" %}
7758   opcode(0xC7);               /* C7 /0 */
7759   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32XF_as_bits( src ));
7760   ins_pipe( ialu_mem_imm );
7761 %}
7762 
7763 // Store Integer to stack slot
7764 instruct storeSSI(stackSlotI dst, eRegI src) %{
7765   match(Set dst src);
7766 
7767   ins_cost(100);
7768   format %{ "MOV    $dst,$src" %}
7769   opcode(0x89);  // MOV r/m32,r32
7770   ins_encode( OpcPRegSS( dst, src ) );
7771   ins_pipe( ialu_mem_reg );
7772 %}
7773 
7774 // Store Pointer to stack slot
7775 instruct storeSSP(stackSlotP dst, eRegP src) %{
7776   match(Set dst src);
7777 
7778   ins_cost(100);
7779   format %{ "MOV    $dst,$src" %}
7780   opcode(0x89);
7781   ins_encode( OpcPRegSS( dst, src ) );
7782   ins_pipe( ialu_mem_reg );
7783 %}
7784 
7785 // Store Long to stack slot
7786 instruct storeSSL(stackSlotL dst, eRegL src) %{
7787   match(Set dst src);
7788 
7789   ins_cost(200);
7790   format %{ "MOV    $dst,$src.lo\n\t"
7791             "MOV    $dst+4,$src.hi" %}
7792   opcode(0x89, 0x89);  // two MOVs: low word, then high word at dst+4
7793   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
7794   ins_pipe( ialu_mem_long_reg );
7795 %}
7796 
7797 //----------MemBar Instructions-----------------------------------------------
7798 // Memory barrier flavors
7799 
7800 instruct membar_acquire() %{
7801   match(MemBarAcquire);
7802   ins_cost(400);
7803 
7804   size(0);  // no instruction emitted — acquire needs no code here (empty encoding)
7805   format %{ "MEMBAR-acquire ! (empty encoding)" %}
7806   ins_encode();
7807   ins_pipe(empty);
7808 %}
7809 
7809 instruct membar_acquire_lock() %{
7811   match(MemBarAcquire);
7812   predicate(Matcher::prior_fast_lock(n));  // a FastLock's CMPXCHG precedes, supplying the barrier
7813   ins_cost(0);
7814 
7815   size(0);
7816   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7817   ins_encode( );
7818   ins_pipe(empty);
7819 %}
7820 
7821 instruct membar_release() %{
7822   match(MemBarRelease);
7823   ins_cost(400);
7824 
7825   size(0);  // no instruction emitted (empty encoding)
7826   format %{ "MEMBAR-release ! (empty encoding)" %}
7827   ins_encode( );
7828   ins_pipe(empty);
7829 %}
7830 
7831 instruct membar_release_lock() %{
7832   match(MemBarRelease);
7833   predicate(Matcher::post_fast_unlock(n));  // a FastUnlock follows, supplying the barrier
7834   ins_cost(0);
7835 
7836   size(0);
7837   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7838   ins_encode( );
7839   ins_pipe(empty);
7840 %}
7841 
7842 instruct membar_volatile(eFlagsReg cr) %{
7843   match(MemBarVolatile);
7844   effect(KILL cr);  // the locked ADD used for the StoreLoad fence clobbers flags
7845   ins_cost(400);
7846 
7847   format %{ 
7848     $$template
7849     if (os::is_MP()) {
7850       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
7851     } else {
7852       $$emit$$"MEMBAR-volatile ! (empty encoding)"
7853     }
7854   %}
7855   ins_encode %{
7856     __ membar(Assembler::StoreLoad);
7856   %}
7858   ins_pipe(pipe_slow);
7859 %}
7860 
7861 instruct membar_volatile_unneeded() %{
7862   match(MemBarVolatile);
7863   predicate(Matcher::post_store_load_barrier(n));  // a later node already orders store->load
7864   ins_cost(0);
7865 
7866   size(0);
7867   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7868   ins_encode( );
7869   ins_pipe(empty);
7870 %}
7871 
7872 //----------Move Instructions--------------------------------------------------
     // CastX2P/CastP2X convert between raw machine words and pointers; on 32-bit
     // x86 both live in the same registers so no code is needed for X2P.
7873 instruct castX2P(eAXRegP dst, eAXRegI src) %{
7874   match(Set dst (CastX2P src));
7875   format %{ "# X2P  $dst, $src" %}
7876   ins_encode( /*empty encoding*/ );  // src and dst are both EAX — nothing to emit
7877   ins_cost(0);
7878   ins_pipe(empty);
7879 %}
7880 
7881 instruct castP2X(eRegI dst, eRegP src ) %{
7882   match(Set dst (CastP2X src));
7883   ins_cost(50);
7884   format %{ "MOV    $dst, $src\t# CastP2X" %}
7885   ins_encode( enc_Copy( dst, src) );  // plain register-to-register copy
7886   ins_pipe( ialu_reg_reg );
7887 %}
7888 
7889 //----------Conditional Move---------------------------------------------------
7890 // Conditional move
7891 instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
7892   predicate(VM_Version::supports_cmov() );  // CMOV is P6+ only; guarded at runtime
7893   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7894   ins_cost(200);
7895   format %{ "CMOV$cop $dst,$src" %}
7896   opcode(0x0F,0x40);  // 0F 4x — condition code folded into the second opcode byte
7897   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7898   ins_pipe( pipe_cmov_reg );
7899 %}
7900 
7901 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
7902   predicate(VM_Version::supports_cmov() );
7903   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7904   ins_cost(200);
7905   format %{ "CMOV$cop $dst,$src" %}
7906   opcode(0x0F,0x40);
7907   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7908   ins_pipe( pipe_cmov_reg );
7909 %}
7910 
7911 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
7912   predicate(VM_Version::supports_cmov() );
7913   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7914   ins_cost(200);
7915   expand %{
7916     cmovI_regU(cop, cr, dst, src);  // delegates to the unsigned variant above
7917   %}
7918 %}
7919 
7920 // Conditional move
7921 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
7922   predicate(VM_Version::supports_cmov() );
7923   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));  // folds the load into the CMOV
7924   ins_cost(250);
7925   format %{ "CMOV$cop $dst,$src" %}
7926   opcode(0x0F,0x40);
7927   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7928   ins_pipe( pipe_cmov_mem );
7929 %}
7930 
7931 // Conditional move
7932 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
7933   predicate(VM_Version::supports_cmov() );
7934   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7935   ins_cost(250);
7936   format %{ "CMOV$cop $dst,$src" %}
7937   opcode(0x0F,0x40);
7938   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7939   ins_pipe( pipe_cmov_mem );
7940 %}
7941 
7942 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{
7943   predicate(VM_Version::supports_cmov() );
7944   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7945   ins_cost(250);
7946   expand %{
7947     cmovI_memU(cop, cr, dst, src);
7948   %}
7949 %}
7950 
7951 // Conditional move
7952 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
7953   predicate(VM_Version::supports_cmov() );
7954   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7955   ins_cost(200);
7956   format %{ "CMOV$cop $dst,$src\t# ptr" %}
7957   opcode(0x0F,0x40);
7958   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7959   ins_pipe( pipe_cmov_reg );
7960 %}
7961 
7962 // Conditional move (non-P6 version)
7963 // Note:  a CMoveP is generated for  stubs and native wrappers
7964 //        regardless of whether we are on a P6, so we
7965 //        emulate a cmov here
7966 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
7967   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));  // no predicate — must work on any CPU
7968   ins_cost(300);
7969   format %{ "Jn$cop   skip\n\t"
7970           "MOV    $dst,$src\t# pointer\n"
7971       "skip:" %}
7972   opcode(0x8b);  // MOV r32,r/m32 — executed only when the branch is not taken
7973   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
7974   ins_pipe( pipe_cmov_reg );
7975 %}
7976 
7977 // Conditional move
7978 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
7979   predicate(VM_Version::supports_cmov() );
7980   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7981   ins_cost(200);
7982   format %{ "CMOV$cop $dst,$src\t# ptr" %}
7983   opcode(0x0F,0x40);
7984   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7985   ins_pipe( pipe_cmov_reg );
7986 %}
7987 
7988 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
7989   predicate(VM_Version::supports_cmov() );
7990   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7991   ins_cost(200);
7992   expand %{
7993     cmovP_regU(cop, cr, dst, src);
7994   %}
7995 %}
7996 
7997 // DISABLED: Requires the ADLC to emit a bottom_type call that
7998 // correctly meets the two pointer arguments; one is an incoming
7999 // register but the other is a memory operand.  ALSO appears to
8000 // be buggy with implicit null checks.
8001 //
8002 //// Conditional move
8003 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
8004 //  predicate(VM_Version::supports_cmov() );
8005 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8006 //  ins_cost(250);
8007 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
8008 //  opcode(0x0F,0x40);
8009 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
8010 //  ins_pipe( pipe_cmov_mem );
8011 //%}
8012 //
8013 //// Conditional move
8014 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
8015 //  predicate(VM_Version::supports_cmov() );
8016 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8017 //  ins_cost(250);
8018 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
8019 //  opcode(0x0F,0x40);
8020 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
8021 //  ins_pipe( pipe_cmov_mem );
8022 //%}
8023 
8024 // Conditional move
8025 instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
8026   predicate(UseSSE<=1);
8027   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8028   ins_cost(200);
8029   format %{ "FCMOV$cop $dst,$src\t# double" %}
8030   opcode(0xDA);  // x87 FCMOVcc family
8031   ins_encode( enc_cmov_d(cop,src) );
8032   ins_pipe( pipe_cmovD_reg );
8033 %}
8034 
8035 // Conditional move
8036 instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
8037   predicate(UseSSE==0);
8038   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8039   ins_cost(200);
8040   format %{ "FCMOV$cop $dst,$src\t# float" %}
8041   opcode(0xDA);
8042   ins_encode( enc_cmov_d(cop,src) );  // same encoder as the double form; x87 regs are typeless
8043   ins_pipe( pipe_cmovD_reg );
8044 %}
8045 
8046 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
8047 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
8048   predicate(UseSSE<=1);
8049   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8050   ins_cost(200);
8051   format %{ "Jn$cop   skip\n\t"
8052             "MOV    $dst,$src\t# double\n"
8053       "skip:" %}
8054   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
8055   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );  // branch around a push/store copy
8056   ins_pipe( pipe_cmovD_reg );
8057 %}
8058 
8059 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
8060 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
8061   predicate(UseSSE==0);
8062   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8063   ins_cost(200);
8064   format %{ "Jn$cop    skip\n\t"
8065             "MOV    $dst,$src\t# float\n"
8066       "skip:" %}
8067   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
8068   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
8069   ins_pipe( pipe_cmovD_reg );
8070 %}
8071 
8072 // No CMOVE with SSE/SSE2
8073 instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
8074   predicate (UseSSE>=1);
8075   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8076   ins_cost(200);
8077   format %{ "Jn$cop   skip\n\t"
8078             "MOVSS  $dst,$src\t# float\n"
8079       "skip:" %}
8080   ins_encode %{
8081     Label skip;
8082     // Invert sense of branch from sense of CMOV
8083     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);  // ^1 flips the cc's low bit = negated condition
8084     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
8085     __ bind(skip);
8086   %}
8087   ins_pipe( pipe_slow );
8088 %}
8089 
8090 // No CMOVE with SSE/SSE2
8091 instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
8092   predicate (UseSSE>=2);
8093   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8094   ins_cost(200);
8095   format %{ "Jn$cop   skip\n\t"
8096             "MOVSD  $dst,$src\t# float\n"
8097       "skip:" %}
8098   ins_encode %{
8099     Label skip;
8100     // Invert sense of branch from sense of CMOV
8101     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8102     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8103     __ bind(skip);
8104   %}
8105   ins_pipe( pipe_slow );
8106 %}
8107 
8108 // unsigned version
8109 instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
8110   predicate (UseSSE>=1);
8111   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8112   ins_cost(200);
8113   format %{ "Jn$cop   skip\n\t"
8114             "MOVSS  $dst,$src\t# float\n"
8115       "skip:" %}
8116   ins_encode %{
8117     Label skip;
8118     // Invert sense of branch from sense of CMOV
8119     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8120     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
8121     __ bind(skip);
8122   %}
8123   ins_pipe( pipe_slow );
8124 %}
8125 
8126 instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
8127   predicate (UseSSE>=1);
8128   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
8129   ins_cost(200);
8130   expand %{
8131     fcmovX_regU(cop, cr, dst, src);
8132   %}
8133 %}
8134 
8135 // unsigned version
     // NOTE(review): the "# float" text in this double variant's format string
     // appears to be a copy-paste leftover; left untouched since format strings
     // feed disassembly output.
8136 instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
8137   predicate (UseSSE>=2);
8138   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8139   ins_cost(200);
8140   format %{ "Jn$cop   skip\n\t"
8141             "MOVSD  $dst,$src\t# float\n"
8142       "skip:" %}
8143   ins_encode %{
8144     Label skip;
8145     // Invert sense of branch from sense of CMOV
8146     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
8147     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
8148     __ bind(skip);
8149   %}
8150   ins_pipe( pipe_slow );
8151 %}
8152 
8153 instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
8154   predicate (UseSSE>=2);
8155   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
8156   ins_cost(200);
8157   expand %{
8158     fcmovXD_regU(cop, cr, dst, src);
8159   %}
8160 %}
8161 
     // Long conditional moves: a long lives in a register pair on 32-bit x86,
     // so each CMoveL emits two CMOVs (lo then hi) under the same condition.
8162 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
8163   predicate(VM_Version::supports_cmov() );
8164   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8165   ins_cost(200);
8166   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
8167             "CMOV$cop $dst.hi,$src.hi" %}
8168   opcode(0x0F,0x40);
8169   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
8170   ins_pipe( pipe_cmov_reg_long );
8171 %}
8172 
8173 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
8174   predicate(VM_Version::supports_cmov() );
8175   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8176   ins_cost(200);
8177   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
8178             "CMOV$cop $dst.hi,$src.hi" %}
8179   opcode(0x0F,0x40);
8180   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
8181   ins_pipe( pipe_cmov_reg_long );
8182 %}
8183 
8184 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
8185   predicate(VM_Version::supports_cmov() );
8186   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
8187   ins_cost(200);
8188   expand %{
8189     cmovL_regU(cop, cr, dst, src);
8190   %}
8191 %}
8192 
8193 //----------Arithmetic Instructions--------------------------------------------
8194 //----------Addition Instructions----------------------------------------------
8195 // Integer Addition Instructions
8196 instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8197   match(Set dst (AddI dst src));
8198   effect(KILL cr);  // ADD sets the flags
8199 
8200   size(2);  // fixed 2-byte encoding: opcode + ModRM
8201   format %{ "ADD    $dst,$src" %}
8202   opcode(0x03);  // ADD r32,r/m32
8203   ins_encode( OpcP, RegReg( dst, src) );
8204   ins_pipe( ialu_reg_reg );
8205 %}
8206 
8207 instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8208   match(Set dst (AddI dst src));
8209   effect(KILL cr);
8210 
8211   format %{ "ADD    $dst,$src" %}
8212   opcode(0x81, 0x00); /* /0 id */
8213   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );  // picks the short sign-extended imm8 form when it fits
8214   ins_pipe( ialu_reg );
8215 %}
8216 
8217 instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
8218   predicate(UseIncDec);  // INC/DEC selection is flag-controlled (partial-flags stalls on some CPUs)
8219   match(Set dst (AddI dst src));
8220   effect(KILL cr);
8221 
8222   size(1);  // single-byte INC r32
8223   format %{ "INC    $dst" %}
8224   opcode(0x40); /*  */
8225   ins_encode( Opc_plus( primary, dst ) );  // 0x40 + register number
8226   ins_pipe( ialu_reg );
8227 %}
8228 
8229 instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
8230   match(Set dst (AddI src0 src1));  // three-operand add via LEA; does not touch flags
8231   ins_cost(110);
8232 
8233   format %{ "LEA    $dst,[$src0 + $src1]" %}
8234   opcode(0x8D); /* 0x8D /r */
8235   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
8236   ins_pipe( ialu_reg_reg );
8237 %}
8238 
8239 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
8240   match(Set dst (AddP src0 src1));
8241   ins_cost(110);
8242 
8243   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
8244   opcode(0x8D); /* 0x8D /r */
8245   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
8246   ins_pipe( ialu_reg_reg );
8247 %}
8248 
8249 instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
8250   predicate(UseIncDec);
8251   match(Set dst (AddI dst src));  // add of -1 matched as DEC
8252   effect(KILL cr);
8253 
8254   size(1);
8255   format %{ "DEC    $dst" %}
8256   opcode(0x48); /*  */
8257   ins_encode( Opc_plus( primary, dst ) );  // 0x48 + register number
8258   ins_pipe( ialu_reg );
8259 %}
8260 
8261 instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
8262   match(Set dst (AddP dst src));
8263   effect(KILL cr);
8264 
8265   size(2);
8266   format %{ "ADD    $dst,$src" %}
8267   opcode(0x03);  // ADD r32,r/m32 — pointer add is plain integer add on x86_32
8268   ins_encode( OpcP, RegReg( dst, src) );
8269   ins_pipe( ialu_reg_reg );
8270 %}
8271 
8272 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
8273   match(Set dst (AddP dst src));
8274   effect(KILL cr);
8275 
8276   format %{ "ADD    $dst,$src" %}
8277   opcode(0x81,0x00); /* Opcode 81 /0 id */
8278   // ins_encode( RegImm( dst, src) );
8279   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8280   ins_pipe( ialu_reg );
8281 %}
8282 
8283 instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8284   match(Set dst (AddI dst (LoadI src)));  // folds the load into the ADD
8285   effect(KILL cr);
8286 
8287   ins_cost(125);
8288   format %{ "ADD    $dst,$src" %}
8289   opcode(0x03);
8290   ins_encode( OpcP, RegMem( dst, src) );
8291   ins_pipe( ialu_reg_mem );
8292 %}
8293 
8294 instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8295   match(Set dst (StoreI dst (AddI (LoadI dst) src)));  // read-modify-write directly in memory
8296   effect(KILL cr);
8297 
8298   ins_cost(150);
8299   format %{ "ADD    $dst,$src" %}
8300   opcode(0x01);  /* Opcode 01 /r */
8301   ins_encode( OpcP, RegMem( src, dst ) );
8302   ins_pipe( ialu_mem_reg );
8303 %}
8304 
8305 // Add Memory with Immediate
8306 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8307   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
8308   effect(KILL cr);
8309 
8310   ins_cost(125);
8311   format %{ "ADD    $dst,$src" %}
8312   opcode(0x81);               /* Opcode 81 /0 id */
8313   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
8314   ins_pipe( ialu_mem_imm );
8315 %}
8316 
8317 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
8318   match(Set dst (StoreI dst (AddI (LoadI dst) src)))
8319   effect(KILL cr);
8320 
8321   ins_cost(125);
8322   format %{ "INC    $dst" %}
8323   opcode(0xFF);               /* Opcode FF /0 */
8324   ins_encode( OpcP, RMopc_Mem(0x00,dst));
8325   ins_pipe( ialu_mem_imm );
8326 %}
8327 
8328 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
8329   match(Set dst (StoreI dst (AddI (LoadI dst) src)));  // add of -1 in memory matched as DEC
8330   effect(KILL cr);
8331 
8332   ins_cost(125);
8333   format %{ "DEC    $dst" %}
8334   opcode(0xFF);               /* Opcode FF /1 */
8335   ins_encode( OpcP, RMopc_Mem(0x01,dst));
8336   ins_pipe( ialu_mem_imm );
8337 %}
8338 
8339 
     // Cast nodes: type-system bookkeeping only — all three emit no code.
8340 instruct checkCastPP( eRegP dst ) %{
8341   match(Set dst (CheckCastPP dst));
8342 
8343   size(0);
8344   format %{ "#checkcastPP of $dst" %}
8345   ins_encode( /*empty encoding*/ );
8346   ins_pipe( empty );
8347 %}
8348 
8349 instruct castPP( eRegP dst ) %{
8350   match(Set dst (CastPP dst));
8351   format %{ "#castPP of $dst" %}
8352   ins_encode( /*empty encoding*/ );
8353   ins_pipe( empty );
8354 %}
8355 
8356 instruct castII( eRegI dst ) %{
8357   match(Set dst (CastII dst));
8358   format %{ "#castII of $dst" %}
8359   ins_encode( /*empty encoding*/ );
8360   ins_cost(0);
8361   ins_pipe( empty );
8362 %}
8363 
8364 
8365 // Load-locked - same as a regular pointer load when used with compare-swap
8366 instruct loadPLocked(eRegP dst, memory mem) %{
8367   match(Set dst (LoadPLocked mem));
8368 
8369   ins_cost(125);
8370   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
8371   opcode(0x8B);  // MOV r32,r/m32
8372   ins_encode( OpcP, RegMem(dst,mem));
8373   ins_pipe( ialu_reg_mem );
8374 %}
8375 
8376 // LoadLong-locked - same as a volatile long load when used with compare-swap
8377 instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
8378   predicate(UseSSE<=1);
8379   match(Set dst (LoadLLocked mem));
8380 
8381   ins_cost(200);
8382   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
8383             "FISTp  $dst" %}
8384   ins_encode(enc_loadL_volatile(mem,dst));  // x87 FILD/FISTP gives a single 64-bit access
8385   ins_pipe( fpu_reg_mem );
8386 %}
8387 
8388 instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
8389   predicate(UseSSE>=2);
8390   match(Set dst (LoadLLocked mem));
8391   effect(TEMP tmp);
8392   ins_cost(180);
8393   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
8394             "MOVSD  $dst,$tmp" %}
8395   ins_encode(enc_loadLX_volatile(mem, dst, tmp));  // SSE2 variant: 64-bit MOVSD through an XMM temp
8396   ins_pipe( pipe_slow );
8397 %}
8398 
8399 instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
8400   predicate(UseSSE>=2);
8401   match(Set dst (LoadLLocked mem));
8402   effect(TEMP tmp);
8403   ins_cost(160);  // cheapest variant: result goes straight to a register pair, no stack slot
8404   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
8405             "MOVD   $dst.lo,$tmp\n\t"
8406             "PSRLQ  $tmp,32\n\t"
8407             "MOVD   $dst.hi,$tmp" %}
8408   ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
8409   ins_pipe( pipe_slow );
8410 %}
8411 
8412 // Conditional-store of the updated heap-top.
8413 // Used during allocation of the shared heap.
8414 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
8415 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
8416   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
8417   // EAX is killed if there is contention, but then it's also unused.
8418   // In the common case of no contention, EAX holds the new oop address.
8419   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
8420   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );  // LOCK CMPXCHG (0F B1)
8421   ins_pipe( pipe_cmpxchg );
8422 %}
8423 
8424 // Conditional-store of an int value.
8425 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
8426 instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
8427   match(Set cr (StoreIConditional mem (Binary oldval newval)));
8428   effect(KILL oldval);  // CMPXCHG overwrites EAX with the memory value on failure
8429   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
8430   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
8431   ins_pipe( pipe_cmpxchg );
8432 %}
8433 
8434 // Conditional-store of a long value.
8435 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
8436 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
8437   match(Set cr (StoreLConditional mem (Binary oldval newval)));
8438   effect(KILL oldval)
8439   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
8440             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
8441             "XCHG   EBX,ECX"
8442   %}
8443   ins_encode %{
8444     // Note: we need to swap rbx, and rcx before and after the
8445     //       cmpxchg8 instruction because the instruction uses
8446     //       rcx as the high order word of the new value to store but
8447     //       our register encoding uses rbx.
8448     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
8449     if( os::is_MP() )
8450       __ lock();  // LOCK prefix only needed on multiprocessor systems
8451     __ cmpxchg8($mem$$Address);
8452     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
8453   %}
8454   ins_pipe( pipe_cmpxchg );
8455 %}
8456 
8457 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
8458 
8459 instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
8460   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
8461   effect(KILL cr, KILL oldval);  // CMPXCHG8B clobbers flags and EDX:EAX
8462   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8463             "MOV    $res,0\n\t"
8464             "JNE,s  fail\n\t"
8465             "MOV    $res,1\n"
8466           "fail:" %}
8467   ins_encode( enc_cmpxchg8(mem_ptr),
8468               enc_flags_ne_to_boolean(res) );  // materializes ZF into a 0/1 int result
8469   ins_pipe( pipe_cmpxchg );
8470 %}
8471 
8472 instruct compareAndSwapP( eRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
8473   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
8474   effect(KILL cr, KILL oldval);
8475   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8476             "MOV    $res,0\n\t"
8477             "JNE,s  fail\n\t"
8478             "MOV    $res,1\n"
8479           "fail:" %}
8480   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
8481   ins_pipe( pipe_cmpxchg );
8482 %}
8483 
8484 instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
8485   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
8486   effect(KILL cr, KILL oldval);
8487   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
8488             "MOV    $res,0\n\t"
8489             "JNE,s  fail\n\t"
8490             "MOV    $res,1\n"
8491           "fail:" %}
8492   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
8493   ins_pipe( pipe_cmpxchg );
8494 %}
8495 
//----------Subtraction Instructions-------------------------------------------
// Integer Subtraction Instructions

// Subtract register from register: SUB r32,r/m32 (opcode 0x2B, reg-reg form).
instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB    $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract immediate from register.  OpcSErm/Con8or32 emit the short
// sign-extended 8-bit immediate form when the constant fits in a byte.
instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB    $dst,$src" %}
  opcode(0x81,0x05);  /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract memory operand from register (load folded into the SUB).
instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB    $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write: subtract register from a memory location in place.
instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB    $dst,$src" %}
  opcode(0x29);  /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer: the matcher canonicalizes "ptr - x" as
// AddP(ptr, SubI(0, x)), which we implement as a single SUB.
instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB    $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Integer negation: 0 - dst becomes NEG (opcode F7 /3).
instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG    $dst" %}
  opcode(0xF7,0x03);  // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}
8564 
8565 
//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register: two-operand IMUL r32,r/m32 (0F AF).
instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL   $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate: three-operand IMUL r32,r/m32,imm.
instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL   $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit-representable long constant into EAX only (the low word of
// the EDX:EAX pair); used as a feeder for the mulI_imm_high patterns below.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV    EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
//  (special case for shift by 32)
// The predicate walks the matched subtree to require that the long constant
// operand actually fits in a signed 32-bit range.
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL   EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// (general case: shift amount in [32,63], so an extra SAR of EDX is needed).
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL   EDX:EAX,$src1\n\t"
            "SAR    EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate (load folded into three-operand IMUL).
instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL   $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory (load folded into two-operand IMUL).
instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL   $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}
8660 
// Multiply Register Int to Long
// Signed widening multiply: one-operand IMUL leaves the 64-bit product
// in EDX:EAX (operand classes pin src to EAX and dst to EDX:EAX).
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL   $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned widening multiply: int values zero-extended via the 0xffffffff
// mask, so the one-operand MUL form gives the full unsigned 64-bit product.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL    $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}
8685 
// Multiply Register Long
// Full 64x64->64 multiply synthesized from three 32-bit multiplies; the
// product lives in EDX:EAX (dst is eADXRegL).
instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV    $tmp,$src.lo\n\t"
            "IMUL   $tmp,EDX\n\t"
            "MOV    EDX,$src.hi\n\t"
            "IMUL   EDX,EAX\n\t"
            "ADD    $tmp,EDX\n\t"
            "MUL    EDX:EAX,$src.lo\n\t"
            "ADD    EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
// (one cross-term drops out, saving a multiply).
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV    $tmp,$src.hi\n\t"
            "IMUL   $tmp,EAX\n\t"
            "MUL    EDX:EAX,$src.lo\n\t"
            "ADD    EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    __ mull($src$$Register);   // unsigned EAX * src.lo -> EDX:EAX
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
// (mirror image of mulL_eReg_lhi0).
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV    $tmp,$src.lo\n\t"
            "IMUL   $tmp,EDX\n\t"
            "MUL    EDX:EAX,$src.lo\n\t"
            "ADD    EDX,$tmp" %}
  ins_encode %{
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    __ mull($src$$Register);   // unsigned EAX * src.lo -> EDX:EAX
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left and the right operands' high 32 bits are zero
// (a single unsigned 32x32->64 MUL suffices).
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
  ins_encode %{
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long by small constant (fits in 7 bits, immL_127).
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
// Basic idea: lo(result) = lo(src * EAX)
//             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL   $tmp,EDX,$src\n\t"
            "MOV    EDX,$src\n\t"
            "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD    EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}
8776 
// Integer DIV with Register
// The explicit EAX==0x80000000 / ECX==-1 check (emitted by cdq_enc) avoids
// the #DE hardware fault IDIV raises on the min_jint / -1 overflow case;
// Java semantics require that division to yield min_jint.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);   // IDIV leaves the remainder in EDX
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP    EAX,0x80000000\n\t"
            "JNE,s  normal\n\t"
            "XOR    EDX,EDX\n\t"
            "CMP    ECX,-1\n\t"
            "JE,s   done\n"
    "normal: CDQ\n\t"
            "IDIV   $div\n\t"
    "done:"        %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}
8795 
// Divide Register Long
// 64-bit division has no single 32-bit x86 instruction; call out to the
// SharedRuntime::ldiv helper with both longs pushed on the stack.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );  // caller-saved regs clobbered by the call
  ins_cost(10000);
  format %{ "PUSH   $src1.hi\n\t"
            "PUSH   $src1.lo\n\t"
            "PUSH   $src2.hi\n\t"
            "PUSH   $src2.lo\n\t"
            "CALL   SharedRuntime::ldiv\n\t"
            "ADD    ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}

// Integer DIVMOD with Register, both quotient and mod results
// Same overflow-guarding sequence as divI_eReg, but the DivModI node
// consumes both IDIV outputs (quotient in EAX, remainder in EDX), so
// neither register is a KILL here.
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP    EAX,0x80000000\n\t"
            "JNE,s  normal\n\t"
            "XOR    EDX,EDX\n\t"
            "CMP    ECX,-1\n\t"
            "JE,s   done\n"
    "normal: CDQ\n\t"
            "IDIV   $div\n\t"
    "done:"        %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
// Remainder lands in EDX; EAX (the quotient) is clobbered.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV   $div" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// Like divL_eReg: call the SharedRuntime::lrem helper.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );  // caller-saved regs clobbered by the call
  ins_cost(10000);
  format %{ "PUSH   $src1.hi\n\t"
            "PUSH   $src1.lo\n\t"
            "PUSH   $src2.hi\n\t"
            "PUSH   $src2.lo\n\t"
            "CALL   SharedRuntime::lrem\n\t"
            "ADD    ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}
8858 
// Divide Register Long (no special case since divisor != -1)
// Long division by a 32-bit constant, done inline with unsigned 32-bit DIVs:
// divide the high word first, then the low word with the partial remainder,
// negating around the divides when the dividend is negative.  The immL32
// operand plus the assert below guarantee divisor not in {0, -1, min_jint},
// so no overflow/#DE special-casing is needed.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR    $tmp2,$tmp2\n\t"
            "CMP    $tmp,EDX\n\t"
            "JA,s   fast\n\t"
            "MOV    $tmp2,EAX\n\t"
            "MOV    EAX,EDX\n\t"
            "MOV    EDX,0\n\t"
            "JLE,s  pos\n\t"
            "LNEG   EAX : $tmp2\n\t"
            "DIV    $tmp # unsigned division\n\t"
            "XCHG   EAX,$tmp2\n\t"
            "DIV    $tmp\n\t"
            "LNEG   $tmp2 : EAX\n\t"
            "JMP,s  done\n"
    "pos:\n\t"
            "DIV    $tmp\n\t"
            "XCHG   EAX,$tmp2\n"
    "fast:\n\t"
            "DIV    $tmp\n"
    "done:\n\t"
            "MOV    EDX,$tmp2\n\t"
            "NEG    EDX:EAX # if $imm < 0" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;  // |divisor|; sign is reapplied at the end
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);              // divide high word first
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);              // then low word with remainder carried in EDX
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);  // negate quotient for negative divisor
    }
  %}
  ins_pipe( pipe_slow );
%}
8929 
// Remainder Register Long (remainder fit into 32 bits)
// Companion to divL_eReg_imm32: same high-word-then-low-word unsigned DIV
// scheme, but keeps the remainder (EDX after the final DIV) and sign-extends
// it into the high word at the end.  Divisor is guaranteed not in
// {0, -1, min_jint} by the immL32 operand and the assert.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP    $tmp,EDX\n\t"
            "JA,s   fast\n\t"
            "MOV    $tmp2,EAX\n\t"
            "MOV    EAX,EDX\n\t"
            "MOV    EDX,0\n\t"
            "JLE,s  pos\n\t"
            "LNEG   EAX : $tmp2\n\t"
            "DIV    $tmp # unsigned division\n\t"
            "MOV    EAX,$tmp2\n\t"
            "DIV    $tmp\n\t"
            "NEG    EDX\n\t"
            "JMP,s  done\n"
    "pos:\n\t"
            "DIV    $tmp\n\t"
            "MOV    EAX,$tmp2\n"
    "fast:\n\t"
            "DIV    $tmp\n"
    "done:\n\t"
            "MOV    EAX,EDX\n\t"
            "SAR    EDX,31\n\t" %}
  ins_encode %{
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;  // |divisor|; remainder sign follows the dividend only
    Label  Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);              // divide high word first
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);              // then low word; remainder ends up in EDX
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
  %}
  ins_pipe( pipe_slow );
%}
8996 
// Integer Shift Instructions
// Shift Left by one (short D1 /4 encoding instead of C1 /4 with imm8=1).
instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL    $dst,$shift" %}
  opcode(0xD1, 0x4);  /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL    $dst,$shift" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable (count must be in CL, hence eCXRegI).
instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL    $dst,$shift" %}
  opcode(0xD3, 0x4);  /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
9033 
// Arithmetic shift right by one (short D1 /7 encoding).
instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR    $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one, directly on a memory operand.
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR    $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
// NOTE(review): pipe class is ialu_mem_imm although this is a register
// form (the other reg-imm shifts use ialu_reg) — looks like a copy-paste
// slip; scheduling-only, so behavior is unaffected.  Confirm before changing.
instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR    $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate, directly on a memory operand.
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR    $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable (count in CL).
instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR    $dst,$shift" %}
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
9090 
// Logical shift right by one (short D1 /5 encoding).
instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR    $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR    $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode; a single
// sign-extending byte move (MOVSX) replaces the two shifts.
instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX  $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode; a single
// sign-extending word move (MOVSX) replaces the two shifts.
instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX  $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable (count in CL).
instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR    $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
9154 
9155 
//----------Logical Instructions-----------------------------------------------
//----------Integer Logical Instructions---------------------------------------
// And Instructions
// And Register with Register (AND r32,r/m32, opcode 0x23).
instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  size(2);
  format %{ "AND    $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// And Register with Immediate (short sign-extended imm8 form used when it fits).
instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND    $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// And Register with Memory (load folded into the AND).
instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND    $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// And Memory with Register (read-modify-write on the memory location).
instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND    $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// And Memory with Immediate (read-modify-write on the memory location).
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND    $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
9219 
// Or Instructions
// Or Register with Register (OR r32,r/m32, opcode 0x0B).
instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR     $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Or with a pointer reinterpreted as an int (CastP2X); same OR encoding.
instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR     $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate (short sign-extended imm8 form used when it fits).
instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR     $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory (load folded into the OR).
instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR     $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register (read-modify-write on the memory location).
instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR     $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate (read-modify-write on the memory location).
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR     $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
9293 
// ROL/ROR
// ROL expand
// These three have no match rule: they are encode-only building blocks
// referenced by the expand %{ %} rules below.
instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL    $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1  /0  */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

// Variable rotate: count in CL, so dst must avoid ECX (ncxRegI).
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL    $dst, $shift" %}
  opcode(0xD3, 0x0);    /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once: (x << 1) | (x >>> 31).
instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// Predicate requires lshift + rshift == 0 (mod 32), i.e. a true rotate.
instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once: (x << s) | (x >>> (0 - s)).
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once: (x << s) | (x >>> (32 - s)).
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}
9360 
// ROR expand
// Encode-only building blocks (no match rule) used by the ROR expand
// rules below; mirror images of the ROL set.
instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR    $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

// Variable rotate: count in CL, so dst must avoid ECX (ncxRegI).
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR    $dst, $shift" %}
  opcode(0xD3, 0x1);    /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand

// ROR right once: (x >>> 1) | (x << 31).
instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Predicate requires rshift + lshift == 0 (mod 32), i.e. a true rotate.
instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once: (x >>> s) | (x << (0 - s)).
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once: (x >>> s) | (x << (32 - s)).
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}
9426 
// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR    $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1: x ^ -1 == ~x, so emit a one-byte-shorter
// NOT which also leaves EFLAGS untouched (no cr operand needed).
instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));  

  size(2);
  format %{ "NOT    $dst" %}  
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR    $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  // OpcSErm picks the sign-extended 8-bit form when the immediate fits.
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR    $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register (read-modify-write form)
instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR    $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate (read-modify-write form)
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR    $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
9499 
//----------Convert Int to Boolean---------------------------------------------
// Conv2B(x) produces 0 if x == 0 and 1 otherwise.  Implemented as a copy
// followed by NEG/ADC: NEG dst sets CF iff dst != 0, then ADC dst,src
// computes (-x) + x + CF which is 0 when x == 0 and 1 otherwise.

// Plain register copy, expand helper (no match rule).
instruct movI_nocopy(eRegI dst, eRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV    $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG/ADC trick for int input; expand helper.
instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG    $dst\n\t"
            "ADC    $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );   // 0x13 = ADC r32,r/m32
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B of an int value.
instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Pointer-to-int copy, expand helper for the pointer variant below.
instruct movP_nocopy(eRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV    $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// NEG/ADC trick for pointer input; expand helper.
instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG    $dst\n\t"
            "ADC    $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Conv2B of a pointer value (null -> 0, non-null -> 1).
instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}
9553 
// CmpLTMask p q produces -1 (all ones) when p < q (signed) and 0 otherwise.
// General form: zero dst, compare, SETlt low byte, then NEG turns 0/1
// into 0/-1.
instruct cmpLTMask( eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr ) %{
  match(Set dst (CmpLTMask p q));
  effect( KILL cr );
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR    $dst,$dst\n\t"
            "CMP    $p,$q\n\t"
            "SETlt  $dst\n\t"
            "NEG    $dst" %}
  ins_encode( OpcRegReg(0x33,dst,dst),
              OpcRegReg(0x3B,p,q),
              setLT_reg(dst), neg_reg(dst) );
  ins_pipe( pipe_slow );
%}

// CmpLTMask against zero: the sign bit already encodes (dst < 0), so a
// single arithmetic shift right by 31 smears it across the register.
instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
  match(Set dst (CmpLTMask dst zero));
  effect( DEF dst, KILL cr );
  ins_cost(100);

  format %{ "SAR    $dst,31" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, 0x1F ) );
  ins_pipe( ialu_reg );
%}


// Fused form: p = (p - q) + ((p < q ? -1 : 0) & y).  SUB sets CF when
// p < q (unsigned borrow tracks the prior signed compare here), SBB
// materializes the mask in ECX without a branch.
instruct cadd_cmpLTMask( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect( KILL tmp, KILL cr );
  ins_cost(400);
  // annoyingly, $tmp has no edges so you cant ask for it in
  // any format or encoding
  format %{ "SUB    $p,$q\n\t"
            "SBB    ECX,ECX\n\t"
            "AND    ECX,$y\n\t"
            "ADD    $p,ECX" %}
  ins_encode( enc_cmpLTP(p,q,y,tmp) );
  ins_pipe( pipe_cmplt );
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
  effect( USE_KILL tmp, KILL cr );
  ins_cost(400);

  format %{ "SUB    $p,$q\n\t"
            "SBB    ECX,ECX\n\t"
            "AND    ECX,$y\n\t"
            "ADD    $p,ECX" %}
  ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
%}
*/
9609 
//----------Long Instructions------------------------------------------------
// On 32-bit x86 a long lives in a register pair; 64-bit arithmetic is done
// as a low-half op followed by the carry/borrow-propagating op on the high
// half (ADD/ADC, SUB/SBB).  All forms clobber EFLAGS.

// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD    $dst.lo,$src.lo\n\t"
            "ADC    $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD    $dst.lo,$src.lo\n\t"
            "ADC    $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory (high half loads from $mem+4)
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD    $dst.lo,$mem\n\t"
            "ADC    $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB    $dst.lo,$src.lo\n\t"
            "SBB    $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB    $dst.lo,$src.lo\n\t"
            "SBB    $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB    $dst.lo,$mem\n\t"
            "SBB    $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long: 0 - dst, done pairwise with borrow propagation
// (NEG hi; NEG lo; SBB hi,0 — see the neg_long encoding).
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}
9689 
// Long bitwise AND/OR: no carry between halves, so the same opcode is
// simply applied to the low and high register of the pair.

// And Long Register with Register
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND    $dst.lo,$src.lo\n\t"
            "AND    $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND    $dst.lo,$src.lo\n\t"
            "AND    $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory (high half from $mem+4)
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND    $dst.lo,$mem\n\t"
            "AND    $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR     $dst.lo,$src.lo\n\t"
            "OR     $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR     $dst.lo,$src.lo\n\t"
            "OR     $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory (high half from $mem+4)
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR     $dst.lo,$mem\n\t"
            "OR     $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
9757 
// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1: x ^ -1 == ~x, so emit NOT on each
// half; NOT does not touch EFLAGS, hence no cr operand.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));  
  format %{ "NOT    $dst.lo\n\t"
            "NOT    $dst.hi" %}
  ins_encode %{
     __ notl($dst$$Register);
     __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory (high half from $mem+4)
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR    $dst.lo,$mem\n\t"
            "XOR    $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
9803 
// Long left shift by tiny constants (1..3), guarded by UseNewLongLShift:
// each 1-bit shift is an ADD lo,lo / ADC hi,hi pair (add-to-self doubles
// the value and carries the top bit of lo into hi).

// Shift Left Long by 1
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2 (two doubling pairs)
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi\n\t" 
            "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3 (three doubling pairs)
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi\n\t" 
            "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi\n\t" 
            "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}
9860 
// Shift Left Long by 1-31: SHLD funnels bits from lo into hi, then SHL
// shifts lo.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
            "SHL    $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63: lo moves wholesale into hi (shifted by cnt-32)
// and lo is cleared.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV    $dst.hi,$dst.lo\n"
          "\tSHL    $dst.hi,$cnt-32\n"
          "\tXOR    $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable (count in CL): test bit 5 of the count to
// decide whether the shift crosses the 32-bit boundary; if so pre-move
// lo into hi and clear lo, then the SHLD/SHL pair (which uses count mod 32)
// finishes the job in either case.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST   $shift,32\n\t"
            "JEQ,s  small\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "XOR    $dst.lo,$dst.lo\n"
    "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
            "SHL    $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9901 
// Logical (unsigned) long right shift: SHRD funnels bits from hi into lo,
// then SHR shifts hi, zero-filling from the top.

// Shift Right Long by 1-31
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
            "SHR    $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63: hi moves into lo (shifted by cnt-32), hi is
// cleared.
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV    $dst.lo,$dst.hi\n"
          "\tSHR    $dst.lo,$cnt-32\n"
          "\tXOR    $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable (count in CL): branch on bit 5 of the count
// to handle the >=32 case (hi moved into lo, hi cleared) before the
// SHRD/SHR pair completes the mod-32 remainder.
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST   $shift,32\n\t"
            "JEQ,s  small\n\t"
            "MOV    $dst.lo,$dst.hi\n\t"
            "XOR    $dst.hi,$dst.hi\n"
    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
            "SHR    $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9942 
// Arithmetic (signed) long right shift: like the logical form but uses SAR
// so the sign bit is replicated into the vacated high bits.

// Shift Right Long by 1-31
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
            "SAR    $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63: hi moves into lo (SAR by cnt-32) and hi is
// filled with the sign (SAR hi,31).
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV    $dst.lo,$dst.hi\n"
          "\tSAR    $dst.lo,$cnt-32\n"
          "\tSAR    $dst.hi,31" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable (count in CL): branch on bit 5
// of the count; for counts >= 32, hi goes to lo and hi is sign-smeared
// before the SHRD/SAR pair finishes the mod-32 remainder.
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST   $shift,32\n\t"
            "JEQ,s  small\n\t"
            "MOV    $dst.lo,$dst.hi\n\t"
            "SAR    $dst.hi,31\n"
    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
            "SAR    $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9983 
9984 
//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS.
// FUCOMIP writes EFLAGS directly; the cmpF_P6_fixup sequence forces CF on
// NaN (unordered compares treated as "less than") via MOV AH,1 / SAHF.
instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);   // rax (AH) is scratch for the NaN fixup
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction\n\t"
            "JNP    exit\n\t"
            "MOV    ah,1       // saw a NaN, set CF\n\t"
            "SAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same compare but for an unordered-flags user that only reads CF,
// so the NaN fixup can be skipped.
instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch, pre-P6 path: FCOM sets the FPU status word, which is
// moved to AX (FNSTSW) and loaded into EFLAGS via SAHF.
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD    $src1\n\t"
            "FCOMp  $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST   AX,0x400\n\t"
            "JZ,s   flags\n\t"
            "MOV    AH,1\t# unordered treat as LT\n"
    "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1 (CmpD3 three-way result)
instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD  $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_D(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 (CmpD3 three-way result)
instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD  $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
10068 
// float compare and set condition codes in EFLAGS by XMM regs.
// COMISD writes EFLAGS directly; the cmpF_P6_fixup forces CF on NaN so
// unordered reads as "less than" (same fixup as the P6 FPU path above).
instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst src));
  effect(KILL rax);   // AH is scratch for the NaN fixup
  ins_cost(125);
  format %{ "COMISD $dst,$src\n"
          "\tJNP    exit\n"
          "\tMOV    ah,1       // saw a NaN, set CF\n"
          "\tSAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// CF-only flags user: no NaN fixup needed.
instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst src));
  ins_cost(100);
  format %{ "COMISD $dst,$src" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs, memory operand
instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst (LoadD src)));
  effect(KILL rax);
  ins_cost(145);
  format %{ "COMISD $dst,$src\n"
          "\tJNP    exit\n"
          "\tMOV    ah,1       // saw a NaN, set CF\n"
          "\tSAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// CF-only flags user, memory operand: no NaN fixup needed.
instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst (LoadD src)));
  ins_cost(100);
  format %{ "COMISD $dst,$src" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM (CmpD3); dst is zeroed BEFORE the compare so
// the XOR does not clobber the COMISD flags.  NaN falls through JP to the
// "nan" label and yields -1.
instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "XOR    $dst,$dst\n"
          "\tCOMISD $src1,$src2\n"
          "\tJP,s   nan\n"
          "\tJEQ,s  exit\n"
          "\tJA,s   inc\n"
      "nan:\tDEC    $dst\n"
          "\tJMP,s  exit\n"
      "inc:\tINC    $dst\n"
      "exit:"
                %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
             CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory; here dst is loaded with 0 via MOV
// after the compare (MOV, unlike XOR, does not blow the flags).
instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD mem)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "COMISD $src1,$mem\n"
          "\tMOV    $dst,0\t\t# do not blow flags\n"
          "\tJP,s   nan\n"
          "\tJEQ,s  exit\n"
          "\tJA,s   inc\n"
      "nan:\tDEC    $dst\n"
          "\tJMP,s  exit\n"
      "inc:\tINC    $dst\n"
      "exit:"
                %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
             LdImmI(dst,0x0), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}
10164 
10165 
// Double subtract, x87 stack form: push src, then reverse-subtract-and-pop
// into dst.
instruct subD_reg(regD dst, regD src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD    $src\n\t"
            "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Double subtract with an explicit round-to-double, spilling the result to
// a stack slot (FSTP_D performs the rounding).
instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD    $src2\n\t"
            "DSUB   ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_D(src2),
              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Double subtract with a memory operand.
instruct subD_reg_mem(regD dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD    $src\n\t"
            "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Absolute value on the FPU: FABS operates in place on ST(0), hence the
// regDPR1 (top-of-stack) constraint on both operands.
instruct absD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Absolute value in XMM: clear the sign bit with an ANDPD mask.
instruct absXD_reg( regXD dst ) %{
  predicate(UseSSE>=2);
  match(Set dst (AbsD dst));
  format %{ "ANDPD  $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
  ins_encode( AbsXD_encoding(dst));
  ins_pipe( pipe_slow );
%}
10224 
// Negate on the FPU: FCHS flips the sign of ST(0) in place, hence the
// regDPR1 (top-of-stack) constraint.
instruct negD_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// Negate in XMM: flip the sign bit with an XORPD against a sign-bit mask
// held in the double_signflip_pool constant.
instruct negXD_reg( regXD dst ) %{
  predicate(UseSSE>=2);
  match(Set dst (NegD dst));
  format %{ "XORPD  $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
  ins_encode %{
     __ xorpd($dst$$XMMRegister,
              ExternalAddress((address)double_signflip_pool));
  %}
  ins_pipe( pipe_slow );
%}
10245 
// Double add, x87 stack form: push src, add-and-pop into dst.
instruct addD_reg(regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD    $src\n\t"
            "DADD   $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Double add with explicit round-to-double via FSTP_D to a stack slot.
instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD    $src2\n\t"
            "DADD   ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_D(src2),
              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Double add with a memory operand.
instruct addD_reg_mem(regD dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD    $src\n\t"
            "DADDp  $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory: load, add, store back to the same address
instruct addD_mem_reg(memory dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D  $dst\n\t"
            "DADD   ST,$src\n\t"
            "FST_D  $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}

// Add the constant 1.0: FLD1 loads it without a constant-table access.
instruct addD_reg_imm1(regD dst, immD1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp  $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// Add a general double constant from the constant table.  The predicate
// excludes 0.0 and 1.0, which have cheaper dedicated forms.
instruct addD_reg_imm(regD dst, immD con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp  $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Add a double constant with explicit round-to-double to a stack slot.
instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD   ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
10345 
// ---- SSE2 scalar-double arithmetic (UseSSE>=2); each op comes in
// reg/reg, reg/constant-table, and reg/memory flavors ----

// Add two double precision floating point values in xmm
instruct addXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst src));
  format %{ "ADDSD  $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct addXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst con));
  format %{ "ADDSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst (LoadD mem)));
  format %{ "ADDSD  $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Sub two double precision floating point values in xmm
instruct subXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst src));
  format %{ "SUBSD  $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct subXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst con));
  format %{ "SUBSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst (LoadD mem)));
  format %{ "SUBSD  $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Mul two double precision floating point values in xmm
instruct mulXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst src));
  format %{ "MULSD  $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct mulXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst con));
  format %{ "MULSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst (LoadD mem)));
  format %{ "MULSD  $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Div two double precision floating point values in xmm
instruct divXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst src));
  format %{ "DIVSD  $dst,$src" %}
  // opcode() is redundant here (bytes are emitted explicitly below) but harmless.
  opcode(0xF2, 0x0F, 0x5E);
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct divXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst con));
  format %{ "DIVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst (LoadD mem)));
  format %{ "DIVSD  $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10454 
10455 
// ---- x87 double multiply variants (UseSSE<=1) ----
instruct mulD_reg(regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD    $src\n\t"
            "DMULp  $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (MulD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double multiplies

  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_D(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Multiply by a general double constant (0.0/1.0 excluded: cheaper rules exist).
instruct mulD_reg_imm(regD dst, immD con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp  $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


// Multiply directly by a memory-resident double.
instruct mulD_reg_mem(regD dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D  $src\n\t"
            "DMULp  $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D  $mem\n\t"
            "DMUL   ST,$src\n\t"
            "FSTP_D $dst" %}
  // NOTE(review): tertiary 0xD9 /0 is FLD m32 (single); a 64-bit load would be
  // 0xDD /0 as used in mulD_reg_mem above — confirm this is intended.
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_F(src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
10536 
10537 
// MACRO3 -- addD a mulD
// This instruction is a '2-address' instruction in that the result goes
// back to src2.  This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DADDp  $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_F(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subD a mulD
// Fused (src0*src1) - src2, result written back into src2 (reverse-subtract
// DE E0+i, hence DSUBRp in the format).
instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_F(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}


// Plain x87 double divide (non-strict methods pick this over strictfp_divD_reg).
instruct divD_reg(regD dst, regD src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD    $src\n\t"
            "FDIVp  $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10584 
10585 // Strict FP instruction biases argument before division then
10586 // biases result, to avoid double rounding of subnormals.
10587 //
10588 // scale dividend by multiplying dividend by 2^(-15360)
10589 // load divisor
10590 // divide scaled dividend by divisor
10591 // rescale quotient by 2^(15360)
10592 //
// Strict-FP double divide: bias the dividend, divide, then rescale the
// quotient (see the comment block above).  Selected only for strict
// methods; the low ins_cost makes it win over divD_reg there.
// (Fix: the rule previously carried a second, unconditional
// `predicate(UseSSE<=1)` clause, which would have allowed this biasing
// sequence to be selected for ordinary non-strict divides as well.)
instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides

  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "FDIVp  $dst,ST\n\t"
            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_D(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
10612 
// Divide and round to a stack slot; explicitly excluded for strict methods
// (those must go through the biasing rule above).
instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD    $src1\n\t"
            "FDIV   ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// x87 double remainder via the emitModD() FPREM loop.
instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_D(dst, src),
              emitModD(),
              Push_Result_Mod_D(src),
              Pop_Reg_D(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 ModD: there is no XMM remainder instruction, so the operands are
// bounced through the stack onto the x87 unit, FPREM'ed, and bounced back.
instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB    ESP,8\t # DMOD\n"
          "\tMOVSD  [ESP+0],$src1\n"
          "\tFLD_D  [ESP+0]\n"
          "\tMOVSD  [ESP+0],$src0\n"
          "\tFLD_D  [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD  $dst,[ESP+0]\n"
          "\tADD    ESP,8\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}
10665 
// ---- x87 transcendentals.  The FPU forms operate in-place on ST(0)
// (regDPR1); the XMM forms bounce the value through the stack to the x87
// unit and back, hence the KILL of EFLAGS for the SUB/ADD ESP. ----

instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (SinD src));
  ins_cost(1800);
  format %{ "DSIN   $dst" %}
  opcode(0xD9, 0xFE);
  ins_encode( OpcP, OpcS );
  ins_pipe( pipe_slow );
%}

instruct sinXD_reg(regXD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (SinD dst));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  ins_cost(1800);
  format %{ "DSIN   $dst" %}
  opcode(0xD9, 0xFE);
  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}

instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (CosD src));
  ins_cost(1800);
  format %{ "DCOS   $dst" %}
  opcode(0xD9, 0xFF);
  ins_encode( OpcP, OpcS );
  ins_pipe( pipe_slow );
%}

instruct cosXD_reg(regXD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (CosD dst));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  ins_cost(1800);
  format %{ "DCOS   $dst" %}
  opcode(0xD9, 0xFF);
  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}

// FPTAN pushes an extra 1.0; the trailing FSTP ST discards it.
instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst(TanD src));
  format %{ "DTAN   $dst" %}
  ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
              Opcode(0xDD), Opcode(0xD8));   // fstp st
  ins_pipe( pipe_slow );
%}

instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(TanD dst));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  format %{ "DTAN   $dst" %}
  ins_encode( Push_SrcXD(dst),
              Opcode(0xD9), Opcode(0xF2),    // fptan
              Opcode(0xDD), Opcode(0xD8),   // fstp st
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}

instruct atanD_reg(regD dst, regD src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA   $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_D(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  format %{ "DATA   $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcXD(src),
              OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}

// FSQRT is D9 FA; opcode() stores the bytes reversed, so the encoding
// emits OpcS (0xD9) before OpcP (0xFA).
instruct sqrtD_reg(regD dst, regD src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT  $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_D(src),
              OpcS, OpcP, Pop_Reg_D(dst) );
  ins_pipe( pipe_slow );
%}
10759 
// Fast-path pow: computes X^Y as 2^(Y*log2(X)) using FYL2X plus the shared
// pow_exp_core_encoding (F2XM1 + manual exponent-scaling via the integer regs,
// which is why EAX/EBX/ECX are killed).
instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  predicate (UseSSE<=1);
  match(Set Y (PowD X Y));  // Raise X to the Yth power
  effect(KILL rax, KILL rbx, KILL rcx);
  format %{ "SUB    ESP,8\t\t# Fast-path POW encoding\n\t"
            "FLD_D  $X\n\t"
            "FYL2X  \t\t\t# Q=Y*ln2(X)\n\t"

            "FDUP   \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP  dword [ESP]\n\t"
            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV    [ESP+0],0\n\t"
            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"

            "ADD    ESP,8"
             %}
  ins_encode( push_stack_temp_qword,
              Push_Reg_D(X),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              pow_exp_core_encoding,
              pop_stack_temp_qword);
  ins_pipe( pipe_slow );
%}

// SSE2 pow: same x87 core, with both XMM operands pushed onto the FPU stack
// first and the result moved back to an XMM register.
instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
  predicate (UseSSE>=2);
  match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
  effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
  format %{ "SUB    ESP,8\t\t# Fast-path POW encoding\n\t"
            "MOVSD  [ESP],$src1\n\t"
            "FLD    FPR1,$src1\n\t"
            "MOVSD  [ESP],$src0\n\t"
            "FLD    FPR1,$src0\n\t"
            "FYL2X  \t\t\t# Q=Y*ln2(X)\n\t"

            "FDUP   \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP  dword [ESP]\n\t"
            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV    [ESP+0],0\n\t"
            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"

            "FST_D  [ESP]\n\t"
            "MOVSD  $dst,[ESP]\n\t"
            "ADD    ESP,8"
             %}
  ins_encode( push_stack_temp_qword,
              push_xmm_to_fpr1(src1),
              push_xmm_to_fpr1(src0),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              pow_exp_core_encoding,
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10837 
10838 
// Fast-path exp: computes e^X as 2^(X*log2(e)) via FLDL2E/FMULP and the
// shared pow_exp_core_encoding (which kills EAX/EBX/ECX for the
// exponent-scaling fixup).  Operates in place on ST(0).
// (Fix: the first format line was missing its trailing "\n\t", fusing the
// first two lines of the disassembly listing — siblings powD_reg/expXD_reg
// all carry it.)
instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  predicate (UseSSE<=1);
  match(Set dpr1 (ExpD dpr1));
  effect(KILL rax, KILL rbx, KILL rcx);
  format %{ "SUB    ESP,8\t\t# Fast-path EXP encoding\n\t"
            "FLDL2E \t\t\t# Ld log2(e) X\n\t"
            "FMULP  \t\t\t# Q=X*log2(e)\n\t"

            "FDUP   \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP  dword [ESP]\n\t"
            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV    [ESP+0],0\n\t"
            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"

            "ADD    ESP,8"
             %}
  ins_encode( push_stack_temp_qword,
              Opcode(0xD9), Opcode(0xEA),   // fldl2e
              Opcode(0xDE), Opcode(0xC9),   // fmulp
              pow_exp_core_encoding,
              pop_stack_temp_qword);
  ins_pipe( pipe_slow );
%}
10874 
// SSE2 exp: same x87 core as expD_reg, with the XMM source pushed onto the
// FPU stack and the result moved back through the stack temp.
instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  predicate (UseSSE>=2);
  match(Set dst (ExpD src));
  effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
  format %{ "SUB    ESP,8\t\t# Fast-path EXP encoding\n\t"
            "MOVSD  [ESP],$src\n\t"
            "FLDL2E \t\t\t# Ld log2(e) X\n\t"
            "FMULP  \t\t\t# Q=X*log2(e) X\n\t"

            "FDUP   \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP  dword [ESP]\n\t"
            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV    [ESP+0],0\n\t"
            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"

            "FST_D  [ESP]\n\t"
            "MOVSD  $dst,[ESP]\n\t"
            "ADD    ESP,8"
             %}
  ins_encode( Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xEA),   // fldl2e
              Opcode(0xDE), Opcode(0xC9),   // fmulp
              pow_exp_core_encoding,
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}



instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (Log10D src));
  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
  // fxch         ; swap ST(0) with ST(1)
  // fyl2x        ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FXCH   \n\t"
            "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
              Opcode(0xD9), Opcode(0xC9),   // fxch
              Opcode(0xD9), Opcode(0xF1));  // fyl2x

  ins_pipe( pipe_slow );
%}

// SSE2 variant: the constant is pushed first, then the XMM source, so no
// FXCH is needed before FYL2X.
instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  match(Set dst (Log10D src));
  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
  // fyl2x        ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
              Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              Push_ResultXD(dst));

  ins_pipe( pipe_slow );
%}

instruct logD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (LogD src));
  // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
  // fxch         ; swap ST(0) with ST(1)
  // fyl2x        ; compute log_e(2) * log_2(x)
  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
            "FXCH   \n\t"
            "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
              Opcode(0xD9), Opcode(0xC9),   // fxch
              Opcode(0xD9), Opcode(0xF1));  // fyl2x

  ins_pipe( pipe_slow );
%}

instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  // The source and result Double operands in XMM registers
  match(Set dst (LogD src));
  // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
  // fyl2x        ; compute log_e(2) * log_2(x)
  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
            "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
              Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              Push_ResultXD(dst));
  ins_pipe( pipe_slow );
%}
10985 
10986 //-------------Float Instructions-------------------------------
10987 // Float Math
10988 
10989 // Code for float compare:
10990 //     fcompp();
10991 //     fwait(); fnstsw_ax();
10992 //     sahf();
10993 //     movl(dst, unordered_result);
10994 //     jcc(Assembler::parity, exit);
10995 //     movl(dst, less_result);
10996 //     jcc(Assembler::below, exit);
10997 //     movl(dst, equal_result);
10998 //     jcc(Assembler::equal, exit);
10999 //     movl(dst, greater_result);
11000 //   exit:
11001 
11002 // P6 version of float compare, sets condition codes in EFLAGS
// ---- x87 float compares (UseSSE == 0).
// NOTE(review): these use Push_Reg_D on regF operands — presumably fine since
// x87 stack entries are width-agnostic 80-bit values; confirm the encoding class. ----
instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction\n\t"
            "JNP    exit\n\t"
            "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Cheaper variant when the consumer only reads C-flag semantics (eFlagsRegUCF),
// so no NaN fixup is needed.
instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}


// Compare & branch
// Pre-P6 fallback: FCOM + FNSTSW/SAHF to move FPU flags into EFLAGS.
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD    $src1\n\t"
            "FCOMp  $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST   AX,0x400\n\t"
            "JZ,s   flags\n\t"
            "MOV    AH,1\t# unordered treat as LT\n"
    "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF  $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_D(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF  $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
11081 
// float compare and set condition codes in EFLAGS by XMM regs
// COMISS with a post-compare fix-up (cmpF_P6_fixup) that folds the
// unordered (NaN) case into CF so unsigned-flag users see a consistent
// result; EAX is killed because the fix-up goes through AH/SAHF.
instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst src));
  effect(KILL rax);
  ins_cost(145);
  format %{ "COMISS $dst,$src\n"
          "\tJNP    exit\n"
          "\tMOV    ah,1       // saw a NaN, set CF\n"
          "\tSAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Cheaper variant for flag users (eFlagsRegUCF) that tolerate the raw
// COMISS unordered flag setting: no NaN fix-up, hence lower cost and no
// EAX kill.
instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst src));
  ins_cost(100);
  format %{ "COMISS $dst,$src" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpX_cc: folds the LoadF into the COMISS.
instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst (LoadF src)));
  effect(KILL rax);
  ins_cost(165);
  format %{ "COMISS $dst,$src\n"
          "\tJNP    exit\n"
          "\tMOV    ah,1       // saw a NaN, set CF\n"
          "\tSAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Memory-operand form of cmpX_ccCF: no NaN fix-up needed.
instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst (LoadF src)));
  ins_cost(100);
  format %{ "COMISS $dst,$src" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(dst, src));
  ins_pipe( pipe_slow );
%}
11133 
// Compare into -1,0,1 in XMM
// SSE version of the Java three-way float compare (CmpF3).  The branch
// sequence materializes -1 for "less or unordered (NaN)", 0 for equal,
// and 1 for greater, per Java Float.compare semantics for fcmpl.
instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "XOR    $dst,$dst\n"
          "\tCOMISS $src1,$src2\n"
          "\tJP,s   nan\n"
          "\tJEQ,s  exit\n"
          "\tJA,s   inc\n"
      "nan:\tDEC    $dst\n"
          "\tJMP,s  exit\n"
      "inc:\tINC    $dst\n"
      "exit:"
                %}
  opcode(0x0F, 0x2F);
  ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
// Same as cmpX_reg but with the right operand folded from memory.  Note
// dst is zeroed with MOV (not XOR) after the compare so the flags set by
// COMISS are preserved for the branch sequence.
instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF mem)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "COMISS $src1,$mem\n"
          "\tMOV    $dst,0\t\t# do not blow flags\n"
          "\tJP,s   nan\n"
          "\tJEQ,s  exit\n"
          "\tJA,s   inc\n"
      "nan:\tDEC    $dst\n"
          "\tJMP,s  exit\n"
      "inc:\tINC    $dst\n"
      "exit:"
                %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}
11175 
// x87 float add/subtract.  The "24" variants store the result through a
// stack slot, which forces rounding to 24-bit (single) precision when the
// FPU is running with a wider internal precision; the non-24 variants
// keep the result on the FPU stack unrounded.

// Spill to obtain 24-bit precision
instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB   $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_F(src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct subF_reg(regF dst, regF src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB   $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
  ins_encode( Push_Reg_F(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_F(src2),
              OpcReg_F(src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct addF_reg(regF dst, regF src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD    $src\n\t"
            "FADDp  $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_F(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
11226 
// Scalar single-precision SSE arithmetic (ADDSS/SUBSS/MULSS/DIVSS).
// Each operation has three forms: reg-reg, reg with a constant loaded
// from the constant table, and reg with a folded memory operand.
// SSE scalar ops round to single precision directly, so no spill/reload
// is needed for strict 24-bit results.

// Add two single precision floating point values in xmm
instruct addX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst src));
  format %{ "ADDSS  $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct addX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst con));
  format %{ "ADDSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst (LoadF mem)));
  format %{ "ADDSS  $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}

// Subtract two single precision floating point values in xmm
instruct subX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst src));
  format %{ "SUBSS  $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct subX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst con));
  format %{ "SUBSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst (LoadF mem)));
  format %{ "SUBSS  $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Multiply two single precision floating point values in xmm
instruct mulX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst src));
  format %{ "MULSS  $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct mulX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst con));
  format %{ "MULSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst (LoadF mem)));
  format %{ "MULSS  $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Divide two single precision floating point values in xmm
instruct divX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst src));
  format %{ "DIVSS  $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct divX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst con));
  format %{ "DIVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst (LoadF mem)));
  format %{ "DIVSS  $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
11334 
// Get the square root of a single precision floating point values in xmm
// Matches the float-sqrt idiom (ConvD2F (SqrtD (ConvF2D x))): the ideal
// graph only has a double sqrt, so the F->D->sqrt->F round trip is folded
// into a single SQRTSS, which is correct because single-precision sqrt of
// a single input rounds identically.
instruct sqrtX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  format %{ "SQRTSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Memory-operand form of sqrtX_reg: folds the LoadF.
instruct sqrtX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
  format %{ "SQRTSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}

// Get the square root of a double precision floating point values in xmm
instruct sqrtXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));
  format %{ "SQRTSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Memory-operand form of sqrtXD_reg: folds the LoadD.
instruct sqrtXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD mem)));
  format %{ "SQRTSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}
11368 
// Float absolute value / negation.
// x87 forms use FABS/FCHS on the top-of-stack register (regFPR1);
// SSE forms mask or flip the sign bit with a memory constant.

instruct absF_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct absX_reg(regX dst ) %{
  predicate(UseSSE>=1);
  match(Set dst (AbsF dst));
  format %{ "ANDPS  $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
  ins_encode( AbsXF_encoding(dst));
  ins_pipe( pipe_slow );
%}

instruct negF_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

instruct negX_reg( regX dst ) %{
  predicate(UseSSE>=1);
  match(Set dst (NegF dst));
  format %{ "XORPS  $dst,[0x80000000]\t# CHS F by sign flipping" %}
  ins_encode( NegXF_encoding(dst));
  ins_pipe( pipe_slow );
%}
11404 
// Cisc-alternate to addF_reg
// Spill to obtain 24-bit precision
instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD    $src2\n\t"
            "FADD   ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_F(src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addF_reg
// This instruction does not round to 24-bits
instruct addF_reg_mem(regF dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD   $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Following two instructions exist for the _222_mpegaudio benchmark
// Spill to obtain 24-bit precision
instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD   $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
11476 
11477 
// Float add with a constant-table constant.
// Note: $src$$reg is biased by one because x87 register encodings count
// from the top of stack; the " - 1" converts to the ST(i) index for FLD.

// Spill to obtain 24-bit precision
instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD    $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD    $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}
11507 
// x87 float multiply, reg-reg forms.

// Spill to obtain 24-bit precision
instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD    $src1\n\t"
            "FMUL   $src2\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_F(src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct mulF_reg(regF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD    $src1\n\t"
            "FMUL   $src2\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1); /* D8 C8+i */
  ins_encode( Push_Reg_F(src2),
              OpcReg_F(src1),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_reg_reg );
%}
11537 
11538 
// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S  $src2\n\t"
            "FMUL   $src1\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_F(src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL   $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_F(src1),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
11568 
// Spill to obtain 24-bit precision
// Both operands come from memory (cisc-spill form).
instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL   $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
// Multiply by a constant-table constant; " - 1" converts the allocator's
// register number to the x87 ST(i) index.
instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD    $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD    $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}
11614 
11615 
11616 //
11617 // MACRO1 -- subsume unshared load into mulF
11618 // This instruction does not round to 24-bits
11619 instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
11620   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11621   match(Set dst (MulF (LoadF mem1) src));
11622 
11623   format %{ "FLD    $mem1    ===MACRO1===\n\t"
11624             "FMUL   ST,$src\n\t"
11625             "FSTP   $dst" %}
11626   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
11627   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
11628               OpcReg_F(src),
11629               Pop_Reg_F(dst) );
11630   ins_pipe( fpu_reg_reg_mem );
11631 %}
11632 //
11633 // MACRO2 -- addF a mulF which subsumed an unshared load
11634 // This instruction does not round to 24-bits
11635 instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
11636   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11637   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
11638   ins_cost(95);
11639 
11640   format %{ "FLD    $mem1     ===MACRO2===\n\t"
11641             "FMUL   ST,$src1  subsume mulF left load\n\t"
11642             "FADD   ST,$src2\n\t"
11643             "FSTP   $dst" %}
11644   opcode(0xD9); /* LoadF D9 /0 */
11645   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
11646               FMul_ST_reg(src1),
11647               FAdd_ST_reg(src2),
11648               Pop_Reg_F(dst) );
11649   ins_pipe( fpu_reg_mem_reg_reg );
11650 %}
11651 
// MACRO3 -- addF a mulF
// This instruction does not round to 24-bits.  It is a '2-address'
// instruction in that the result goes back to src2.  This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD    $src0     ===MACRO3===\n\t"
            "FMUL   ST,$src1\n\t"
            "FADDP  $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_F(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divF subF
// Fuses (src2 - src1) / src3 into a single x87 sequence.
// This instruction does not round to 24-bits
instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD    $src2   ===MACRO4===\n\t"
            "FSUB   ST,$src1\n\t"
            "FDIV   ST,$src3\n\t"
            "FSTP  $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_F(src2),
              subF_divF_encode(src1,src3),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}
11687 
// x87 float divide.

// Spill to obtain 24-bit precision
instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_F(src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
instruct divF_reg(regF dst, regF src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV   $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_F(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
11712 
11713 
// Float remainder (Java % semantics) via the x87 FPREM loop.

// Spill to obtain 24-bit precision
instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS

  format %{ "FMOD   $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_D(src1, src2),
              emitModD(),
              Push_Result_Mod_D(src2),
              Pop_Mem_F(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS

  format %{ "FMOD   $dst,$src" %}
  ins_encode(Push_Reg_Mod_D(dst, src),
              emitModD(),
              Push_Result_Mod_D(src),
              Pop_Reg_F(dst));
  ins_pipe( pipe_slow );
%}

// SSE version: no SSE remainder instruction exists, so the operands are
// bounced through the stack to the x87 FPREM loop, and the result is
// moved back to an XMM register.  FPREM is partial: the loop repeats
// (JP loop, testing C2 via FNSTSW/SAHF) until reduction is complete.
instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB    ESP,4\t # FMOD\n"
          "\tMOVSS  [ESP+0],$src1\n"
          "\tFLD_S  [ESP+0]\n"
          "\tMOVSS  [ESP+0],$src0\n"
          "\tFLD_S  [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS  $dst,[ESP+0]\n"
          "\tADD    ESP,4\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}
11765 
11766 
//----------Arithmetic Conversion Instructions---------------------------------
// The conversions operations are all Alpha sorted.  Please keep it that way!

// Round an x87 value to float precision by storing it through a stack
// slot (FST_S rounds to single on store).
instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S  $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_F(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round an x87 value to double precision by storing it through a stack
// slot (FST_D rounds from the FPU's internal extended precision).
instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D  $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_D(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
11787 
// Force rounding to 24-bit precision and 6-bit exponent
// Pure x87: D->F narrowing is just a rounding store, so this expands to
// the roundFloat rule above.
instruct convD2F_reg(stackSlotF dst, regD src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S  $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}

// Force rounding to 24-bit precision and 6-bit exponent
// UseSSE==1: doubles live in x87 but floats live in XMM, so store the
// rounded single to the stack and reload it with MOVSS.
instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB    ESP,4\n\t"
            "FST_S  [ESP],$src\t# F-round\n\t"
            "MOVSS  $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode( D2X_encoding(dst, src) );
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
// UseSSE>=2: both types are in XMM, a single CVTSD2SS suffices.
instruct convXD2X_reg(regX dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  opcode(0xF2, 0x0F, 0x5A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// F->D widening, pure x87: a register-to-register copy is sufficient
// since x87 holds both at internal precision.
instruct convF2D_reg_reg(regD dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S  $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_D(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// F->D when the float is in x87 but the double result goes to a stack
// slot: expands to the roundDouble rule.
instruct convF2D_reg(stackSlotD dst, regF src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D  $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// F->D when the float is in XMM (UseSSE==1) but doubles live in x87:
// bounce the single through the stack into the FPU.
instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB    ESP,4\n\t"
            "MOVSS  [ESP] $src\n\t"
            "FLD_S  [ESP]\n\t"
            "ADD    ESP,4\n\t"
            "FSTP   $dst\t# D-round" %}
  ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
  ins_pipe( pipe_slow );
%}

// F->D entirely in XMM (UseSSE>=2): single CVTSS2SD.
instruct convX2XD_reg(regXD dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  opcode(0xF3, 0x0F, 0x5A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11859 
// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
// The truncating convert produces 0x80000000 for NaN and out-of-range
// inputs; that sentinel triggers a call to the d2i_wrapper stub, which
// applies Java's saturating/zero-for-NaN semantics.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD    $src\t# Convert double to int \n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,4\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "CMP    EAX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "FLD_D  $src\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_D(src), D2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
// SSE2 version: CVTTSD2SI, with the same 0x80000000 sentinel check and
// d2i_wrapper slow path as above.
instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP    $dst,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP, 8\n\t"
            "MOVSD  [ESP], $src\n\t"
            "FLD_D  [ESP]\n\t"
            "ADD    ESP, 8\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  opcode(0x1); // double-precision conversion
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
  ins_pipe( pipe_slow );
%}

// Double to long.  Result lands in the EDX:EAX pair (eADXRegL); the
// 0x80000000:00000000 sentinel routes NaN/overflow to d2l_wrapper.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD    $src\t# Convert double to long\n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,8\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_D(src),  D2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB    ESP,8\t# Convert double to long\n\t"
            "MOVSD  [ESP],$src\n\t"
            "FLD_D  [ESP]\n\t"
            "FLDCW  trunc mode\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP,8\n\t"
            "MOVSD  [ESP],$src\n\t"
            "FLD_D  [ESP]\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode( XD2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
11946 
// Convert a float to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  // An EAX result of 0x80000000 is the overflow/NaN sentinel; the slow
  // path reloads the float and calls d2i_wrapper for Java semantics.
  format %{ "FLD    $src\t# Convert float to int \n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,4\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "CMP    EAX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  // D2I_encoding works for F2I
  ins_encode( Push_Reg_F(src), D2I_encoding(src) );
  ins_pipe( pipe_slow );
%}
11972 
// Convert a float in xmm to an int reg.
// CVTTSS2SI truncates directly; a result of 0x80000000 signals possible
// overflow/NaN, so the slow path spills the float onto the FPU stack
// and calls d2i_wrapper to apply Java's corner-case semantics.
instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP    $dst,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP, 4\n\t"
            "MOVSS  [ESP], $src\n\t"
            "FLD    [ESP]\n\t"
            "ADD    ESP, 4\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  opcode(0x0); // single-precision conversion
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
  ins_pipe( pipe_slow );
%}
11991 
// Convert a float to a long via the FPU stack (no-SSE path).
// Same sentinel scheme as the double version: EDX:EAX ==
// 0x80000000:00000000 means possible overflow/NaN, so the float is
// reloaded and d2l_wrapper is called for Java corner-case semantics.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD    $src\t# Convert float to long\n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,8\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  // D2L_encoding works for F2L
  ins_encode( Push_Reg_F(src), D2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
12014 
// XMM lacks a float/double->long conversion, so use the old FPU stack.
// The float is spilled from xmm to the stack, loaded onto the FPU stack
// and FISTed with truncation; the 0x80000000:00000000 sentinel in
// EDX:EAX routes overflow/NaN cases to d2l_wrapper.
instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB    ESP,8\t# Convert float to long\n\t"
            "MOVSS  [ESP],$src\n\t"
            "FLD_S  [ESP]\n\t"
            "FLDCW  trunc mode\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP,4\t# Convert float to long\n\t"
            "MOVSS  [ESP],$src\n\t"
            "FLD_S  [ESP]\n\t"
            "ADD    ESP,4\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode( X2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
12041 
// Convert an int (in a stack slot) to a double on the FPU stack.
instruct convI2D_reg(regD dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int in a register to a double in xmm with CVTSI2SD
// (used when the UseXmmI2D variant below is disabled).
instruct convI2XD_reg(regXD dst, eRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Memory form: convert an int loaded from memory directly with CVTSI2SD.
instruct convI2XD_mem(regXD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}

// Convert int to double entirely in xmm: MOVD the int into the low
// lane, then CVTDQ2PD.  Selected by the UseXmmI2D flag.
instruct convXI2XD_reg(regXD dst, eRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD  $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// Memory form of the FPU int->double conversion; only used when not in
// 24-bit rounding mode (see select_24_bit_instr).
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD   $mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_D(dst));
  ins_pipe( fpu_reg_mem );
%}
12094 
// Convert a byte to a float; no rounding step needed.
// The predicate matches only (ConvI2F (AndI x 255)), i.e. a value
// already masked to 8 bits, which a float represents exactly.
instruct conv24I2F_reg(regF dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
// (FSTP_S to a stack slot narrows to single precision).
instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD   $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_F(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
// (memory-source form of the above).
instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD   $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_F(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2F_reg(regF dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2F_mem(regF dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD   $mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2X_reg(regX dst, eRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}

  opcode(0xF3, 0x0F, 0x2A);  /* F3 0F 2A /r */
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Convert int to float entirely in xmm: MOVD the int into the low
// lane, then CVTDQ2PS.  Selected by the UseXmmI2F flag.
 instruct convXI2X_reg(regX dst, eRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD  $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
12181 
// Sign-extend convert int to long: copy the int into both halves of the
// long pair, then arithmetic-shift the high half by 31 to replicate the
// sign bit.
instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src\n\t"
            "SAR    $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
// Matches (AndL (ConvI2L src) 0xFFFFFFFF): low half is the int, high
// half is zeroed with XOR (which is why flags are killed).
instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV    $dst.lo,$src\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
// Matches (AndL src 0xFFFFFFFF): keep the low half, clear the high half.
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}
12216 
// Convert long to double via the FPU: push both halves, FILD the 64-bit
// integer, and round-store the result into a stack-slot double.
instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
            "PUSH   $src.lo\n\t"
            "FILD   ST,[ESP + #0]\n\t"
            "ADD    ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_D(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to double, landing in xmm: FILD/FSTP_D through the
// stack (SSE2 has no 64-bit-int conversion in 32-bit mode), then MOVSD
// the rounded result into the xmm register.
instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
            "PUSH   $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD  $dst,[ESP]\n\t"
            "ADD    ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultXD(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to single float in xmm; FSTP_S performs the F-rounding
// before the MOVSS into the xmm register.
instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
            "PUSH   $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS  $dst,[ESP]\n\t"
            "ADD    ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Convert long to single float in a stack slot (FPU path).
instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
            "PUSH   $src.lo\n\t"
            "FILD   ST,[ESP + #0]\n\t"
            "ADD    ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_F(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to int: simply take the low 32-bit half.
instruct convL2I_reg( eRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV    $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}
12281 
12282 
// MoveF2I/MoveI2F reinterpret the raw 32-bit pattern; no numeric
// conversion takes place in any of these variants.

// Raw bit move, stack-slot float -> int register (plain 32-bit MOV).
instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// FPU float register -> int stack slot (FST_S writes the 32-bit image).
instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_F(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// xmm float -> int stack slot via MOVSS store.
instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
  ins_pipe( pipe_slow );
%}

// xmm float -> int register directly with MOVD (SSE2).
instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode( MovX2I_reg(dst, src));
  ins_pipe( pipe_slow );
%}

// Int register -> float stack slot (plain 32-bit store).
instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Int stack slot -> FPU float register (FLD_S of the raw bits).
instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Int stack slot -> xmm float via MOVSS load.
instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}

// Int register -> xmm float directly with MOVD (SSE2).
instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode( MovI2X_reg(dst, src) );
  ins_pipe( pipe_slow );
%}
12372 
// MoveD2L/MoveL2D reinterpret the raw 64-bit pattern; no numeric
// conversion takes place in any of these variants.

// Stack-slot double -> long register pair: two 32-bit loads.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// FPU double register -> long stack slot (FST_D writes the 64-bit image).
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_D(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// xmm double -> long stack slot via MOVSD store.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);

  format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
  ins_pipe( pipe_slow );
%}

// xmm double -> long register pair: MOVD the low half, shuffle the high
// half into a temp lane with PSHUFLW, MOVD it out.
instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD   $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode( MovXD2L_reg(dst, src, tmp) );
  ins_pipe( pipe_slow );
%}

// Long register pair -> double stack slot: two 32-bit stores.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Long stack slot -> FPU double register (FLD_D of the raw bits).
instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Long stack slot -> xmm via MOVSD (clears the upper half of the xmm;
// used when UseXmmLoadAndClearUpper is set).
instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}

// Long stack slot -> xmm via MOVLPD (partial register write; used when
// UseXmmLoadAndClearUpper is off).
instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}

// Long register pair -> xmm: MOVD both halves into xmm lanes and
// interleave them with PUNPCKLDQ.
instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD   $dst,$src.lo\n\t"
            "MOVD   $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode( MovL2XD_reg(dst, src, tmp) );
  ins_pipe( pipe_slow );
%}
12480 
// Replicate scalar to packed byte (1 byte) values in xmm
instruct Repl8B_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate8B src));
  format %{ "MOVDQA  $dst,$src\n\t"
            "PUNPCKLBW $dst,$dst\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
  ins_encode( pshufd_8x8(dst, src));
  ins_pipe( pipe_slow );
%}

// Replicate scalar to packed byte (1 byte) values in xmm
// (general-register source: MOVD into xmm first).
instruct Repl8B_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate8B src));
  format %{ "MOVD    $dst,$src\n\t"
            "PUNPCKLBW $dst,$dst\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
  ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
  ins_pipe( pipe_slow );
%}

// Replicate scalar zero to packed byte (1 byte) values in xmm
instruct Repl8B_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate8B zero));
  format %{ "PXOR  $dst,$dst\t! replicate8B" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed short (2 byte) values in xmm
instruct Repl4S_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4S src));
  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
  ins_encode( pshufd_4x16(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed short (2 byte) values in xmm
// (general-register source: MOVD into xmm first).
instruct Repl4S_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4S src));
  format %{ "MOVD    $dst,$src\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed short (2 byte) values in xmm
instruct Repl4S_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4S zero));
  format %{ "PXOR  $dst,$dst\t! replicate4S" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed char (2 byte) values in xmm
instruct Repl4C_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4C src));
  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
  ins_encode( pshufd_4x16(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed char (2 byte) values in xmm
// (general-register source: MOVD into xmm first).
instruct Repl4C_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4C src));
  format %{ "MOVD    $dst,$src\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed char (2 byte) values in xmm
instruct Repl4C_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4C zero));
  format %{ "PXOR  $dst,$dst\t! replicate4C" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed integer (4 byte) values in xmm
instruct Repl2I_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2I src));
  format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
  ins_encode( pshufd(dst, src, 0x00));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed integer (4 byte) values in xmm
// (general-register source: MOVD into xmm first).
instruct Repl2I_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2I src));
  format %{ "MOVD   $dst,$src\n\t"
            "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
  ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed integer (4 byte) values in xmm
instruct Repl2I_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2I zero));
  format %{ "PXOR  $dst,$dst\t! replicate2I" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed single precision floating point values in xmm
instruct Repl2F_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2F src));
  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
  ins_encode( pshufd(dst, src, 0xe0));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed single precision floating point values in xmm
// (single-precision xmm source).
instruct Repl2F_regX(regXD dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2F src));
  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
  ins_encode( pshufd(dst, src, 0xe0));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed single precision floating point values in xmm
instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2F zero));
  format %{ "PXOR  $dst,$dst\t! replicate2F" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}
12622 
12623 // =======================================================================
12624 // fast clearing of an array
12625 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
12626   match(Set dummy (ClearArray cnt base));
12627   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12628   format %{ "SHL    ECX,1\t# Convert doublewords to words\n\t"
12629             "XOR    EAX,EAX\n\t"
12630             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
12631   opcode(0,0x4);
12632   ins_encode( Opcode(0xD1), RegOpc(ECX),
12633               OpcRegReg(0x33,EAX,EAX),
12634               Opcode(0xF3), Opcode(0xAB) );
12635   ins_pipe( pipe_slow );
12636 %}
12637 
// String compare intrinsic: delegates to MacroAssembler::string_compare.
// Operand registers are fixed (EDI/ECX/ESI/EDX in, EAX out) to match
// that routine's calling convention.
instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
                        eAXRegI result, regXD tmp1, eFlagsReg cr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
  ins_encode %{
    __ string_compare($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
                      $tmp1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast string equals
// Delegates to char_arrays_equals with is_array_equ=false (string form).
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
                          $cnt$$Register, $result$$Register, $tmp3$$Register,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
// Requires SSE4.2 (PCMPESTRI-based routines in the macro assembler).
instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                            eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// General indexOf (substring length known only at runtime); passes -1
// as the constant-count argument to select the variable-length path.
instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                        eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
// Delegates to char_arrays_equals with is_array_equ=true (array form).
instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                      regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
                          $tmp3$$Register, $result$$Register, $tmp4$$Register,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
12727 
//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions

// Register-register compare: CMP r32,r/m32 sets eFlagsReg.
instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP    $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Register-immediate compare; OpcSErm/Con8or32 pick the short (sign-
// extended imm8) or full imm32 encoding depending on the constant.
instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP    $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP    $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero: TEST reg,reg is shorter than CMP reg,0.
instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST   $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (AndI src con) compared to zero folds into a single TEST r/m32,imm32.
instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST   $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (AndI src mem) compared to zero folds into TEST r32,m32.
instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST   $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}
12787 
// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.

// Unsigned register-register compare; same CMP encoding as the signed form,
// the eFlagsRegU result just selects unsigned branch/cmov conditions later.
instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Unsigned register-immediate compare.
instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
// Unsigned compare of register against a loaded value (load folded into CMP).
instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu   $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare against zero via TEST reg,reg.
instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu  $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12837 
// Unsigned pointer compare Instructions

// Pointer register-register compare; pointers compare unsigned.
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Pointer register-immediate compare.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
// Pointer compare against a loaded pointer (load folded into CMP).
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu   $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
// The predicate inspects the loaded pointer's type to restrict this form to
// non-oop (raw) pointers.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( !n->in(2)->in(2)->bottom_type()->isa_oop_ptr() );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST   $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// NOTE(review): 'zero' is declared immI0 although it matches inside a CmpP;
// presumably deliberate since a null pointer and integer 0 coincide here --
// confirm against the operand definitions.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST   $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);               /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.
12918 
//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
// The entire code sequence comes from the min_enc encoding; it kills the
// flags, hence the KILL effect and the slow pipe.
instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN    $dst,$src" %}
  opcode(0xCC);  // 0xCC (INT3) looks like a sanity placeholder; min_enc emits the code -- confirm
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}
12956 
// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
// Mirror image of minI_eReg: max_enc emits the whole sequence and the flags
// are clobbered.
instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX    $dst,$src" %}
  opcode(0xCC);  // 0xCC (INT3) looks like a sanity placeholder; max_enc emits the code -- confirm
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}
12992 
12993 // ============================================================================
12994 // Counted Loop limit node which represents exact final iterator value.
12995 // Note: the resulting value should fit into integer range since
12996 // counted loops have limit check on overflow.
12997 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12998   match(Set limit (LoopLimit (Binary init limit) stride));
12999   effect(TEMP limit_hi, TEMP tmp, KILL flags);
13000   ins_cost(300);
13001 
13002   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
13003   ins_encode %{
13004     int strd = (int)$stride$$constant;
13005     assert(strd != 1 && strd != -1, "sanity");
13006     int m1 = (strd > 0) ? 1 : -1;
13007     // Convert limit to long (EAX:EDX)
13008     __ cdql();
13009     // Convert init to long (init:tmp)
13010     __ movl($tmp$$Register, $init$$Register);
13011     __ sarl($tmp$$Register, 31);
13012     // $limit - $init
13013     __ subl($limit$$Register, $init$$Register);
13014     __ sbbl($limit_hi$$Register, $tmp$$Register);
13015     // + ($stride - 1)
13016     if (strd > 0) {
13017       __ addl($limit$$Register, (strd - 1));
13018       __ adcl($limit_hi$$Register, 0);
13019       __ movl($tmp$$Register, strd);
13020     } else {
13021       __ addl($limit$$Register, (strd + 1));
13022       __ adcl($limit_hi$$Register, -1);
13023       __ lneg($limit_hi$$Register, $limit$$Register);
13024       __ movl($tmp$$Register, -strd);
13025     }
13026     // signed devision: (EAX:EDX) / pos_stride
13027     __ idivl($tmp$$Register);
13028     if (strd < 0) {
13029       // restore sign
13030       __ negl($tmp$$Register);
13031     }
13032     // (EAX) * stride
13033     __ mull($tmp$$Register);
13034     // + init (ignore upper bits)
13035     __ addl($limit$$Register, $init$$Register);
13036   %}
13037   ins_pipe( pipe_slow );
13038 %}
13039 
13040 // ============================================================================
13041 // Branch Instructions
13042 // Jump Table
13043 instruct jumpXtnd(eRegI switch_val) %{
13044   match(Jump switch_val);
13045   ins_cost(350);
13046   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
13047   ins_encode %{
13048     // Jump to Address(table_base + switch_reg)
13049     Address index(noreg, $switch_val$$Register, Address::times_1);
13050     __ jump(ArrayAddress($constantaddress, index));
13051   %}
13052   ins_pc_relative(1);
13053   ins_pipe(pipe_jmp);
13054 %}
13055 
// Jump Direct - Label defines a relative address from JMP+1
// Unconditional long jump: E9 rel32, 5 bytes.
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP    $labl" %}
  size(5);
  opcode(0xE9);
  ins_encode( OpcP, Lbl( labl ) );
  ins_pipe( pipe_jmp );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional long jump on signed flags: 0F 8x rel32, 6 bytes.
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop    $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Loop-back branch of a counted loop, signed condition.
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop    $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Loop-back branch of a counted loop, unsigned condition.
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u  $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Loop-back branch, carry-free unsigned flags variant; slightly cheaper cost
// so the matcher prefers it when the flags producer is a UCF compare.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u  $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u  $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode(Jcc(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}

// Conditional jump, carry-free unsigned flags variant.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u  $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode(Jcc(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}
13151 
// Conditional jump for cmpOpUCF2 conditions, which additionally need the
// parity flag consulted (see the UCF compare definitions elsewhere in this
// file): for notEqual both JP and Jcc branch to the target; for equal, JP
// skips over the Jcc.  Emits two 6-byte long-form jumps (12 bytes total).
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u   $labl\n\t"
      $$emit$$"J$cop,u   $labl"
    } else {
      $$emit$$"JP,u   done\n\t"
      $$emit$$"J$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(12);
  opcode(0x0F, 0x80);
  ins_encode %{
    Label* l = $labl$$label;
    assert(l != NULL, "need Label");
    // First jump: JP (jump on parity), long form 0F 8A rel32.
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, Assembler::parity);
    int parity_disp = -1;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // Branch to the target; the displacement is relative to the end of
      // this jump, i.e. past the 4 bytes of rel32 still to be emitted.
      parity_disp = l->loc_pos() - (cbuf.insts_size() + 4);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Skip the following 6-byte conditional jump (the "done:" label).
      parity_disp = 6;
    } else {
      ShouldNotReachHere();
    }
    emit_d32(cbuf, parity_disp);
    // Second jump: Jcc to the target, long form 0F 8x rel32.
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
    int disp = l->loc_pos() - (cbuf.insts_size() + 4);
    emit_d32(cbuf, disp);
  %}
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}
13194 
13195 // ============================================================================
13196 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
13197 // array for an instance of the superklass.  Set a hidden internal cache on a
13198 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
13199 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
13200 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
13201   match(Set result (PartialSubtypeCheck sub super));
13202   effect( KILL rcx, KILL cr );
13203 
13204   ins_cost(1100);  // slightly larger than the next version
13205   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
13206             "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
13207             "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
13208             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
13209             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
13210             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
13211             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
13212      "miss:\t" %}
13213 
13214   opcode(0x1); // Force a XOR of EDI
13215   ins_encode( enc_PartialSubtypeCheck() );
13216   ins_pipe( pipe_slow );
13217 %}
13218 
13219 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
13220   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
13221   effect( KILL rcx, KILL result );
13222 
13223   ins_cost(1000);
13224   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
13225             "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
13226             "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
13227             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
13228             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
13229             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
13230      "miss:\t" %}
13231 
13232   opcode(0x0);  // No need to XOR EDI
13233   ins_encode( enc_PartialSubtypeCheck() );
13234   ins_pipe( pipe_slow );
13235 %}
13236 
13237 // ============================================================================
13238 // Branch Instructions -- short offset versions
13239 //
13240 // These instructions are used to replace jumps of a long offset (the default
13241 // match) with jumps of a shorter offset.  These instructions are all tagged
13242 // with the ins_short_branch attribute, which causes the ADLC to suppress the
13243 // match rules in general matching.  Instead, the ADLC generates a conversion
13244 // method in the MachNode which can be used to do in-place replacement of the
13245 // long variant with the shorter variant.  The compiler will determine if a
13246 // branch can be taken by the is_short_branch_offset() predicate in the machine
13247 // specific code section of the file.
13248 
13249 // Jump Direct - Label defines a relative address from JMP+1
13250 instruct jmpDir_short(label labl) %{
13251   match(Goto);
13252   effect(USE labl);
13253 
13254   ins_cost(300);
13255   format %{ "JMP,s  $labl" %}
13256   size(2);
13257   opcode(0xEB);
13258   ins_encode( OpcP, LblShort( labl ) );
13259   ins_pipe( pipe_jmp );
13260   ins_pc_relative(1);
13261   ins_short_branch(1);
13262 %}
13263 
13264 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13265 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
13266   match(If cop cr);
13267   effect(USE labl);
13268 
13269   ins_cost(300);
13270   format %{ "J$cop,s  $labl" %}
13271   size(2);
13272   opcode(0x70);
13273   ins_encode( JccShort( cop, labl) );
13274   ins_pipe( pipe_jcc );
13275   ins_pc_relative(1);
13276   ins_short_branch(1);
13277 %}
13278 
13279 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13280 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
13281   match(CountedLoopEnd cop cr);
13282   effect(USE labl);
13283 
13284   ins_cost(300);
13285   format %{ "J$cop,s  $labl\t# Loop end" %}
13286   size(2);
13287   opcode(0x70);
13288   ins_encode( JccShort( cop, labl) );
13289   ins_pipe( pipe_jcc );
13290   ins_pc_relative(1);
13291   ins_short_branch(1);
13292 %}
13293 
13294 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13295 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13296   match(CountedLoopEnd cop cmp);
13297   effect(USE labl);
13298 
13299   ins_cost(300);
13300   format %{ "J$cop,us $labl\t# Loop end" %}
13301   size(2);
13302   opcode(0x70);
13303   ins_encode( JccShort( cop, labl) );
13304   ins_pipe( pipe_jcc );
13305   ins_pc_relative(1);
13306   ins_short_branch(1);
13307 %}
13308 
13309 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13310   match(CountedLoopEnd cop cmp);
13311   effect(USE labl);
13312 
13313   ins_cost(300);
13314   format %{ "J$cop,us $labl\t# Loop end" %}
13315   size(2);
13316   opcode(0x70);
13317   ins_encode( JccShort( cop, labl) );
13318   ins_pipe( pipe_jcc );
13319   ins_pc_relative(1);
13320   ins_short_branch(1);
13321 %}
13322 
13323 // Jump Direct Conditional - using unsigned comparison
13324 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13325   match(If cop cmp);
13326   effect(USE labl);
13327 
13328   ins_cost(300);
13329   format %{ "J$cop,us $labl" %}
13330   size(2);
13331   opcode(0x70);
13332   ins_encode( JccShort( cop, labl) );
13333   ins_pipe( pipe_jcc );
13334   ins_pc_relative(1);
13335   ins_short_branch(1);
13336 %}
13337 
13338 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13339   match(If cop cmp);
13340   effect(USE labl);
13341 
13342   ins_cost(300);
13343   format %{ "J$cop,us $labl" %}
13344   size(2);
13345   opcode(0x70);
13346   ins_encode( JccShort( cop, labl) );
13347   ins_pipe( pipe_jcc );
13348   ins_pc_relative(1);
13349   ins_short_branch(1);
13350 %}
13351 
// Short-branch version of jmpConUCF2: two 2-byte short jumps (JP + Jcc).
// For notEqual both jumps target $labl; for equal, JP hops over the Jcc.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s   $labl\n\t"
      $$emit$$"J$cop,u,s   $labl"
    } else {
      $$emit$$"JP,u,s   done\n\t"
      $$emit$$"J$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  opcode(0x70);
  ins_encode %{
    Label* l = $labl$$label;
    assert(l != NULL, "need Label");
    // JP short form: 7A rel8.
    emit_cc(cbuf, $primary, Assembler::parity);
    int parity_disp = -1;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // rel8 relative to the end of this 2-byte jump (1 byte still to emit).
      parity_disp = l->loc_pos() - (cbuf.insts_size() + 1);
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Skip the following 2-byte conditional jump.
      parity_disp = 2;
    } else {
      ShouldNotReachHere();
    }
    emit_d8(cbuf, parity_disp);
    // Jcc short form: 7x rel8.
    emit_cc(cbuf, $primary, $cop$$cmpcode);
    int disp = l->loc_pos() - (cbuf.insts_size() + 1);
    emit_d8(cbuf, disp);
    // Short branches only chosen when displacements fit in a signed byte.
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
  %}
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
  ins_short_branch(1);
%}
13392 
13393 // ============================================================================
13394 // Long Compare
13395 //
13396 // Currently we hold longs in 2 registers.  Comparing such values efficiently
13397 // is tricky.  The flavor of compare used depends on whether we are testing
13398 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
13399 // The GE test is the negated LT test.  The LE test can be had by commuting
13400 // the operands (yielding a GE test) and then negating; negate again for the
13401 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
13402 // NE test is negated from that.
13403 
13404 // Due to a shortcoming in the ADLC, it mixes up expressions like:
13405 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
13406 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
13407 // are collapsed internally in the ADLC's dfa-gen code.  The match for
13408 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
13409 // foo match ends up with the wrong leaf.  One fix is to not match both
13410 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
13411 // both forms beat the trinary form of long-compare and both are very useful
13412 // on Intel which has so few registers.
13413 
13414 // Manifest a CmpL result in an integer register.  Very painful.
13415 // This is the test to avoid.
13416 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
13417   match(Set dst (CmpL3 src1 src2));
13418   effect( KILL flags );
13419   ins_cost(1000);
13420   format %{ "XOR    $dst,$dst\n\t"
13421             "CMP    $src1.hi,$src2.hi\n\t"
13422             "JLT,s  m_one\n\t"
13423             "JGT,s  p_one\n\t"
13424             "CMP    $src1.lo,$src2.lo\n\t"
13425             "JB,s   m_one\n\t"
13426             "JEQ,s  done\n"
13427     "p_one:\tINC    $dst\n\t"
13428             "JMP,s  done\n"
13429     "m_one:\tDEC    $dst\n"
13430      "done:" %}
13431   ins_encode %{
13432     Label p_one, m_one, done;
13433     __ xorptr($dst$$Register, $dst$$Register);
13434     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13435     __ jccb(Assembler::less,    m_one);
13436     __ jccb(Assembler::greater, p_one);
13437     __ cmpl($src1$$Register, $src2$$Register);
13438     __ jccb(Assembler::below,   m_one);
13439     __ jccb(Assembler::equal,   done);
13440     __ bind(p_one);
13441     __ incrementl($dst$$Register);
13442     __ jmpb(done);
13443     __ bind(m_one);
13444     __ decrementl($dst$$Register);
13445     __ bind(done);
13446   %}
13447   ins_pipe( pipe_slow );
13448 %}
13449 
13450 //======
13451 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13452 // compares.  Can be used for LE or GT compares by reversing arguments.
13453 // NOT GOOD FOR EQ/NE tests.
13454 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13455   match( Set flags (CmpL src zero ));
13456   ins_cost(100);
13457   format %{ "TEST   $src.hi,$src.hi" %}
13458   opcode(0x85);
13459   ins_encode( OpcP, RegReg_Hi2( src, src ) );
13460   ins_pipe( ialu_cr_reg_reg );
13461 %}
13462 
13463 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13464 // compares.  Can be used for LE or GT compares by reversing arguments.
13465 // NOT GOOD FOR EQ/NE tests.
13466 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13467   match( Set flags (CmpL src1 src2 ));
13468   effect( TEMP tmp );
13469   ins_cost(300);
13470   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13471             "MOV    $tmp,$src1.hi\n\t"
13472             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13473   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13474   ins_pipe( ialu_cr_reg_reg );
13475 %}
13476 
// Long compares reg < zero/req OR reg >= zero/req.
// Just a wrapper for a normal branch, plus the predicate test.
// The predicate restricts this match to BoolTest::lt / BoolTest::ge, the only
// conditions the LTGE flag manifestations above produce correctly.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}
13487 
// Compare 2 longs and CMOVE longs.
// Two CMOVcc instructions move both halves; the condition is stable across
// both since the flags are not modified in between.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Memory-source variant: CMOVcc from both halves of a long in memory.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Memory-source variant of the int cmove.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
13531 
// Compare 2 longs and CMOVE pointers.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13542 
// Compare 2 longs and CMOVE doubles (FPU version, UseSSE<=1).
// NOTE: the BoolTest disjunction must be parenthesized -- '&&' binds tighter
// than '||', so the unparenthesized form matched on BoolTest::ge regardless
// of UseSSE, unlike the sibling cmov*_LTGE predicates above.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13552 
// Compare 2 longs and CMOVE doubles (XMM version, UseSSE>=2).
// NOTE: the BoolTest disjunction must be parenthesized -- '&&' binds tighter
// than '||', so the unparenthesized form matched on BoolTest::ge regardless
// of UseSSE, unlike the sibling cmov*_LTGE predicates above.
instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}
13562 
// Compare 2 longs and CMOVE floats (FPU version, UseSSE==0).
// NOTE: the BoolTest disjunction must be parenthesized -- '&&' binds tighter
// than '||', so the unparenthesized form matched on BoolTest::ge regardless
// of UseSSE, unlike the sibling cmov*_LTGE predicates above.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13571 
// Compare 2 longs and CMOVE floats (XMM version, UseSSE>=1).
// NOTE: the BoolTest disjunction must be parenthesized -- '&&' binds tighter
// than '||', so the unparenthesized form matched on BoolTest::ge regardless
// of UseSSE, unlike the sibling cmov*_LTGE predicates above.
instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}
13580 
13581 //======
13582 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// OR the two 32-bit halves into a scratch register: the result is zero
// iff the whole 64-bit value is zero, which is all EQ/NE needs.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);   // scratch register; src must not be clobbered
  ins_cost(200);
  format %{ "MOV    $tmp,$src.lo\n\t"
            "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );   // encoding class defined elsewhere in this file
  ins_pipe( ialu_reg_reg_long );
%}
13592 
13593 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Compare low halves first; only when they are equal can the high-half
// compare change the EQ/NE answer, so the second CMP is jumped over
// whenever the low halves already differ.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);   // low-half compare plus conditional high-half compare
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s  skip\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
     "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13604 
13605 // Long compare reg == zero/reg OR reg != zero/reg
13606 // Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only valid when the If's Bool test is EQ or NE; other tests are
  // handled by the LTGE/LEGT flavors of this rule.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}
13615 
13616 // Compare 2 longs and CMOVE longs.
// A 64-bit conditional move is done as two 32-bit CMOVs, one per half.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  // Needs hardware CMOV, and only for EQ/NE tests of the long compare.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);   // CMOVcc base; condition presumably folded in by enc_cmov
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13627 
// Same as cmovLL_reg_EQNE but the source is a long loaded from memory
// (two 32-bit CMOV loads, low half then high half).
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);   // costlier than register-register form: memory operands
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
13638 
13639 // Compare 2 longs and CMOVE ints.
// CMOVE an int off an EQ/NE long compare; single 32-bit CMOVcc.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13649 
// Memory-source variant of cmovII_reg_EQNE (CMOVcc with a memory operand).
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
13659 
// Compare 2 longs and CMOVE ptrs.
// Pointer flavor: identical encoding to the int CMOV, different ideal type.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13670 
13671 // Compare 2 longs and CMOVE doubles
// CMOVE x87 doubles off an EQ/NE long compare (UseSSE<=1 path).
// Predicate parenthesized so the UseSSE guard applies to both BoolTest
// cases ('&&' binds tighter than '||').
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13680 
13681 // Compare 2 longs and CMOVE doubles
// CMOVE XMM doubles off an EQ/NE long compare (SSE2 path).
// Predicate parenthesized so the UseSSE>=2 guard applies to both
// BoolTest cases ('&&' binds tighter than '||').
instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}
13690 
// CMOVE x87 floats off an EQ/NE long compare (x87-only, UseSSE==0).
// Predicate parenthesized so the UseSSE guard applies to both BoolTest
// cases ('&&' binds tighter than '||').
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13699 
// CMOVE XMM floats off an EQ/NE long compare (SSE path).
// Predicate parenthesized so the UseSSE>=1 guard applies to both
// BoolTest cases ('&&' binds tighter than '||').
instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}
13708 
13709 //======
13710 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13711 // Same as cmpL_reg_flags_LEGT except must negate src
// Computes flags for 0 - src (CMP/SBB chain against a zeroed scratch),
// i.e. the compare with operands swapped; the consumer must therefore
// use a commuted test (LE<->GE, LT<->GT) -- see cmpOp_commute users below.
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );   // scratch holding the zero / borrow chain
  ins_cost(300);
  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}
13722 
13723 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13724 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13725 // requires a commuted test to get the same result.
// Flags are computed for (src2 - src1), i.e. operands swapped relative
// to the ideal CmpL, so the consuming branch/cmov must commute its test.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );   // scratch for the high-half subtract-with-borrow
  ins_cost(300);
  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );   // note: src2 first -- swapped on purpose
  ins_pipe( ialu_cr_reg_reg );
%}
13736 
// Long compares reg <= zero/reg OR reg > zero/reg.
13738 // Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only for GT/LE tests; cmpOp_commute flips the condition because the
  // LEGT flags producers above compare with swapped operands.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}
13748 
13749 // Compare 2 longs and CMOVE longs.
// 64-bit conditional move as two 32-bit CMOVs; commuted condition
// matches the swapped-operand flags producers for LE/GT.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13760 
// Memory-source variant of cmovLL_reg_LEGT; high half loads from src+4.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
13771 
13772 // Compare 2 longs and CMOVE ints.
// CMOVE an int off a LE/GT long compare (commuted condition).
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13782 
// Memory-source variant of cmovII_reg_LEGT.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
13792 
13793 // Compare 2 longs and CMOVE ptrs.
// Pointer flavor of the LE/GT conditional move (same encoding as int).
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13803 
13804 // Compare 2 longs and CMOVE doubles
// CMOVE x87 doubles off a LE/GT long compare (UseSSE<=1 path).
// Predicate parenthesized so the UseSSE guard applies to both BoolTest
// cases ('&&' binds tighter than '||').
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13813 
13814 // Compare 2 longs and CMOVE doubles
// CMOVE XMM doubles off a LE/GT long compare (SSE2 path).
// Predicate parenthesized so the UseSSE>=2 guard applies to both
// BoolTest cases ('&&' binds tighter than '||').
instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}
13823 
// CMOVE x87 floats off a LE/GT long compare (x87-only, UseSSE==0).
// Predicate parenthesized so the UseSSE guard applies to both BoolTest
// cases ('&&' binds tighter than '||').
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13832 
13833 
// CMOVE XMM floats off a LE/GT long compare (SSE path).
// Predicate parenthesized so the UseSSE>=1 guard applies to both
// BoolTest cases ('&&' binds tighter than '||').
instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}
13842 
13843 
13844 // ============================================================================
13845 // Procedure Call/Return Instructions
13846 // Call Java Static Instruction
13847 // Note: If this code changes, the corresponding ret_addr_offset() and
13848 //       compute_padding() functions will have to be adjusted.
// Plain static call: 32-bit pc-relative CALL (opcode E8).
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  // MethodHandle invokes are handled by CallStaticJavaHandle below.
  predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  // pre/post_call_FPU bracket the call; encoding classes defined elsewhere.
  ins_encode( pre_call_FPU,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
  ins_alignment(4);   // see the ret_addr_offset()/compute_padding() note above
%}
13865 
13866 // Call Java Static Instruction (method handle version)
13867 // Note: If this code changes, the corresponding ret_addr_offset() and
13868 //       compute_padding() functions will have to be adjusted.
// Static call for MethodHandle invokes: like CallStaticJavaDirect but
// brackets the call with preserve_SP/restore_SP (using EBP, see below).
instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
  match(CallStaticJava);
  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
  effect(USE meth);
  // EBP is saved by all callees (for interpreter stack correction).
  // We use it here for a similar purpose, in {preserve,restore}_SP.

  ins_cost(300);
  format %{ "CALL,static/MethodHandle " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_FPU,
              preserve_SP,
              Java_Static_Call( meth ),
              restore_SP,
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
  ins_alignment(4);
%}
13889 
13890 // Call Java Dynamic Instruction
13891 // Note: If this code changes, the corresponding ret_addr_offset() and
13892 //       compute_padding() functions will have to be adjusted.
// Dynamic (virtual/inline-cache) call; per the format, EAX is loaded
// with a sentinel oop before the CALL.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  format %{ "MOV    EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_FPU,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
  ins_alignment(4);
%}
13909 
13910 // Call Runtime Instruction
// Call into the VM runtime (safepoint-capable).
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_FPU,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}
13926 
13927 // Call runtime without safepoint
// Leaf runtime call (no safepoint); FPU stack is cleared before the
// call and verified afterwards.
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_FPU,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}
13942 
// Leaf runtime call that touches no FP state: no FPU bracketing needed,
// just the bare call encoding.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}
13954 
13955 
13956 // Return Instruction
13957 // Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);   // single-byte near return
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}
13965 
13966 // Tail Call; Jump from runtime stub to Java code.
13967 // Also known as an 'interprocedural jump'.
13968 // Target of jump will eventually return to caller.
13969 // TailJump below removes the return address.
// Indirect jump through jump_target; method oop is pinned in EBX by the
// eBXRegP operand class.  Return address is left on the stack.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP    $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4);  /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13978 
13979 
13980 // Tail Jump; remove the return address; jump to target.
13981 // TailCall above leaves the return address around.
// Pops (discards) the return address into EDX, then jumps; exception
// oop is pinned in EAX by the eAXRegP operand class.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP    EDX\t# pop return address into dummy\n\t"
            "JMP    $jump_target " %}
  opcode(0xFF, 0x4);  /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13992 
13993 // Create exception oop: created by stack-crawling runtime code.
13994 // Created exception is now available to this handler, and is setup
13995 // just prior to jumping to this handler.  No code emitted.
// Zero-size pseudo-instruction: merely tells the register allocator the
// exception oop arrives in EAX.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}
14006 
14007 
14008 // Rethrow exception:
14009 // The exception oop will come in the first argument position.
14010 // Then JUMP (not call) to the rethrow stub code.
// Jump (not call) to the rethrow stub; enc_rethrow defined elsewhere.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP    rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}
14020 
14021 // inlined locking and unlocking
14022 
14023 
// Inline fast-path monitor enter; result is communicated via the flags.
instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
  match( Set cr (FastLock object box) );
  effect( TEMP tmp, TEMP scr );   // tmp is pinned to EAX by its operand class
  ins_cost(300);
  format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
  ins_encode( Fast_Lock(object,box,tmp,scr) );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}
14033 
// Inline fast-path monitor exit; box is pinned to EAX by its operand class.
instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match( Set cr (FastUnlock object box) );
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "FASTUNLOCK $object, $box, $tmp" %}
  ins_encode( Fast_Unlock(object,box,tmp) );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}
14043 
14044 
14045 
14046 // ============================================================================
14047 // Safepoint Instruction
// Safepoint poll: a read of the polling page; the VM unmaps the page to
// trap threads at safepoints.  Kills the flags register.
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;   // fixed 6-byte encoding
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}
14066 
14067 //----------PEEPHOLE RULES-----------------------------------------------------
14068 // These must follow all instruction definitions as they use the names
14069 // defined in the instructions definitions.
14070 //
14071 // peepmatch ( root_instr_name [preceding_instruction]* );
14072 //
14073 // peepconstraint %{
14074 // (instruction_number.operand_name relational_op instruction_number.operand_name
14075 //  [, ...] );
14076 // // instruction numbers are zero-based using left to right order in peepmatch
14077 //
14078 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
14079 // // provide an instruction_number.operand_name for each operand that appears
14080 // // in the replacement instruction's match rule
14081 //
14082 // ---------VM FLAGS---------------------------------------------------------
14083 //
14084 // All peephole optimizations can be turned off using -XX:-OptoPeephole
14085 //
14086 // Each peephole rule is given an identifying number starting with zero and
14087 // increasing by one in the order seen by the parser.  An individual peephole
14088 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
14089 // on the command-line.
14090 //
14091 // ---------CURRENT LIMITATIONS----------------------------------------------
14092 //
14093 // Only match adjacent instructions in same basic block
14094 // Only equality constraints
14095 // Only constraints between operands, not (0.dest_reg == EAX_enc)
14096 // Only one replacement instruction
14097 //
14098 // ---------EXAMPLE----------------------------------------------------------
14099 //
14100 // // pertinent parts of existing instructions in architecture description
14101 // instruct movI(eRegI dst, eRegI src) %{
14102 //   match(Set dst (CopyI src));
14103 // %}
14104 //
14105 // instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
14106 //   match(Set dst (AddI dst src));
14107 //   effect(KILL cr);
14108 // %}
14109 //
14110 // // Change (inc mov) to lea
14111 // peephole %{
14112 //   // increment preceeded by register-register move
14113 //   peepmatch ( incI_eReg movI );
14114 //   // require that the destination register of the increment
14115 //   // match the destination register of the move
14116 //   peepconstraint ( 0.dst == 1.dst );
14117 //   // construct a replacement instruction that sets
14118 //   // the destination to ( move's source register + one )
14119 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14120 // %}
14121 //
14122 // Implementation no longer uses movX instructions since
14123 // machine-independent system no longer uses CopyX nodes.
14124 //
14125 // peephole %{
14126 //   peepmatch ( incI_eReg movI );
14127 //   peepconstraint ( 0.dst == 1.dst );
14128 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14129 // %}
14130 //
14131 // peephole %{
14132 //   peepmatch ( decI_eReg movI );
14133 //   peepconstraint ( 0.dst == 1.dst );
14134 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14135 // %}
14136 //
14137 // peephole %{
14138 //   peepmatch ( addI_eReg_imm movI );
14139 //   peepconstraint ( 0.dst == 1.dst );
14140 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14141 // %}
14142 //
14143 // peephole %{
14144 //   peepmatch ( addP_eReg_imm movP );
14145 //   peepconstraint ( 0.dst == 1.dst );
14146 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
14147 // %}
14148 
14149 // // Change load of spilled value to only a spill
14150 // instruct storeI(memory mem, eRegI src) %{
14151 //   match(Set mem (StoreI mem src));
14152 // %}
14153 //
14154 // instruct loadI(eRegI dst, memory mem) %{
14155 //   match(Set dst (LoadI mem));
14156 // %}
14157 //
// Peephole: a loadI immediately following a storeI to the same address
// of the same value (1.src == 0.dst, 1.mem == 0.mem) is replaced by a
// single storeI -- presumably eliminating the redundant reload; confirm
// against the commented loadI/storeI rules above.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}
14163 
14164 //----------SMARTSPILL RULES---------------------------------------------------
14165 // These must follow all instruction definitions as they use the names
14166 // defined in the instructions definitions.