1 //
   2 // Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Special Registers
reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok so here's the trick: FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());

// XMM registers.  128-bit registers or 4 words each, labeled a-d.
// Word a in each register holds a Float, words ab hold a Double.
// We currently do not use the SIMD capabilities, so registers cd
// are unused at the moment.
reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H );

alloc_class chunk1( XMM0a, XMM0b,
                    XMM1a, XMM1b,
                    XMM2a, XMM2b,
                    XMM3a, XMM3b,
                    XMM4a, XMM4b,
                    XMM5a, XMM5b,
                    XMM6a, XMM6b,
                    XMM7a, XMM7b, EFLAGS);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for all registers
reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for general registers
reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers which may be used for implicit null checks on win95
// Also safe for use by tailjump. We don't want to allocate in rbp,
reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
// Class of "X" registers
reg_class x_reg(EBX, ECX, EDX, EAX);
// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);
// Class for general registers not including ECX
reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers not including EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
// Class for general registers not including EAX or EBX.
reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);
// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);
// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);
// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);
// Class of EDI (for synchronization)
reg_class edi_reg(EDI);
// Class of ESI (for synchronization)
reg_class esi_reg(ESI);
// Singleton class for interpreter's stack pointer
reg_class ebp_reg(EBP);
// Singleton class for stack pointer
reg_class sp_reg(ESP);
// Singleton class for instruction pointer
// reg_class ip_reg(EIP);
// Singleton class for condition codes
reg_class int_flags(EFLAGS);
// Class of integer register pairs
reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
// Not AX or DX, used in divides
reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// a 2-address instructions out of Intel's FP stack.
reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

// make a register class for SSE registers
reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);

// make a double register class for SSE2 registers
reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
                  XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );

reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                   FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                   FPR7L,FPR7H );

reg_class flt_reg0( FPR1L );
reg_class dbl_reg0( FPR1L,FPR1H );
reg_class dbl_reg1( FPR2L,FPR2H );
reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                       FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

// XMM6 and XMM7 could be used as temporary registers for long, float and
// double values for SSE2.
reg_class xdb_reg6( XMM6a,XMM6b );
reg_class xdb_reg7( XMM7a,XMM7b );
%}
 233 
 234 
 235 //----------SOURCE BLOCK-------------------------------------------------------
 236 // This is a block of C++ code which provides values, functions, and
 237 // definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
// (declared here so the generated DFA can call it; defined in the source block)
extern bool is_operand_hi32_zero(Node* n);
%}
 242 
 243 source %{
// Shorthand names for the relocation formats used when emitting 32-bit
// immediates and displacements.
#define   RELOC_IMM32    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Standard HotSpot assembler shorthand: "__ foo()" expands to "_masm.foo()".
#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define   HIGH_FROM_LOW(x) ((x)+2)
 251 
 252 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 253 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 254 // fast versions of NegF/NegD and AbsF/AbsD.
 255 
 256 // Note: 'double' and 'long long' have 32-bits alignment on x86.
 257 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 258   // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
 259   // of 128-bits operands for SSE instructions.
 260   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 261   // Store the value to a 128-bits operand.
 262   operand[0] = lo;
 263   operand[1] = hi;
 264   return operand;
 265 }
 266 
// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pointer below is 16-byte aligned within fp_signmask_pool; the extra
// 128 bits declared above absorb the round-down done by double_quadword.
// signmask = all bits except the sign bit(s); signflip = only the sign bit(s).
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
 275 
 276 // Offset hacking within calls.
 277 static int pre_call_FPU_size() {
 278   if (Compile::current()->in_24_bit_fp_mode())
 279     return 6; // fldcw
 280   return 0;
 281 }
 282 
// Byte size of the instruction used to preserve SP around method-handle
// invokes: an optional REX prefix (LP64 only), the opcode, and a ModRM byte.
static int preserve_SP_size() {
  return LP64_ONLY(1 +) 2;  // [rex,] op, rm(reg/reg)
}
 286 
 287 // !!!!! Special hack to get all type of calls to specify the byte offset
 288 //       from the start of the call to the point where the return address
 289 //       will point.
 290 int MachCallStaticJavaNode::ret_addr_offset() {
 291   int offset = 5 + pre_call_FPU_size();  // 5 bytes from start of call to where return address points
 292   if (_method_handle_invoke)
 293     offset += preserve_SP_size();
 294   return offset;
 295 }
 296 
 297 int MachCallDynamicJavaNode::ret_addr_offset() {
 298   return 10 + pre_call_FPU_size();  // 10 bytes from start of call to where return address points
 299 }
 300 
// Byte size of the FFree_Float_Stack_All sequence; starts at -1 and is
// recorded when that sequence is first emitted (see assert below).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  // Runtime calls are preceded by the FFree_Float_Stack_All sequence and an
  // optional fldcw; the call itself is 5 bytes.
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_FPU_size();
}
 307 
// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't (the poll can
// reference the page with an absolute 32-bit address; see the epilog's
// TEST against os::get_polling_page()).
bool SafePointNode::needs_polling_address_input() {
  return false;
}
 313 
 314 //
 315 // Compute padding required for nodes which need alignment
 316 //
 317 
 318 // The address of the call instruction needs to be 4-byte aligned to
 319 // ensure that it does not span a cache line so that it can be patched.
 320 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 321   current_offset += pre_call_FPU_size();  // skip fldcw, if any
 322   current_offset += 1;      // skip call opcode byte
 323   return round_to(current_offset, alignment_required()) - current_offset;
 324 }
 325 
 326 // The address of the call instruction needs to be 4-byte aligned to
 327 // ensure that it does not span a cache line so that it can be patched.
 328 int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
 329   current_offset += pre_call_FPU_size();  // skip fldcw, if any
 330   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 331   current_offset += 1;      // skip call opcode byte
 332   return round_to(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_FPU_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return round_to(current_offset, alignment_required()) - current_offset;
 342 }
 343 
#ifndef PRODUCT
// Debug listing for the breakpoint node: a single INT3 instruction.
void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const {
  st->print("INT3");
}
#endif
 349 
 350 // EMIT_RM()
 351 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 352   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_CC()
 357 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 358   unsigned char c = (unsigned char)( f1 | f2 );
 359   cbuf.insts()->emit_int8(c);
 360 }
 361 
 362 // EMIT_OPCODE()
 363 void emit_opcode(CodeBuffer &cbuf, int code) {
 364   cbuf.insts()->emit_int8((unsigned char) code);
 365 }
 366 
// EMIT_OPCODE() w/ relocation information
// Records a relocation at the current instruction mark (plus 'offset')
// and then emits the opcode byte.
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}
 372 
// EMIT_D8()
// Append an 8-bit immediate/displacement to the instruction stream.
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}
 377 
// EMIT_D16()
// Append a 16-bit value to the instruction stream.
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}
 382 
// EMIT_D32()
// Append a 32-bit value to the instruction stream.
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}
 387 
// emit 32 bit value and construct relocation entry from relocInfo::relocType
// The relocation is recorded at the current instruction mark, not at the
// position of the 32-bit value itself.
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}
 394 
// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  // Debug-only check: a non-null, non-sentinel value under an oop relocation
  // must be a real oop, and must not be scavengable unless the VM explicitly
  // allows scavengable oops in code (ScavengeRootsInCode).
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oop(d32)->is_oop() && (ScavengeRootsInCode || !oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}
 406 
 407 // Access stack slot for load or store
 408 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 409   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 410   if( -128 <= disp && disp <= 127 ) {
 411     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 412     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 413     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 414   } else {
 415     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 416     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 417     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 418   }
 419 }
 420 
 421    // eRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModRM / [SIB] / displacement bytes for a memory operand of the
// form [base + index*scale + disp].  index == 0x4 means "no index" (0x4 is
// not a legal index in the SIB encoding); base == -1 flags an absolute
// 32-bit address.  Oop displacements always use the 32-bit form with a
// relocation so the GC can patch them.
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    // (mod=00 with r/m=EBP means disp32-only, so [EBP] needs mod=01/disp8=0)
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      // An oop displacement must use the relocatable 32-bit form, hence
      // the !displace_is_oop test here.
      if ((displace >= -128) && (displace <= 127)
          && !(displace_is_oop) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          // mod=00, r/m=101 encodes [disp32] with no base register
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( displace_is_oop ) {
            emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( displace_is_oop ) {
            emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);   // r/m=100 selects the SIB form
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && !(displace_is_oop) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( displace_is_oop ) {
          emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}
 487 
 488 
 489 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 490   if( dst_encoding == src_encoding ) {
 491     // reg-reg copy, use an empty encoding
 492   } else {
 493     emit_opcode( cbuf, 0x8B );
 494     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 495   }
 496 }
 497 
 498 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 499   if( dst_encoding == src_encoding ) {
 500     // reg-reg copy, use an empty encoding
 501   } else {
 502     MacroAssembler _masm(&cbuf);
 503 
 504     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
 505   }
 506 }
 507 
 508 
 509 //=============================================================================
 510 const bool Matcher::constant_table_absolute_addressing = true;
 511 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 512 
 513 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 514   // Empty encoding
 515 }
 516 
 517 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 518   return 0;
 519 }
 520 
 521 #ifndef PRODUCT
 522 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 523   st->print("# MachConstantBaseNode (empty encoding)");
 524 }
 525 #endif
 526 
 527 
 528 //=============================================================================
#ifndef PRODUCT
// Debug listing of the method prologue; mirrors the layout produced by
// MachPrologNode::emit.
void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile* C = ra_->C;
  if( C->in_24_bit_fp_mode() ) {
    st->print("FLDCW  24 bit fpu control word");
    st->print_cr(""); st->print("\t");
  }

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them.  But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack.  But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (C->need_stack_bang(framesize)) {
    st->print_cr("# stack bang"); st->print("\t");
  }
  st->print_cr("PUSHL  EBP"); st->print("\t");

  if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
    st->print("PUSH   0xBADB100D\t# Majik cookie for stack depth check");
    st->print_cr(""); st->print("\t");
    // The cookie occupies one of the frame's words.
    framesize -= wordSize;
  }

  // The emitted SUB uses the short (imm8) form only when a patchable-length
  // first instruction is not required; see the WARNING in MachPrologNode::emit.
  if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
    if (framesize) {
      st->print("SUB    ESP,%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB    ESP,%d\t# Create frame",framesize);
  }
}
#endif
 567 
 568 
// Emit the method prologue: optional fldcw (24-bit FPU mode), optional stack
// bang, push of EBP, optional stack-depth cookie, and the frame allocation.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  if (UseSSE >= 2 && VerifyFPU) {
    MacroAssembler masm(&cbuf);
    masm.verify_FPU(0, "FPU stack must be clean on entry");
  }

  // WARNING: Initial instruction MUST be 5 bytes or longer so that
  // NativeJump::patch_verified_entry will be able to patch out the entry
  // code safely. The fldcw is ok at 6 bytes, the push to verify stack
  // depth is ok at 5 bytes, the frame allocation can be either 3 or
  // 6 bytes. So if we don't do the fldcw or the push then we must
  // use the 6 byte frame allocation even if we have no frame. :-(
  // If method sets FPU control word do it now
  if( C->in_24_bit_fp_mode() ) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
  }

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them.  But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack.  But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (C->need_stack_bang(framesize)) {
    MacroAssembler masm(&cbuf);
    masm.generate_stack_overflow_check(framesize);
  }

  // We always push rbp, so that on return to interpreter rbp, will be
  // restored correctly and we can correct the stack.
  emit_opcode(cbuf, 0x50 | EBP_enc);   // push ebp (one-byte PUSH r32)

  if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
    emit_opcode(cbuf, 0x68); // push 0xbadb100d
    emit_d32(cbuf, 0xbadb100d);
    framesize -= wordSize;   // the cookie occupies one frame word
  }

  // Short (imm8) form is only allowed when an earlier 5+-byte instruction
  // (fldcw or the cookie push) already satisfies the patching requirement
  // described in the WARNING above.
  if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
    if (framesize) {
      emit_opcode(cbuf, 0x83);   // sub  SP,#framesize (imm8 form, 3 bytes)
      emit_rm(cbuf, 0x3, 0x05, ESP_enc);
      emit_d8(cbuf, framesize);
    }
  } else {
    emit_opcode(cbuf, 0x81);   // sub  SP,#framesize (imm32 form, 6 bytes)
    emit_rm(cbuf, 0x3, 0x05, ESP_enc);
    emit_d32(cbuf, framesize);
  }
  C->set_frame_complete(cbuf.insts_size());

#ifdef ASSERT
  // Debug builds: check that ESP has the expected alignment after the
  // frame has been set up.
  if (VerifyStackAtCalls) {
    Label L;
    MacroAssembler masm(&cbuf);
    masm.push(rax);
    masm.mov(rax, rsp);
    masm.andptr(rax, StackAlignmentInBytes-1);
    masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
    masm.pop(rax);
    masm.jcc(Assembler::equal, L);
    masm.stop("Stack is not properly aligned!");
    masm.bind(L);
  }
#endif

}
 643 
// The prologue's length depends on several runtime flags and the frame size,
// so fall back to the generic (re-emit and measure) size computation.
uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

// Upper bound on the number of relocations the prologue may need.
int MachPrologNode::reloc() const {
  return 0; // a large enough number
}
 651 
 652 //=============================================================================
 653 #ifndef PRODUCT
 654 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 655   Compile *C = ra_->C;
 656   int framesize = C->frame_slots() << LogBytesPerInt;
 657   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 658   // Remove two words for return addr and rbp,
 659   framesize -= 2*wordSize;
 660 
 661   if( C->in_24_bit_fp_mode() ) {
 662     st->print("FLDCW  standard control word");
 663     st->cr(); st->print("\t");
 664   }
 665   if( framesize ) {
 666     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 667     st->cr(); st->print("\t");
 668   }
 669   st->print_cr("POPL   EBP"); st->print("\t");
 670   if( do_polling() && C->is_method_compilation() ) {
 671     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 672     st->cr(); st->print("\t");
 673   }
 674 }
 675 #endif
 676 
// Emit the method epilogue: optional fldcw restoring the standard control
// word, frame deallocation, pop of EBP, and an optional safepoint poll.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  // If method set FPU control word, restore to standard control word
  if( C->in_24_bit_fp_mode() ) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if( framesize >= 128 ) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize (imm32 form, 6 bytes)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  }
  else if( framesize ) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize (imm8 form, 3 bytes)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc);  // pop ebp (one-byte POP r32)

  // Safepoint poll: TEST EAX, [polling page] with an absolute 32-bit
  // address, marked with a poll_return relocation.
  if( do_polling() && C->is_method_compilation() ) {
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}
 713 
// Byte size of the epilog; the per-instruction counts here must stay in
// exact sync with the encodings produced by MachEpilogNode::emit() above.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0; // fldcw m16 via 32-bit absolute address
  if( do_polling() && C->is_method_compilation() ) size += 6; // test eax, m32

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if( framesize >= 128 ) {
    size += 6; // add esp, imm32
  } else {
    size += framesize ? 3 : 0; // add esp, imm8 (omitted when framesize == 0)
  }
  return size;
}
 734 
// Upper bound on the number of relocation entries the epilog may create
// (the poll-return relocation emitted above).
int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}
 738 
// Use the generic machine-node pipeline description for the epilog.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
 742 
 743 int MachEpilogNode::safepoint_offset() const { return 0; }
 744 
 745 //=============================================================================
 746 
// Register-class buckets used by the spill-copy helpers below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
// Classify an allocator register into one of the RC buckets:
// invalid, stack slot, integer register, x87 float register, or XMM.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    // x87 float registers are only expected when SSE2 is unavailable.
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}
 762 
// Shared helper for [ESP+offset] <-> register moves and stack PUSH/POP.
// Three modes: with a CodeBuffer it emits the instruction; with do_size
// it only accounts for size; otherwise (non-PRODUCT) it prints assembly
// to st.  Returns the accumulated size in bytes.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    // ESP-based addressing always needs a SIB byte.
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // 3 bytes = opcode + ModRM + SIB, plus disp8/disp32 when offset != 0.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}
 782 
 783 // Helper for XMM registers.  Extra opcode bits, limited syntax.
// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Emits/prints/sizes an XMM <-> [ESP+offset] move: MOVSD/MOVLPD for a
// double (adjacent reg pair), MOVSS for a single.  Returns accumulated
// byte size.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                         int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  if( cbuf ) {
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load && !UseXmmLoadAndClearUpper )
        emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
      else
        emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
    } else {
      emit_opcode(*cbuf, 0xF3 );
    }
    emit_opcode(*cbuf, 0x0F );
    if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
      emit_opcode(*cbuf, 0x12 );   // use 'movlpd' for load
    else
      emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load ) st->print("%s %s,[ESP + #%d]",
                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                               Matcher::regName[reg_lo], offset);
      else          st->print("MOVSD  [ESP + #%d],%s",
                               offset, Matcher::regName[reg_lo]);
    } else {
      if( is_load ) st->print("MOVSS  %s,[ESP + #%d]",
                               Matcher::regName[reg_lo], offset);
      else          st->print("MOVSS  [ESP + #%d],%s",
                               offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // 5 bytes = prefix + 0F + opcode + ModRM + SIB, plus disp8/disp32.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+5+offset_size;
}
 821 
 822 
// XMM-to-XMM register move (single or adjacent-pair double).  Uses
// MOVAPS/MOVAPD when UseXmmRegToRegMoveAll is set, else MOVSS/MOVSD.
// Returns accumulated byte size.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
    if( cbuf ) {
      if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
        emit_opcode(*cbuf, 0x66 ); // 0x66 prefix selects MOVAPD
      }
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x28 );
      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    // 4 bytes with the 0x66 prefix (double), 3 without (single).
    return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
  } else {
    if( cbuf ) {
      emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x10 );
      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    // prefix + 0F + 10 + ModRM.
    return size+4;
  }
}
 863 
// Move a 32-bit general-purpose register into an XMM register (MOVD,
// encoding 66 0F 6E /r).  Returns the instruction size only (4 bytes);
// callers reach this path with size == 0 and return immediately.
static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    emit_opcode(*cbuf, 0x66);
    emit_opcode(*cbuf, 0x0F);
    emit_opcode(*cbuf, 0x6E);
    emit_rm(*cbuf, 0x3, Matcher::_regEncode[dst_lo] & 7, Matcher::_regEncode[src_lo] & 7);
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4;
}
 879 
 880 
// Move an XMM register into a 32-bit general-purpose register (MOVD,
// encoding 66 0F 7E /r; note the reg field carries the XMM source).
// Returns the instruction size only (4 bytes), like impl_movgpr2x_helper.
static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    emit_opcode(*cbuf, 0x66);
    emit_opcode(*cbuf, 0x0F);
    emit_opcode(*cbuf, 0x7E);
    emit_rm(*cbuf, 0x3, Matcher::_regEncode[src_lo] & 7, Matcher::_regEncode[dst_lo] & 7);
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return 4;
}
 896 
// Integer register-to-register move (MOV r32, r/m32).  Returns
// accumulated byte size (opcode + ModRM = 2 bytes).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}
 909 
// Store an x87 float register to [ESP+offset].  If the source is not
// already ST(0) it is first pushed with FLD, then stored-and-popped
// (FSTP); if it is ST(0) a plain FST leaves the stack unchanged.
// Returns accumulated byte size.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD    %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  // The "register" number passed to impl_helper supplies the ModRM reg
  // field, i.e. the /digit opcode extension: EBX_num -> /3 (FSTP),
  // EDX_num -> /2 (FST).
  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
 939 
// The workhorse for spill copies: one routine drives code emission
// (cbuf != NULL), disassembly printing (cbuf == NULL, !do_size) and size
// computation (do_size), so the three can never disagree.  Dispatches on
// the register classes of source and destination (int/float/xmm/stack)
// for both the low and, for 64-bit values, the high word.  Returns the
// cumulative byte size of the copy sequence.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      // Copy the high word first so the low-word copy cannot clobber it.
      // PUSH is 0xFF /6 and POP is 0x8F /0; ESI_num/EAX_num supply those
      // /digit extensions via the ModRM reg field.
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
     } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
     }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP = 2+2 bytes, lone FST = 2 bytes.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // FLD (opcode+ModRM+SIB+disp) followed by a 2-byte FSTP.
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Bounce through the 8-byte scratch area just allocated below ESP.
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // At this point the first word must have been handled by one of the
  // non-returning cases above.
  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  // Unsupported second-word combination; does not return.
  Unimplemented();
}
1146 
#ifndef PRODUCT
// Print the spill copy's assembly; delegates to implementation() in its
// printing mode (no CodeBuffer, !do_size).
void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  implementation( NULL, ra_, false, st );
}
#endif
1152 
// Emit the spill copy into cbuf via implementation()'s emitting mode.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}
1156 
// Byte size of the spill copy via implementation()'s sizing mode.
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}
1160 
1161 //=============================================================================
#ifndef PRODUCT
// Print the nop-padding pseudo-instruction with its byte count.
void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const {
  st->print("NOP \t# %d bytes pad for loops and calls", _count);
}
#endif
1167 
// Emit _count bytes of nop padding (alignment for loops and calls).
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}
1172 
// One byte per nop: the node's size is exactly its pad count.
uint MachNopNode::size(PhaseRegAlloc *) const {
  return _count;
}
1176 
1177 
1178 //=============================================================================
#ifndef PRODUCT
// Print the LEA that materializes the address of this node's stack slot.
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif
1186 
// Emit LEA reg,[ESP+offset]: compute the address of the lock's stack
// slot without disturbing the flags.  Uses the disp32 form for offsets
// >= 128, otherwise the shorter disp8 form.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc); // SIB byte for the ESP base
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc); // SIB byte for the ESP base
    emit_d8(cbuf, offset);
  }
}
1203 
1204 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1205   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1206   if( offset >= 128 ) {
1207     return 7;
1208   }
1209   else {
1210     return 4;
1211   }
1212 }
1213 
1214 //=============================================================================
1215 
// emit call stub, compiled java to interpreter
// Emits the out-of-line static-call stub (mov rbx, methodOop; jmp entry)
// into the stubs section and ties it to the call at 'mark' via a
// static_stub relocation.
void emit_java_to_interp(CodeBuffer &cbuf ) {
  // Stub is fixed up when the corresponding call is converted from calling
  // compiled code to calling interpreted code.
  // mov rbx,0
  // jmp -1

  address mark = cbuf.insts_mark();  // get mark within main instrs section

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a stub.
  MacroAssembler _masm(&cbuf);

  address base =
  __ start_a_stub(Compile::MAX_stubs_size);
  if (base == NULL)  return;  // CodeBuffer::expand failed
  // static stub relocation stores the instruction address of the call
  __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
  // static stub relocation also tags the methodOop in the code-stream.
  __ movoop(rbx, (jobject)NULL);  // method is zapped till fixup time
  // This is recognized as unresolved by relocs/nativeInst/ic code
  __ jump(RuntimeAddress(__ pc()));

  __ end_a_stub();
  // Update current stubs pointer and restore insts_end.
}
// size of call stub, compiled java to interpreter
uint size_java_to_interp() {
  return 10;  // movl (5 bytes); jmp (5 bytes)
}
// relocation entries for call stub, compiled java to interpreter
uint reloc_java_to_interp() {
  return 4;  // 3 in emit_java_to_interp + 1 in Java_Static_Call
}
1250 
1251 //=============================================================================
#ifndef PRODUCT
// Print the unverified entry point (inline-cache check) sequence.
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif
1262 
// Emit the unverified entry point: compare the inline-cache klass (EAX)
// against the receiver's klass ([ECX+klass_offset]) and jump to the
// ic-miss stub on mismatch, then pad with nops.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint insts_size = cbuf.insts_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
     nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
}
1280 
1281 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1282   return OptoBreakpoint ? 11 : 12;
1283 }
1284 
1285 
1286 //=============================================================================
uint size_exception_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // exception handler starts out as jump and can be patched to
  // a call by deoptimization.  (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return NativeJump::instruction_size;
}
1295 
// Emit exception handler code.  Stuff framesize into a register
// and call a VM stub routine.
// Returns the offset of the handler within the stubs section, or 0 if
// the stub could not be allocated.
int emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base =
  __ start_a_stub(size_exception_handler());
  if (base == NULL)  return 0;  // CodeBuffer::expand failed
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1312 
uint size_deopt_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // exception handler starts out as jump and can be patched to
  // a call by deoptimization.  (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return 5 + NativeJump::instruction_size; // pushl(); jmp;
}
1321 
1322 // Emit deopt handler code.
1323 int emit_deopt_handler(CodeBuffer& cbuf) {
1324 
1325   // Note that the code buffer's insts_mark is always relative to insts.
1326   // That's why we must use the macroassembler to generate a handler.
1327   MacroAssembler _masm(&cbuf);
1328   address base =
1329   __ start_a_stub(size_exception_handler());
1330   if (base == NULL)  return 0;  // CodeBuffer::expand failed
1331   int offset = __ offset();
1332   InternalAddress here(__ pc());
1333   __ pushptr(here.addr());
1334 
1335   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1336   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1337   __ end_a_stub();
1338   return offset;
1339 }
1340 
1341 
1342 const bool Matcher::match_rule_supported(int opcode) {
1343   if (!has_match_rule(opcode))
1344     return false;
1345 
1346   return true;  // Per default match rules are supported.
1347 }
1348 
1349 int Matcher::regnum_to_fpu_offset(int regnum) {
1350   return regnum - 32; // The FP registers are in the second chunk
1351 }
1352 
// This is UltraSparc specific, true just means we have fast l2f conversion
// (queried on all platforms; x86 simply answers yes).
const bool Matcher::convL2FSupported(void) {
  return true;
}
1357 
1358 // Vector width in bytes
1359 const uint Matcher::vector_width_in_bytes(void) {
1360   return UseSSE >= 2 ? 8 : 0;
1361 }
1362 
// Vector ideal reg: 8-byte vectors are carried in double registers.
const uint Matcher::vector_ideal_reg(void) {
  return Op_RegD;
}
1367 
1368 // Is this branch offset short enough that a short branch can be used?
1369 //
1370 // NOTE: If the platform does not provide any short branch variants, then
1371 //       this method should return false for offset 0.
1372 bool Matcher::is_short_branch_offset(int rule, int offset) {
1373   // the short version of jmpConUCF2 contains multiple branches,
1374   // making the reach slightly less
1375   if (rule == jmpConUCF2_rule)
1376     return (-126 <= offset && offset <= 125);
1377   return (-128 <= offset && offset <= 127);
1378 }
1379 
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  // On 32-bit x86 the answer is no for every value.
  return false;
}
1384 
// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray: arrays at or below this many bytes are
// cleared inline instead of via rep stos.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;
1399 
// Narrow oops do not exist on 32-bit x86; this query must never be made.
bool Matcher::narrow_oop_use_complex_address() {
  ShouldNotCallThis();
  return true;
}
1404 
1405 
// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
1417 
1418 
// Rewrite the memory operand of an implicit-null-check candidate into
// its *_win95_safe variant.  Walks the node's operand list to find the
// operand that input 'idx' feeds, then replaces it with the matching
// win95-safe operand class where one exists; operand kinds that need no
// transformation return unchanged.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt     = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Advance operand-by-operand until 'idx' falls inside the current
  // operand's range of input edges.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                          // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new (C) indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}
1468 
// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;
1472 
// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when the x87 FPU is used for
// floats (i.e. no SSE).
bool Matcher::float_in_double() { return (UseSSE == 0); }
1476 
// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;
1479 
1480 // Return whether or not this register is ever used as an argument.  This
1481 // function is used on startup to build the trampoline stubs in generateOptoStub.
1482 // Registers not mentioned will be killed by the VM call in the trampoline, and
1483 // arguments in those registers not be available to the callee.
1484 bool Matcher::can_be_java_arg( int reg ) {
1485   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1486   if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
1487   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1488   return false;
1489 }
1490 
// A register may hold a spillable argument exactly when it can carry a
// Java argument at all (see can_be_java_arg above).
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}
1494 
1495 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1496   // Use hardware integer DIV instruction when
1497   // it is faster than a code which use multiply.
1498   // Only when constant divisor fits into 32 bit
1499   // (min_jint is excluded to get only correct
1500   // positive 32 bit values from negative).
1501   return VM_Version::has_fast_idiv() &&
1502          (divisor == (int)divisor && divisor != min_jint);
1503 }
1504 
// Register for DIVI projection of divmodI
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask;
}

// Register for MODI projection of divmodI
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask;
}

// Register for DIVL projection of divmodL
// Must never be reached on this platform (guarded by ShouldNotReachHere).
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
// Must never be reached on this platform (guarded by ShouldNotReachHere).
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// EBP holds the saved SP across MethodHandle invokes (see preserve_SP below).
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  return EBP_REG_mask;
}
1530 
1531 // Returns true if the high 32 bits of the value is known to be zero.
1532 bool is_operand_hi32_zero(Node* n) {
1533   int opc = n->Opcode();
1534   if (opc == Op_LoadUI2L) {
1535     return true;
1536   }
1537   if (opc == Op_AndL) {
1538     Node* o2 = n->in(2);
1539     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1540       return true;
1541     }
1542   }
1543   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1544     return true;
1545   }
1546   return false;
1547 }
1548 
1549 %}
1550 
1551 //----------ENCODING BLOCK-----------------------------------------------------
1552 // This block specifies the encoding classes used by the compiler to output
1553 // byte streams.  Encoding classes generate functions which are called by
1554 // Machine Instruction Nodes in order to generate the bit encoding of the
1555 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
1557 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1558 // operand to generate a function which returns its register number when
1559 // queried.   CONST_INTER causes an operand to generate a function which
1560 // returns the value of the constant when queried.  MEMORY_INTER causes an
1561 // operand to generate four functions which return the Base Register, the
1562 // Index Register, the Scale Value, and the Offset Value of the operand when
1563 // queried.  COND_INTER causes an operand to generate six functions which
1564 // return the encoding code (ie - encoding bits for the instruction)
1565 // associated with each basic boolean condition for a conditional instruction.
1566 // Instructions specify two basic values for encoding.  They use the
1567 // ins_encode keyword to specify their encoding class (which must be one of
1568 // the class names specified in the encoding block), and they use the
1569 // opcode keyword to specify, in order, their primary, secondary, and
1570 // tertiary opcode.  Only the opcode sections which a particular instruction
1571 // needs for encoding need to be specified.
1572 encode %{
1573   // Build emit functions for each basic byte or larger field in the intel
1574   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1575   // code in the enc_class source block.  Emit functions will live in the
1576   // main source block for now.  In future, we can generalize this by
1577   // adding a syntax that specifies the sizes of fields in an order,
1578   // so that the adlc can build the emit functions automagically
1579 
1580   // Emit primary opcode
1581   enc_class OpcP %{
1582     emit_opcode(cbuf, $primary);
1583   %}
1584 
1585   // Emit secondary opcode
1586   enc_class OpcS %{
1587     emit_opcode(cbuf, $secondary);
1588   %}
1589 
1590   // Emit opcode directly
1591   enc_class Opcode(immI d8) %{
1592     emit_opcode(cbuf, $d8$$constant);
1593   %}
1594 
1595   enc_class SizePrefix %{
1596     emit_opcode(cbuf,0x66);
1597   %}
1598 
1599   enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
1600     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1601   %}
1602 
1603   enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{    // OpcRegReg(Many)
1604     emit_opcode(cbuf,$opcode$$constant);
1605     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1606   %}
1607 
1608   enc_class mov_r32_imm0( eRegI dst ) %{
1609     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1610     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1611   %}
1612 
  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}

  // Dense encoding for older common ops
  // (register number is folded into the opcode byte itself).
  enc_class Opc_plus(immI opcode, eRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}
1658 
1659 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}

  enc_class OpcSErm (eRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  // Emit just the immediate: 8 bits when it fits, 32 bits otherwise.
  // Must match the sign-extend bit choice made by OpcSE/OpcSErm above.
  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}
1693 
  // Opcode + r/m + immediate for the LOW 32 bits of a long constant.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // Opcode + r/m + immediate for the HIGH 32 bits of a long constant.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}
1715 
  // Emit a 32-bit pc-relative displacement to the label (long JMP form).
  enc_class Lbl (label labl) %{ // GOTO
    Label *l = $labl$$label;
    assert(l != NULL, "need Label");
    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0);
  %}

  // Emit an 8-bit pc-relative displacement to the label (short JMP form).
  enc_class LblShort (label labl) %{ // GOTO
    Label *l = $labl$$label;
    assert(l != NULL, "need Label");
    int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0;
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    emit_d8(cbuf, disp);
  %}

  // Fold the register number into the secondary opcode byte (e.g. BSWAP r32).
  enc_class OpcSReg (eRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit value held in a register pair:
  // bswap each half, then exchange the halves.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  // Register-direct mod/rm byte with the secondary opcode in the reg field.
  enc_class RegOpc (eRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}
1751 
  // Conditional jump with 32-bit displacement; condition code is folded
  // into the secondary opcode byte.
  enc_class Jcc (cmpOp cop, label labl) %{    // JCC
    Label *l = $labl$$label;
    assert(l != NULL, "need Label");
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.insts_size()+4)) : 0);
  %}

  // Conditional jump with 8-bit displacement (short form).
  enc_class JccShort (cmpOp cop, label labl) %{    // JCC
    Label *l = $labl$$label;
    assert(l != NULL, "need Label");
    emit_cc(cbuf, $primary, $cop$$cmpcode);
    int disp = l ? (l->loc_pos() - (cbuf.insts_size()+1)) : 0;
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    emit_d8(cbuf, disp);
  %}

  // CMOVcc opcode bytes; condition code folded into the secondary byte.
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // FCMOVcc for FPU stack registers; two bytes built from 0xDA00 base.
  enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}
1786 
  // Slow-path portion of a subtype check; on a miss, falls through with
  // EDI non-zero (or zeroed first when $primary is set).
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label miss;

    MacroAssembler _masm(&cbuf);
    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
                                     NULL, &miss,
                                     /*set_cond_codes:*/ true);
    if ($primary) {
      __ xorptr(Redi, Redi);
    }
    __ bind(miss);
  %}

  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    // Record (or verify on later emits) the fixed size of this sequence so
    // that instruction-size predictions stay consistent.
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  // Debug-mode check that the FPU stack is in the expected state when
  // returning from a runtime leaf call.
  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}
1829 
  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    // In SSE2+ mode a C runtime call returns FP results on the x87 stack;
    // move (or discard) the value so the FPU stack is left clean.
    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp,  4));
      } else if (rt == T_DOUBLE) {
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp,  8));
      }
    }
  %}
1861 
1862 
  // Restore the standard FPU control word before a call if this method
  // runs in 24-bit precision mode.
  enc_class pre_call_FPU %{
    // If method sets FPU control word restore it here
    debug_only(int off0 = cbuf.insts_size());
    if( Compile::current()->in_24_bit_fp_mode() ) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == pre_call_FPU_size(), "correct size prediction");
  %}

  // Re-establish the 24-bit FPU control word after the call returns.
  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if( Compile::current()->in_24_bit_fp_mode() ) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}

  enc_class preserve_SP %{
    debug_only(int off0 = cbuf.insts_size());
    MacroAssembler _masm(&cbuf);
    // RBP is preserved across all calls, even compiled calls.
    // Use it to preserve RSP in places where the callee might change the SP.
    __ movptr(rbp_mh_SP_save, rsp);
    debug_only(int off1 = cbuf.insts_size());
    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
  %}

  // Restore the SP saved by preserve_SP above.
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ movptr(rsp, rbp_mh_SP_save);
  %}
1896 
  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    // Pick the relocation type: plain runtime call when there is no
    // resolved _method, otherwise optimized-virtual or static call.
    if ( !_method ) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     runtime_call_Relocation::spec(), RELOC_IMM32 );
    } else if(_optimized_virtual) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
    } else {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                     static_call_Relocation::spec(), RELOC_IMM32 );
    }
    if( _method ) {  // Emit stub for static call
      emit_java_to_interp(cbuf);
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    // !!!!!
    // Generate  "Mov EAX,0x00", placeholder instruction to load oop-info
    // emit_call_dynamic_prologue( cbuf );
    cbuf.set_insts_mark();
    emit_opcode(cbuf, 0xB8 + EAX_enc);        // mov    EAX,-1
    emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
    address  virtual_call_oop_addr = cbuf.insts_mark();
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
                virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
  %}

  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(methodOopDesc::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
    cbuf.set_insts_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);             // Displacement

  %}

  // XOR a register with itself (sets it to zero).
  enc_class Xor_Reg (eRegI dst) %{
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
  %}
1949 
1950 //   Following encoding is no longer used, but may be restored if calling
1951 //   convention changes significantly.
1952 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1953 //
1954 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1955 //     // int ic_reg     = Matcher::inline_cache_reg();
1956 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1957 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1958 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1959 //
1960 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1961 //     // // so we load it immediately before the call
1962 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1963 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1964 //
1965 //     // xor rbp,ebp
1966 //     emit_opcode(cbuf, 0x33);
1967 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1968 //
1969 //     // CALL to interpreter.
1970 //     cbuf.set_insts_mark();
1971 //     $$$emit8$primary;
1972 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1973 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1974 //   %}
1975 
  // Shift-by-immediate: primary opcode, r/m with secondary opcode,
  // then the 8-bit shift count.
  enc_class RegOpcImm (eRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  enc_class LdImmI (eRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  enc_class LdImmP (eRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Load the LOW 32 bits of a long constant; a zero is emitted as XOR.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the HIGH 32 bits of a long constant into the paired register.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}
2025 
2026 
  // MOVD: move a 32-bit integer register into an XMM register.
  enc_class MovI2X_reg(regX dst, eRegI src) %{
    emit_opcode(cbuf, 0x66 );     // MOVD dst,src
    emit_opcode(cbuf, 0x0F );
    emit_opcode(cbuf, 0x6E );
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOVD: move the low 32 bits of an XMM register into an integer register.
  enc_class MovX2I_reg(eRegI dst, regX src) %{
    emit_opcode(cbuf, 0x66 );     // MOVD dst,src
    emit_opcode(cbuf, 0x0F );
    emit_opcode(cbuf, 0x7E );
    emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
  %}

  // Assemble a 64-bit value from a GP register pair into an XMM register:
  // MOVD each half, then interleave with PUNPCKLDQ.
  enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
    { // MOVD $dst,$src.lo
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    }
    { // MOVD $tmp,$src.hi
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
    }
    { // PUNPCKLDQ $dst,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x62);
      emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
     }
  %}

  // Split a 64-bit XMM value into a GP register pair: MOVD the low half,
  // shuffle the high half down, then MOVD it out.
  enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
    { // MOVD $dst.lo,$src
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
    }
    { // PSHUFLW $tmp,$src,0x4E  (01001110b)
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x70);
      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
      emit_d8(cbuf, 0x4E);
    }
    { // MOVD $dst.hi,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
    }
  %}
2083 
2084 
  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( eRegI dst, eRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Copy the low word of a long pair into an integer register.
  enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
  %}

  // Register-direct mod/rm byte only (opcode supplied elsewhere).
  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Primary opcode + mod/rm for the LOW halves of a long register pair.
  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Secondary opcode + mod/rm for the HIGH halves of a long register pair.
  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // Mod/rm only for the LOW halves (opcode supplied elsewhere).
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Mod/rm only for the HIGH halves (opcode supplied elsewhere).
  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // Mod/rm pairing an integer register with the HIGH half of a long pair.
  enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}
2124 
  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  enc_class Con32F_as_bits(immF src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con32XF_as_bits(immXF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  // Emit the immediate as a raw 32-bit value.
  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}
2158 
  // Emit the LOCK prefix, but only on multiprocessor systems.
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}

  // Cmp-xchg long value.
  // Note: we need to swap rbx, and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx,.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // Word-sized compare-and-exchange with optional LOCK prefix.
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}
2196 
  // Materialize the ZF condition flag as a 0/1 boolean in `res`:
  // load 0, skip the reload of 1 when the flags say "not equal".
  // (MOV does not affect flags, so the conditional jump is safe.)
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
  %}
2215 
  // Emit the mod/rm (and any SIB/displacement) bytes addressing `mem`
  // with `ereg` in the reg field.
  enc_class RegMem (eRegI ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = $ereg$$reg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}

  // As RegMem, but addresses the upper word of a long in memory
  // (displacement + 4) using the high register of the pair.
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/);
  %}
2235 
  // Shift a long register pair by 1..31 bits: a double-precision shift
  // (SHLD/SHRD, selected by $tertiary) moves bits across the pair, then a
  // plain shift finishes the remaining half.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Arithmetic right shift of a long pair by 32..63 bits: move the high
  // word down, shift it by (cnt-32), then fill the high word with the
  // sign via a 31-bit arithmetic shift.
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_d8(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical shift of a long pair by 32..63 bits: move one word across,
  // shift it by (cnt-32), then clear the vacated word with XOR.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}
2277 
  // Clone of RegMem but accepts an extra parameter to access each
  // half of a double in memory; it never needs relocation info.
  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
    emit_opcode(cbuf,$opcode$$constant);
    int reg_encoding = $rm_reg$$reg;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp + $disp_for_half$$constant;
    bool disp_is_oop = false;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}

  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
  //
  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
  // and it never needs relocation information.
  // Frequently used to move data between FPU's Stack Top and memory.
  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" );
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false);
  %}

  // As RMopc_Mem_no_oop, but the displacement may be an oop and so may
  // require relocation information.
  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
    int rm_byte_opcode = $rm_opcode$$constant;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
  %}

  // LEA-style addressing: base register plus constant displacement,
  // no index and no scale.
  enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{    // emit_reg_lea
    int reg_encoding = $dst$$reg;
    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
    int index        = 0x04;            // 0x04 indicates no index
    int scale        = 0x00;            // 0x00 indicates no scale
    int displace     = $src1$$constant; // 0x00 indicates no displacement
    bool disp_is_oop = false;
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2325 
  // dst = min(dst,src): compare, then JL,s +2 skips the 2-byte MOV when
  // dst is already the smaller value.
  enc_class min_enc (eRegI dst, eRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move (JL,s over the 2-byte MOV below)
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // dst = max(dst,src): identical to min_enc except the branch is JG,s.
  enc_class max_enc (eRegI dst, eRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move (JG,s over the 2-byte MOV below)
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}
2349 
  // Store an x87 register to memory.  FPR1 can be stored directly (FST);
  // any other register is first FLDed to the top of stack and then stored
  // with a popping FSTP.  $primary supplies the store opcode byte.
  enc_class enc_FP_store(memory mem, regD src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store (FST: /2)
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop (FSTP: /3)
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2368 
  // Two's-complement negate of an integer register (F7 /3).
  enc_class neg_reg(eRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  // SETL: set the destination byte register to 1 if the last compare was
  // signed-less, else 0 (0F 9C; the mod r/m reg field is a don't-care).
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}
2381 
  // Branch-free conditional add (cadd_cmpLT): p -= q; SBB tmp,tmp turns the
  // borrow into all-ones (or zero), so p += (y & tmp) adds y exactly when
  // the subtract borrowed.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp   (tmp = borrow ? -1 : 0)
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}
2398 
  // Same branch-free conditional add as enc_cmpLTP, but the value that is
  // conditionally added comes from memory (AND $tmp,[mem]); the memory
  // operand may need relocation info, hence the insts_mark.
  enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp   (tmp = borrow ? -1 : 0)
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,0x23);
    int reg_encoding = tmpReg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}
2422 
  // Long left shift by a variable count in ECX.  If bit 5 of the count is
  // set (shift >= 32), the low word is first moved into the high word and
  // cleared; SHLD/SHL then apply the count mod 32 (hardware masks to 5 bits).
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small  (+4 skips the 2-byte MOV and 2-byte XOR below)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV    $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR    $dst.lo
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
    // SHLD   $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL    $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}
2446 
  // Long logical right shift by a variable count in ECX.  If the count is
  // >= 32, the high word is moved to the low word and the high word is
  // cleared; SHRD/SHR then apply the count mod 32.
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small  (+4 skips the 2-byte MOV and 2-byte XOR below)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV    $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR    $dst.hi
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
    // SHRD   $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}
2470 
  // Long arithmetic right shift by a variable count in ECX.  If the count
  // is >= 32, the high word is moved to the low word and then SARed by 31
  // so it holds only the sign; SHRD/SAR then apply the count mod 32.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small  (+5 skips the 2-byte MOV and 3-byte SAR-imm8 below)
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV    $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR    $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
// small:
    // SHRD   $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}
2495 
2496 
2497   // ----------------- Encodings for floating point unit -----------------
2498   // May leave result in FPU-TOS or FPU reg depending on opcodes
  // Two-byte x87 reg-form instruction; opcode byte and /reg extension are
  // supplied by the instruct via $primary/$secondary.
  enc_class OpcReg_F (regF src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}

  // Pop argument in FPR0 with FSTP ST(0)
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );           // FSTP ST(0) == DD D8
    emit_d8( cbuf, 0xD8 );
  %}

  // !!!!! equivalent to Pop_Reg_F
  enc_class Pop_Reg_D( regD dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Push x87 register $dst onto the FPU stack.
  enc_class Push_Reg_D( regD dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}

  // Multiply $dst by the strictfp scaling constant to undo/avoid the
  // extended-precision exponent range (bias value loaded from a stub).
  enc_class strictfp_bias1( regD dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}

  // Same as strictfp_bias1 but with the second bias constant.
  enc_class strictfp_bias2( regD dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}
2536 
  // Special case for moving an integer register to a stack slot.
  enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}

  // Special case for moving a register to a stack slot.
  // Emits only the ESP-relative address bytes; the caller's encoding has
  // already emitted the opcode.
  enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
    emit_d32(cbuf, $dst$$disp);   // Displacement
  %}

  // Push the integer in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}

  // Push the float in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_F( memory src ) %{    // FLD_S   [ESP+src]
    store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
  %}

  // Push the double in stackSlot 'src' onto FP-stack
  enc_class Push_Mem_D( memory src ) %{    // FLD_D   [ESP+src]
    store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
  %}

  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}

  // Same as Pop_Mem_F except for opcode
  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}

  // Pop the FPU top-of-stack into x87 register $dst.
  enc_class Pop_Reg_F( regF dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Push x87 register $dst onto the FPU stack.
  enc_class Push_Reg_F( regF dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}
2585 
  // Push FPU's float to a stack-slot, and pop FPU-stack
  // FPR1 is stored without popping (FST, /2); any other register is first
  // FLDed to TOS and stored with a popping FSTP (/3) to rebalance the stack.
  enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}

  // Push FPU's double to a stack-slot, and pop FPU-stack
  // Same FST-vs-FLD+FSTP logic as Pop_Mem_Reg_F, with the double opcode.
  enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}

  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  // Register-to-register form: FST (DD D0+i) when src is already FPR1,
  // else FLD src then FSTP (DD D8+i).
  enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}
2619 
2620 
  // dst = src + src1*src2, computed on the x87 stack via MacroAssembler:
  // load src1 to TOS, multiply by src2, add src, store-and-pop into dst.
  enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
    MacroAssembler masm(&cbuf);
    masm.fld_s(  $src1$$reg-1);   // nothing at TOS, load TOS from src1.reg
    masm.fmul(   $src2$$reg+0);   // value at TOS
    masm.fadd(   $src$$reg+0);    // value at TOS
    masm.fstp_d( $dst$$reg+0);    // value at TOS, popped off after store
  %}


  // Load $dst to TOS for a mod operation; if $src is not FPR1, rotate the
  // stack pointer (fincstp/fdecstp) around an FXCH so that $src ends up in
  // the position the follow-up code expects, without disturbing TOS.
  enc_class Push_Reg_Mod_D( regD dst, regD src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}
2647 
  // Move two XMM doubles onto the x87 stack (src1 first, then src0) by
  // bouncing each through an 8-byte scratch area at [ESP].
  enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
    // Allocate 8 bytes of scratch space on the stack
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src1
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src0
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}

  // Float flavor of Push_ModD_encoding: bounce two XMM singles onto the
  // x87 stack through a 4-byte scratch area at [ESP].
  enc_class Push_ModX_encoding( regX src0, regX src1) %{
    // Allocate 4 bytes of scratch space on the stack
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);

    emit_opcode  (cbuf, 0xF3 );     // MOVSS [ESP], src1
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );      // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode  (cbuf, 0xF3 );     // MOVSS [ESP], src0
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );      // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}
2695 
  // Move the x87 TOS double into XMM $dst through [ESP], then release the
  // 8-byte scratch area (allocated earlier, e.g. by push_stack_temp_qword).
  enc_class Push_ResultXD(regXD dst) %{
    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]

    // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);    // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x08);
  %}

  // Float flavor: move the x87 TOS single into XMM $dst through [ESP],
  // then release $d8 bytes of scratch (4 or 8, chosen by the instruct).
  enc_class Push_ResultX(regX dst, immI d8) %{
    store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]

    emit_opcode  (cbuf, 0xF3 );     // MOVSS dst(xmm), [ESP]
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x10 );
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);    // ADD ESP,d8 (4 or 8)
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,$d8$$constant);
  %}

  // Move an XMM double onto the x87 stack through a freshly allocated
  // 8-byte scratch area at [ESP] (left allocated for the caller to pop).
  enc_class Push_SrcXD(regXD src) %{
    // Allocate 8 bytes of scratch space on the stack
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}

  // Reserve an 8-byte scratch area on the stack.
  enc_class push_stack_temp_qword() %{
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8    (cbuf,0x08);
  %}

  // Release the 8-byte scratch area reserved by push_stack_temp_qword.
  enc_class pop_stack_temp_qword() %{
    emit_opcode(cbuf,0x83);     // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8    (cbuf,0x08);
  %}

  // Copy an XMM double to the x87 TOS through the existing [ESP] scratch.
  enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], xmm_src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}
2759 
  // Compute X^Y using Intel's fast hardware instructions, if possible.
  // Otherwise return a NaN.
  // Splits Q = Y*log2(X) into integer and fractional parts, computes
  // 2^frac(Q) with f2xm1, and builds 2^int(Q) by constructing a double's
  // exponent field in EAX/ECX/EBX (NaN is substituted on overflow via
  // CMOVne).  Expects an 8-byte scratch area at [ESP].  Clobbers
  // EAX/EBX/ECX.  NOTE: the comments below use r* names but these are the
  // 32-bit registers of this 32-bit port.
  enc_class pow_exp_core_encoding %{
    // FPR1 holds Y*ln2(X).  Compute FPR1 = 2^(Y*ln2(X))
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0);  // fdup = fld st(0)          Q       Q
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC);  // frndint               int(Q)      Q
    emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9);  // fsub st(1) -= st(0);  int(Q) frac(Q)
    emit_opcode(cbuf,0xDB);                          // FISTP [ESP]           frac(Q)
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0);  // f2xm1                 2^frac(Q)-1
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8);  // fld1                  1 2^frac(Q)-1
    emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1);  // faddp                 2^frac(Q)
    emit_opcode(cbuf,0x8B);                          // mov rax,[esp+0]=int(Q)
    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
    emit_opcode(cbuf,0xC7);                          // mov rcx,0xFFFFF800 - overflow mask
    emit_rm(cbuf, 0x3, 0x0, ECX_enc);
    emit_d32(cbuf,0xFFFFF800);
    emit_opcode(cbuf,0x81);                          // add rax,1023 - the double exponent bias
    emit_rm(cbuf, 0x3, 0x0, EAX_enc);
    emit_d32(cbuf,1023);
    emit_opcode(cbuf,0x8B);                          // mov rbx,eax
    emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
    emit_opcode(cbuf,0xC1);                          // shl rax,20 - Slide to exponent position
    emit_rm(cbuf,0x3,0x4,EAX_enc);
    emit_d8(cbuf,20);
    emit_opcode(cbuf,0x85);                          // test rbx,ecx - check for overflow
    emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
    emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45);  // CMOVne rax,ecx - overflow; stuff NAN into EAX
    emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
    emit_opcode(cbuf,0x89);                          // mov [esp+4],eax - Store as part of double word
    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
    emit_opcode(cbuf,0xC7);                          // mov [esp+0],0   - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
    emit_d32(cbuf,0);
    emit_opcode(cbuf,0xDC);                          // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
    encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
  %}
2798 
2799 //   enc_class Pop_Reg_Mod_D( regD dst, regD src)
2800 //   was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
2801 
  // After a mod-style operation, rotate $src into FPR1 (fincstp / FXCH /
  // fdecstp) so a following Pop_Reg_F or Pop_Mem_F can store the result.
  // No-op when $src is already FPR1.
  enc_class Push_Result_Mod_D( regD src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}

  // Copy the FPU status word into EFLAGS and jump over the next 5 bytes
  // when the parity flag (unordered compare) is clear.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}
2830 
  // FPREM loop: keep executing fprem until the FPU's C2 flag (surfaced as
  // the parity flag after fnstsw/sahf) reports the reduction is complete.
  // The JP displacement of -12 jumps back exactly over the 12 bytes of
  // fprem(2) + wait(1) + fnstsw(2) + sahf(1) + jp(6).
  enc_class emitModD() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp  ::loop  (near form, disp32 = -12)
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}

  // Transfer FPU compare flags to EFLAGS, folding the unordered case (C2,
  // bit 0x0400 of the status word) into the "less" outcome via AH.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}

  // After a P6 (FUCOMI-style) compare: if the parity flag signals NaN,
  // force the flags to the "less" outcome (carry set) via MOV AH,1 + SAHF.
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}
2890 
2891 //     fnstsw_ax();
2892 //     sahf();
2893 //     movl(dst, nan_result);
2894 //     jcc(Assembler::parity, exit);
2895 //     movl(dst, less_result);
2896 //     jcc(Assembler::below, exit);
2897 //     movl(dst, equal_result);
2898 //     jcc(Assembler::equal, exit);
2899 //     movl(dst, greater_result);
2900 
// Result values actually emitted by CmpF_Result below
// (the immediates in the MOVs):
// less_result     = -1;
// greater_result  =  1;
// equal_result    =  0;
// nan_result      = -1;
2905 
  // Materialize the x87 compare outcome as an integer in $dst:
  //   -1 for less or unordered (NaN), 0 for equal, +1 for greater.
  // Each MOV+Jcc pair conditionally keeps the value just loaded; the
  // 8-bit jump displacements (0x13/0x0C/0x05) skip the remaining
  // MOV(5)/Jcc(2) bytes to the common exit.
  enc_class CmpF_Result(eRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}
2934 
2935 
  // XMM version of CmpF_Result. Because the XMM compare
  // instructions set the EFLAGS directly. It becomes simpler than
  // the float version above.
  // Assumes $dst is pre-loaded with 0; decrements it to -1 for less or
  // unordered (NaN), increments it to +1 for greater, leaves 0 for equal.
  enc_class CmpX_Result(eRegI dst) %{
    MacroAssembler _masm(&cbuf);
    Label nan, inc, done;

    __ jccb(Assembler::parity, nan);
    __ jccb(Assembler::equal,  done);
    __ jccb(Assembler::above,  inc);
    __ bind(nan);
    __ decrement(as_Register($dst$$reg)); // NO L qqq
    __ jmpb(done);
    __ bind(inc);
    __ increment(as_Register($dst$$reg)); // NO L qqq
    __ bind(done);
  %}
2953 
  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  // (Compares high words, and only falls through to the low-word compare
  // when they are equal - the flags from the two compares are not combined,
  // so signed ordering across the full 64 bits is not produced.)
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
// done:
  %}
2968 
2969   enc_class convert_int_long( regL dst, eRegI src ) %{
2970     // mov $dst.lo,$src
2971     int dst_encoding = $dst$$reg;
2972     int src_encoding = $src$$reg;
2973     encode_Copy( cbuf, dst_encoding  , src_encoding );
2974     // mov $dst.hi,$src
2975     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2976     // sar $dst.hi,31
2977     emit_opcode( cbuf, 0xC1 );
2978     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2979     emit_d8(cbuf, 0x1F );
2980   %}
2981 
  // Convert a long to double: push hi then lo so [ESP] holds the 64-bit
  // value, FILD the qword, then pop the 8 bytes back off the stack.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]  (DF /5, [esp+0])
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add  SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}
2997 
  // EDX:EAX = EAX * $src1, then arithmetic-shift the high half (EDX = $dst)
  // right by $cnt-32.  When $cnt is exactly 32 the shift is omitted since
  // the high word already is the wanted result.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL   EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR    EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}
3010 
  // this version doesn't have add sp, 8
  // Same as convert_long_double but leaves the 8 pushed bytes on the
  // stack for the caller to release.
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]  (DF /5, [esp+0])
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}
3023 
  // Signed widening multiply: EDX:EAX = (long)EAX * (long)$src (F7 /5).
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}

  // Unsigned widening multiply: EDX:EAX = EAX * $src, zero-extended (F7 /4).
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}
3037 
  // Full 64x64->64 multiply into EDX:EAX ($dst is eADXRegL, so $dst.hi is
  // EDX); $tmp accumulates the two cross products that form the high word.
  enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp  (fold the cross products into the high word)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}
3063 
  // Multiply EDX:EAX by a small (8-bit immediate) long constant; the cross
  // product with the high word is computed in $tmp and folded into EDX.
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp  (fold the cross product into the high word)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}
3081 
  // Long division via a runtime call: push both operands (hi then lo, so
  // each long is little-endian in memory), call SharedRuntime::ldiv, then
  // pop the 16 argument bytes.  Result is returned in EDX:EAX.
  // NOTE(review): HIGH_FROM_LOW is applied to the whole 0x50+reg opcode
  // byte; this matches 0x50+HIGH_FROM_LOW(reg) only because HIGH_FROM_LOW
  // is a pure additive offset -- confirm against its definition.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}
3100 
  // Long remainder via a runtime call; identical to long_div except the
  // target is SharedRuntime::lrem.  Result is returned in EDX:EAX.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}
3119 
  // Set condition flags from a long compared against zero: ORing the two
  // 32-bit halves yields ZF=1 exactly when the full 64-bit value is zero.
  // $tmp is clobbered.
  enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}
3128 
  // Long equality compare: compare the low halves first; if they already
  // differ, skip the high-half compare so ZF reflects full 64-bit equality.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip            (short Jcc: 0x70 + condition code 0x5 = NE)
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);          // hop over the 2-byte high-half CMP below
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}
3140 
  // Signed long compare via subtract-with-borrow: CMP the low halves, then
  // SBB the high halves through $tmp (clobbered) so the resulting flags
  // describe the full 64-bit signed comparison.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB   $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}
3152 
  // Compare a long against zero by computing 0 - $src with CMP/SBB.  $tmp
  // is zeroed and clobbered; flags afterwards describe the signed
  // comparison of 0 versus the 64-bit $src.
  enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}
3164 
3165  // Sniff, sniff... smells like Gnu Superoptimizer
  // Two's-complement negate a 64-bit register pair in place:
  // NEG hi; NEG lo; then SBB hi,0 absorbs the borrow from the low half.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi    (F7 /3)
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo    (F7 /3)
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0  (83 /3 ib)
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}
3175 
  // Load a 64-bit quadword from memory into an XMM register (MOVQ).
  enc_class movq_ld(regXD dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    __ movq($dst$$XMMRegister, $mem$$Address);
  %}
3180 
  // Store the low 64 bits of an XMM register to memory (MOVQ).
  enc_class movq_st(memory mem, regXD src) %{
    MacroAssembler _masm(&cbuf);
    __ movq($mem$$Address, $src$$XMMRegister);
  %}
3185 
  // Replicate the low byte of $src across the low 8 byte lanes of $dst:
  // copy src to dst, interleave dst's bytes with themselves (punpcklbw),
  // then splat the resulting low word across the low four words (pshuflw).
  enc_class pshufd_8x8(regX dst, regX src) %{
    MacroAssembler _masm(&cbuf);

    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
  %}
3193 
  // Replicate the low 16-bit word of $src across the low four words of
  // $dst (pshuflw with shuffle-control imm 0x00).
  enc_class pshufd_4x16(regX dst, regX src) %{
    MacroAssembler _masm(&cbuf);

    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
  %}
3199 
  // Shuffle the four 32-bit dwords of $src into $dst according to the
  // immediate shuffle-control $mode (PSHUFD).
  enc_class pshufd(regXD dst, regXD src, int mode) %{
    MacroAssembler _masm(&cbuf);

    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
  %}
3205 
  // Bitwise XOR of two XMM registers: $dst ^= $src (PXOR).
  enc_class pxor(regXD dst, regXD src) %{
    MacroAssembler _masm(&cbuf);

    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
  %}
3211 
  // Move a 32-bit general-purpose register into the low dword of an XMM
  // register (MOVD).
  enc_class mov_i2x(regXD dst, eRegI src) %{
    MacroAssembler _masm(&cbuf);

    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
  %}
3217 
3218 
3219   // Because the transitions from emitted code to the runtime
3220   // monitorenter/exit helper stubs are so slow it's critical that
3221   // we inline both the stack-locking fast-path and the inflated fast path.
3222   //
3223   // See also: cmpFastLock and cmpFastUnlock.
3224   //
3225   // What follows is a specialized inline transliteration of the code
3226   // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
3227   // another option would be to emit TrySlowEnter and TrySlowExit methods
3228   // at startup-time.  These methods would accept arguments as
3229   // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
3230   // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
3231   // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
3232   // In practice, however, the # of lock sites is bounded and is usually small.
  // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
  // if the processor uses simple bimodal branch predictors keyed by EIP,
  // since the helper routines would be called from multiple synchronization
  // sites.
3237   //
3238   // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
3239   // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
3240   // to those specialized methods.  That'd give us a mostly platform-independent
3241   // implementation that the JITs could optimize and inline at their pleasure.
  // Done correctly, the only time we'd need to cross to native code would be
3243   // to park() or unpark() threads.  We'd also need a few more unsafe operators
3244   // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
3245   // (b) explicit barriers or fence operations.
3246   //
3247   // TODO:
3248   //
3249   // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
3250   //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
3251   //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
3252   //    the lock operators would typically be faster than reifying Self.
3253   //
3254   // *  Ideally I'd define the primitives as:
3255   //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
3256   //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
3257   //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
  //    Instead, we're stuck with the rather awkward and brittle register assignments below.
3259   //    Furthermore the register assignments are overconstrained, possibly resulting in
3260   //    sub-optimal code near the synchronization site.
3261   //
3262   // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
3263   //    Alternately, use a better sp-proximity test.
3264   //
3265   // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
3266   //    Either one is sufficient to uniquely identify a thread.
3267   //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
3268   //
3269   // *  Intrinsify notify() and notifyAll() for the common cases where the
3270   //    object is locked by the calling thread but the waitlist is empty.
3271   //    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
3272   //
3273   // *  use jccb and jmpb instead of jcc and jmp to improve code density.
3274   //    But beware of excessive branch density on AMD Opterons.
3275   //
3276   // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
3277   //    or failure of the fast-path.  If the fast-path fails then we pass
3278   //    control to the slow-path, typically in C.  In Fast_Lock and
3279   //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
3280   //    will emit a conditional branch immediately after the node.
3281   //    So we have branches to branches and lots of ICC.ZF games.
3282   //    Instead, it might be better to have C2 pass a "FailureLabel"
3283   //    into Fast_Lock and Fast_Unlock.  In the case of success, control
3284   //    will drop through the node.  ICC.ZF is undefined at exit.
3285   //    In the case of failure, the node will branch directly to the
3286   //    FailureLabel
3287 
3288 
3289   // obj: object to lock
3290   // box: on-stack box address (displaced header location) - KILLED
3291   // rax,: tmp -- KILLED
3292   // scr: tmp -- KILLED
  enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
    // Inline monitor-enter fast path.  On exit ICC.ZF == 1 signals success
    // (lock acquired); ZF == 0 routes control to the slow path.  The
    // EmitSync flag bits select alternate/diagnostic code shapes.

    Register objReg = as_Register($obj$$reg);
    Register boxReg = as_Register($box$$reg);
    Register tmpReg = as_Register($tmp$$reg);
    Register scrReg = as_Register($scr$$reg);

    // Ensure the register assignments are disjoint
    guarantee (objReg != boxReg, "") ;
    guarantee (objReg != tmpReg, "") ;
    guarantee (objReg != scrReg, "") ;
    guarantee (boxReg != tmpReg, "") ;
    guarantee (boxReg != scrReg, "") ;
    // tmp must be EAX: it is the implicit comparand of CMPXCHG below.
    guarantee (tmpReg == as_Register(EAX_enc), "") ;

    MacroAssembler masm(&cbuf);

    // Optional lock statistics; _counters may be NULL when not collecting.
    if (_counters != NULL) {
      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
    }
    // EmitSync & 1: diagnostic mode -- force every lock through the slow path.
    if (EmitSync & 1) {
        // set box->dhw = unused_mark (3)
        // Force all sync thru slow-path: slow_enter() and slow_exit()
        masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
        masm.cmpptr (rsp, (int32_t)0) ;               // rsp != 0, so ZF=0 -> slow path
    } else
    // EmitSync & 2: classic stack-locking only (no inflated fast path).
    if (EmitSync & 2) {
        Label DONE_LABEL ;
        if (UseBiasedLocking) {
           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
        }

        masm.movptr(tmpReg, Address(objReg, 0)) ;          // fetch markword
        masm.orptr (tmpReg, 0x1);                          // set the "unlocked" bit
        masm.movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
        if (os::is_MP()) { masm.lock();  }
        masm.cmpxchgptr(boxReg, Address(objReg, 0));          // Updates tmpReg
        masm.jcc(Assembler::equal, DONE_LABEL);
        // Recursive locking
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );        // sp-proximity test
        masm.movptr(Address(boxReg, 0), tmpReg);
        masm.bind(DONE_LABEL) ;
    } else {
      // Possible cases that we'll encounter in fast_lock
      // ------------------------------------------------
      // * Inflated
      //    -- unlocked
      //    -- Locked
      //       = by self
      //       = by other
      // * biased
      //    -- by Self
      //    -- by other
      // * neutral
      // * stack-locked
      //    -- by self
      //       = sp-proximity test hits
      //       = sp-proximity test generates false-negative
      //    -- by other
      //

      Label IsInflated, DONE_LABEL, PopDone ;

      // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
      // order to reduce the number of conditional branches in the most common cases.
      // Beware -- there's a subtle invariant that fetch of the markword
      // at [FETCH], below, will never observe a biased encoding (*101b).
      // If this invariant is not held we risk exclusion (safety) failure.
      if (UseBiasedLocking && !UseOptoBiasInlining) {
        masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
      }

      masm.movptr(tmpReg, Address(objReg, 0)) ;         // [FETCH]
      masm.testptr(tmpReg, 0x02) ;                      // Inflated v (Stack-locked or neutral)
      masm.jccb  (Assembler::notZero, IsInflated) ;

      // Attempt stack-locking ...
      masm.orptr (tmpReg, 0x1);
      masm.movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
      if (os::is_MP()) { masm.lock();  }
      masm.cmpxchgptr(boxReg, Address(objReg, 0));           // Updates tmpReg
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jccb (Assembler::equal, DONE_LABEL);

      // Recursive locking
      masm.subptr(tmpReg, rsp);
      masm.andptr(tmpReg, 0xFFFFF003 );                 // sp-proximity test
      masm.movptr(Address(boxReg, 0), tmpReg);          // 0 in dhw marks recursion
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jmp  (DONE_LABEL) ;

      masm.bind (IsInflated) ;

      // The object is inflated.
      //
      // TODO-FIXME: eliminate the ugly use of manifest constants:
      //   Use markOopDesc::monitor_value instead of "2".
      //   use markOop::unused_mark() instead of "3".
      // The tmpReg value is an objectMonitor reference ORed with
      // markOopDesc::monitor_value (2).   We can either convert tmpReg to an
      // objectmonitor pointer by masking off the "2" bit or we can just
      // use tmpReg as an objectmonitor pointer but bias the objectmonitor
      // field offsets with "-2" to compensate for and annul the low-order tag bit.
      //
      // I use the latter as it avoids AGI stalls.
      // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
      // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
      //
      #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)

      // boxReg refers to the on-stack BasicLock in the current frame.
      // We'd like to write:
      //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
      // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
      // additional latency as we have another ST in the store buffer that must drain.

      if (EmitSync & 8192) {
         masm.movptr(Address(boxReg, 0), 3) ;            // results in ST-before-CAS penalty
         masm.get_thread (scrReg) ;
         masm.movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
         masm.movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      } else
      if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
         masm.movptr(scrReg, boxReg) ;                  // save box address in scr
         masm.movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form: consider XORL tmpReg,tmpReg
           masm.movptr(tmpReg, NULL_WORD) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           // Test-And-CAS instead of CAS
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Ideally, I'd manifest "Self" with get_thread and then attempt
         // to CAS the register containing Self into m->Owner.
         // But we don't have enough registers, so instead we can either try to CAS
         // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
         // we later store "Self" into m->Owner.  Transiently storing a stack address
         // (rsp or the address of the box) into  m->owner is harmless.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         if (os::is_MP()) { masm.lock();  }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
         masm.movptr(Address(scrReg, 0), 3) ;          // box->_displaced_header = 3 (mov does not disturb flags)
         masm.jccb  (Assembler::notZero, DONE_LABEL) ; // branches on the CAS result above
         masm.get_thread (scrReg) ;                    // beware: clobbers ICCs
         masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
         masm.xorptr(boxReg, boxReg) ;                 // set icc.ZFlag = 1 to indicate success

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      } else {
         masm.movptr(Address(boxReg, 0), 3) ;       // results in ST-before-CAS penalty
         masm.movptr(boxReg, tmpReg) ;

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form
           masm.xorptr  (tmpReg, tmpReg) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Use either "Self" (in scr) or rsp as thread identity in _owner.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         masm.get_thread (scrReg) ;
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      }

      // DONE_LABEL is a hot target - we'd really like to place it at the
      // start of cache line by padding with NOPs.
      // See the AMD and Intel software optimization manuals for the
      // most efficient "long" NOP encodings.
      // Unfortunately none of our alignment mechanisms suffice.
      masm.bind(DONE_LABEL);

      // Avoid branch-to-branch on AMD processors
      // This appears to be superstition.
      if (EmitSync & 32) masm.nop() ;


      // At DONE_LABEL the icc ZFlag is set as follows ...
      // Fast_Unlock uses the same protocol.
      // ZFlag == 1 -> Success
      // ZFlag == 0 -> Failure - force control through the slow-path
    }
  %}
3523 
3524   // obj: object to unlock
3525   // box: box address (displaced header location), killed.  Must be EAX.
3526   // rbx,: killed tmp; cannot be obj nor box.
3527   //
3528   // Some commentary on balanced locking:
3529   //
3530   // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3531   // Methods that don't have provably balanced locking are forced to run in the
3532   // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3533   // The interpreter provides two properties:
  // I1:  At return-time the interpreter automatically and quietly unlocks any
  //      objects acquired by the current activation (frame).  Recall that the
3536   //      interpreter maintains an on-stack list of locks currently held by
3537   //      a frame.
  // I2:  If a method attempts to unlock an object that is not held by
  //      the frame the interpreter throws IMSX.
3540   //
3541   // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3542   // B() doesn't have provably balanced locking so it runs in the interpreter.
3543   // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
3544   // is still locked by A().
3545   //
3546   // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
3547   // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3548   // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
3549   // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3550 
3551   enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
3552 
3553     Register objReg = as_Register($obj$$reg);
3554     Register boxReg = as_Register($box$$reg);
3555     Register tmpReg = as_Register($tmp$$reg);
3556 
3557     guarantee (objReg != boxReg, "") ;
3558     guarantee (objReg != tmpReg, "") ;
3559     guarantee (boxReg != tmpReg, "") ;
3560     guarantee (boxReg == as_Register(EAX_enc), "") ;
3561     MacroAssembler masm(&cbuf);
3562 
3563     if (EmitSync & 4) {
3564       // Disable - inhibit all inlining.  Force control through the slow-path
3565       masm.cmpptr (rsp, 0) ; 
3566     } else 
3567     if (EmitSync & 8) {
3568       Label DONE_LABEL ;
3569       if (UseBiasedLocking) {
3570          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3571       }
3572       // classic stack-locking code ...
3573       masm.movptr(tmpReg, Address(boxReg, 0)) ;
3574       masm.testptr(tmpReg, tmpReg) ;
3575       masm.jcc   (Assembler::zero, DONE_LABEL) ;
3576       if (os::is_MP()) { masm.lock(); }
3577       masm.cmpxchgptr(tmpReg, Address(objReg, 0));          // Uses EAX which is box
3578       masm.bind(DONE_LABEL);
3579     } else {
3580       Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3581 
3582       // Critically, the biased locking test must have precedence over
3583       // and appear before the (box->dhw == 0) recursive stack-lock test.
3584       if (UseBiasedLocking && !UseOptoBiasInlining) {
3585          masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3586       }
3587       
3588       masm.cmpptr(Address(boxReg, 0), 0) ;            // Examine the displaced header
3589       masm.movptr(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
3590       masm.jccb  (Assembler::zero, DONE_LABEL) ;      // 0 indicates recursive stack-lock
3591 
3592       masm.testptr(tmpReg, 0x02) ;                     // Inflated? 
3593       masm.jccb  (Assembler::zero, Stacked) ;
3594 
3595       masm.bind  (Inflated) ;
3596       // It's inflated.
3597       // Despite our balanced locking property we still check that m->_owner == Self
3598       // as java routines or native JNI code called by this thread might
3599       // have released the lock.
3600       // Refer to the comments in synchronizer.cpp for how we might encode extra
3601       // state in _succ so we can avoid fetching EntryList|cxq.
3602       //
3603       // I'd like to add more cases in fast_lock() and fast_unlock() --
3604       // such as recursive enter and exit -- but we have to be wary of
3605       // I$ bloat, T$ effects and BP$ effects.
3606       //
3607       // If there's no contention try a 1-0 exit.  That is, exit without
3608       // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
3609       // we detect and recover from the race that the 1-0 exit admits.
3610       //
3611       // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3612       // before it STs null into _owner, releasing the lock.  Updates
3613       // to data protected by the critical section must be visible before
3614       // we drop the lock (and thus before any other thread could acquire
3615       // the lock and observe the fields protected by the lock).
3616       // IA32's memory-model is SPO, so STs are ordered with respect to
3617       // each other and there's no need for an explicit barrier (fence).
3618       // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3619 
3620       masm.get_thread (boxReg) ;
3621       if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
3622         // prefetchw [ebx + Offset(_owner)-2]
3623         masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
3624       }
3625 
3626       // Note that we could employ various encoding schemes to reduce
3627       // the number of loads below (currently 4) to just 2 or 3.
3628       // Refer to the comments in synchronizer.cpp.
3629       // In practice the chain of fetches doesn't seem to impact performance, however.
3630       if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
3631          // Attempt to reduce branch density - AMD's branch predictor.
3632          masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  
3633          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3634          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3635          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3636          masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
3637          masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
3638          masm.jmpb  (DONE_LABEL) ; 
3639       } else { 
3640          masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  
3641          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3642          masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
3643          masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3644          masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3645          masm.jccb  (Assembler::notZero, CheckSucc) ; 
3646          masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
3647          masm.jmpb  (DONE_LABEL) ; 
3648       }
3649 
3650       // The Following code fragment (EmitSync & 65536) improves the performance of
3651       // contended applications and contended synchronization microbenchmarks.
3652       // Unfortunately the emission of the code - even though not executed - causes regressions
3653       // in scimark and jetstream, evidently because of $ effects.  Replacing the code
3654       // with an equal number of never-executed NOPs results in the same regression.
3655       // We leave it off by default.
3656 
3657       if ((EmitSync & 65536) != 0) {
3658          Label LSuccess, LGoSlowPath ;
3659 
3660          masm.bind  (CheckSucc) ;
3661 
3662          // Optional pre-test ... it's safe to elide this
3663          if ((EmitSync & 16) == 0) { 
3664             masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; 
3665             masm.jccb  (Assembler::zero, LGoSlowPath) ; 
3666          }
3667 
3668          // We have a classic Dekker-style idiom:
3669          //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
3670          // There are a number of ways to implement the barrier:
3671          // (1) lock:andl &m->_owner, 0
3672          //     is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
3673          //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
3674          //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
3675          // (2) If supported, an explicit MFENCE is appealing.
3676          //     In older IA32 processors MFENCE is slower than lock:add or xchg
3677          //     particularly if the write-buffer is full as might be the case if
3678          //     if stores closely precede the fence or fence-equivalent instruction.
3679          //     In more modern implementations MFENCE appears faster, however.
3680          // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
3681          //     The $lines underlying the top-of-stack should be in M-state.
3682          //     The locked add instruction is serializing, of course.
3683          // (4) Use xchg, which is serializing
3684          //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
3685          // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
3686          //     The integer condition codes will tell us if succ was 0.
3687          //     Since _succ and _owner should reside in the same $line and
3688          //     we just stored into _owner, it's likely that the $line
3689          //     remains in M-state for the lock:orl.
3690          //
3691          // We currently use (3), although it's likely that switching to (2)
3692          // is correct for the future.
3693             
3694          masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
3695          if (os::is_MP()) { 
3696             if (VM_Version::supports_sse2() && 1 == FenceInstruction) { 
3697               masm.mfence();
3698             } else { 
3699               masm.lock () ; masm.addptr(Address(rsp, 0), 0) ; 
3700             }
3701          }
3702          // Ratify _succ remains non-null
3703          masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; 
3704          masm.jccb  (Assembler::notZero, LSuccess) ; 
3705 
3706          masm.xorptr(boxReg, boxReg) ;                  // box is really EAX
3707          if (os::is_MP()) { masm.lock(); }
3708          masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3709          masm.jccb  (Assembler::notEqual, LSuccess) ;
3710          // Since we're low on registers we installed rsp as a placeholding in _owner.
3711          // Now install Self over rsp.  This is safe as we're transitioning from
3712          // non-null to non=null
3713          masm.get_thread (boxReg) ;
3714          masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
3715          // Intentional fall-through into LGoSlowPath ...
3716 
3717          masm.bind  (LGoSlowPath) ; 
3718          masm.orptr(boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3719          masm.jmpb  (DONE_LABEL) ; 
3720 
3721          masm.bind  (LSuccess) ; 
3722          masm.xorptr(boxReg, boxReg) ;                 // set ICC.ZF=1 to indicate success
3723          masm.jmpb  (DONE_LABEL) ; 
3724       }
3725 
3726       masm.bind (Stacked) ;
3727       // It's not inflated and it's not recursively stack-locked and it's not biased.
3728       // It must be stack-locked.
3729       // Try to reset the header to displaced header.
3730       // The "box" value on the stack is stable, so we can reload
3731       // and be assured we observe the same value as above.
3732       masm.movptr(tmpReg, Address(boxReg, 0)) ;
3733       if (os::is_MP()) {   masm.lock();    }
3734       masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
      // Intentional fall-thru into DONE_LABEL
3736 
3737 
3738       // DONE_LABEL is a hot target - we'd really like to place it at the
3739       // start of cache line by padding with NOPs.
3740       // See the AMD and Intel software optimization manuals for the
3741       // most efficient "long" NOP encodings.
3742       // Unfortunately none of our alignment mechanisms suffice.
3743       if ((EmitSync & 65536) == 0) {
3744          masm.bind (CheckSucc) ;
3745       }
3746       masm.bind(DONE_LABEL);
3747 
3748       // Avoid branch to branch on AMD processors
3749       if (EmitSync & 32768) { masm.nop() ; }
3750     }
3751   %}
3752 
3753 
  // Pop the top of the CPU stack into EDX (single-byte opcode 0x58+reg, reg=2).
  enc_class enc_pop_rdx() %{
    emit_opcode(cbuf,0x5A);           // POP EDX
  %}
3757 
  // Tail-jump to the shared rethrow stub.  The 32-bit displacement is
  // pc-relative, computed against the end of the 5-byte jmp (opcode + rel32).
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();          // mark the jmp start so the relocation covers it
    emit_opcode(cbuf, 0xE9);        // jmp    entry
    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
                   runtime_call_Relocation::spec(), RELOC_IMM32 );
  %}
3764 
3765 
  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  The hardware throws an exception which
  // patches up the correct value directly to the stack.
  enc_class D2I_encoding( regD src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
    // However, I2C adapters and other float-stack manglers leave pending
    // invalid-op exceptions hanging.  We would have to clear them before
    // enabling them and that is more expensive than just testing for the
    // invalid value Intel stores down in the corner cases.
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as an int, popping the FPU stack
    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted int; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    // 0x80000000 is the x87 "integer indefinite" value FISTP stores for
    // NaN/overflow; only that result takes the slow runtime path.
    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}
3813 
  // Convert a double (in FPR0) to a long, same scheme as D2I_encoding:
  // truncating rounding mode, FISTP to memory, slow runtime call on the
  // 0x8000000000000000 "indefinite" result.
  enc_class D2L_encoding( regD src ) %{
    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes) for the long result
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);
    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);
    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted long; adjust CPU stack
    emit_opcode(cbuf,0x58);       // POP EAX
    emit_opcode(cbuf,0x5A);       // POP EDX
    // Slow path iff the result is exactly 0x80000000_00000000
    // (EDX == 0x80000000 and EAX == 0).
    emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07+4);     // Size of slow_call
    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x07);       // Size of slow_call
    // Push src onto stack slow-path
    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
    emit_d8    (cbuf,0xC0-1+$src$$reg );
    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}
3855 
  // Convert a float in an XMM register to a long: bounce the value through
  // memory to the x87 stack, FISTP it with truncating rounding, and take the
  // shared d2l slow path on the 0x8000000000000000 "indefinite" result.
  enc_class X2L_encoding( regX src ) %{
    // Allocate two words (8 bytes) for the long result
    emit_opcode(cbuf,0x83);      // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF3 );  // MOVSS [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );     // FLD_S [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9);      // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());

    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);      // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);

    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);      // FLDCW   std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted long; adjust CPU stack
    emit_opcode(cbuf,0x58);      // POP EAX

    emit_opcode(cbuf,0x5A);      // POP EDX

    // Slow path iff result == 0x80000000_00000000 (EDX test then EAX test)
    emit_opcode(cbuf,0x81);      // CMP EDX,imm
    emit_d8    (cbuf,0xFA);      // rdx
    emit_d32   (cbuf,0x80000000);//         0x80000000

    emit_opcode(cbuf,0x75);      // JNE around_slow_call
    emit_d8    (cbuf,0x13+4);    // Size of slow_call

    emit_opcode(cbuf,0x85);      // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);      // 2/rax,/rax,

    emit_opcode(cbuf,0x75);      // JNE around_slow_call
    emit_d8    (cbuf,0x13);      // Size of slow_call

    // Slow path: reload src onto the FPU stack for the runtime wrapper
    // Allocate a word
    emit_opcode(cbuf,0x83);      // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);

    emit_opcode  (cbuf, 0xF3 );  // MOVSS [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );     // FLD_S [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);      // ADD ESP,4
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x04);

    // CALL directly to the runtime (the float was widened to double on the
    // FPU stack, so the d2l wrapper handles the f2l corner cases too)
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}
3928 
  // Convert a double in an XMM register to a long: same scheme as
  // X2L_encoding but moving 8 bytes with MOVSD/FLD_D.
  enc_class XD2L_encoding( regXD src ) %{
    // Allocate two words (8 bytes)
    emit_opcode(cbuf,0x83);      // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 );  // MOVSD [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );     // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9);      // FLDCW  trunc
    emit_opcode(cbuf,0x2D);
    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());

    // Encoding assumes a double has been pushed into FPR0.
    // Store down the double as a long, popping the FPU stack
    emit_opcode(cbuf,0xDF);      // FISTP [ESP]
    emit_opcode(cbuf,0x3C);
    emit_d8(cbuf,0x24);

    // Restore the rounding mode; mask the exception
    emit_opcode(cbuf,0xD9);      // FLDCW   std/24-bit mode
    emit_opcode(cbuf,0x2D);
    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());

    // Load the converted long; adjust CPU stack
    emit_opcode(cbuf,0x58);      // POP EAX

    emit_opcode(cbuf,0x5A);      // POP EDX

    // Slow path iff result == 0x80000000_00000000 (EDX test then EAX test)
    emit_opcode(cbuf,0x81);      // CMP EDX,imm
    emit_d8    (cbuf,0xFA);      // rdx
    emit_d32   (cbuf,0x80000000); //         0x80000000

    emit_opcode(cbuf,0x75);      // JNE around_slow_call
    emit_d8    (cbuf,0x13+4);    // Size of slow_call

    emit_opcode(cbuf,0x85);      // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);      // 2/rax,/rax,

    emit_opcode(cbuf,0x75);      // JNE around_slow_call
    emit_d8    (cbuf,0x13);      // Size of slow_call

    // Push src onto stack slow-path
    // Allocate two words (8 bytes)
    emit_opcode(cbuf,0x83);      // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 );  // MOVSD [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );     // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);      // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x08);

    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);      // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Carry on here...
  %}
4002 
  // Move an x87 double into an XMM register as a float, bouncing through a
  // 4-byte stack temp.  If src is not already at the top of the FPU stack
  // it is first FLDed there; 0xD9 /2 is FST_S (keep), /3 is FSTP_S (pop).
  enc_class D2X_encoding( regX dst, regD src ) %{
    // Allocate a word
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );       // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;                      // popping variant: drop the FLDed copy
    }
    store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S  [ESP]

    emit_opcode  (cbuf, 0xF3 );        // MOVSS dst(xmm), [ESP]
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x10 );
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);            // ADD ESP,4
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x04);
    // Carry on here...
  %}
4026 
  // XMM float/double to int.  The CVTT opcode bytes are emitted by the
  // instruct's opcode fields before this encoding runs; here we emit only
  // the ModRM byte, then the slow path taken when the truncating convert
  // produced the 0x80000000 "indefinite" result.  $primary selects the
  // double flavor (8-byte temp, MOVSD/FLD_D) vs the float flavor.
  enc_class FX2I_encoding( regX src, eRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);

    // Compare the result to see if we need to go to the slow path
    emit_opcode(cbuf,0x81);       // CMP dst,imm
    emit_rm    (cbuf,0x3,0x7,$dst$$reg);
    emit_d32   (cbuf,0x80000000); //         0x80000000

    emit_opcode(cbuf,0x75);       // JNE around_slow_call
    emit_d8    (cbuf,0x13);       // Size of slow_call
    // Store xmm to a temp memory
    // location and push it onto stack.

    emit_opcode(cbuf,0x83);  // SUB ESP,4 (or 8 for the double flavor)
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf, $primary ? 0x8 : 0x4);

    emit_opcode  (cbuf, $primary ? 0xF2 : 0xF3 );   // MOVSD/MOVSS [ESP], xmm
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf, $primary ? 0xDD : 0xD9 );      // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);    // ADD ESP,4 (or 8 for the double flavor)
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf, $primary ? 0x8 : 0x4);

    // CALL directly to the runtime
    cbuf.set_insts_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );

    // Carry on here...
  %}
4063 
  // Move an XMM float onto the x87 stack (widening to the FPU's internal
  // format) via a 4-byte stack temp.
  enc_class X2D_encoding( regD dst, regX src ) %{
    // Allocate a word
    emit_opcode(cbuf,0x83);     // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);

    emit_opcode  (cbuf, 0xF3 ); // MOVSS [ESP], xmm
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );    // FLD_S [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);     // ADD ESP,4
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x04);

    // Carry on here...
  %}
4084 
  // Absolute value of an XMM float: clear the sign bit by ANDing with the
  // in-memory sign-mask constant pool.
  enc_class AbsXF_encoding(regX dst) %{
    address signmask_address=(address)float_signmask_pool;
    // andps:\tANDPS  $dst,[signconst]
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x54);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // mod=00 rm=101: [disp32] absolute
    emit_d32(cbuf, (int)signmask_address);
  %}
4093 
  // Absolute value of an XMM double: clear the sign bit via ANDPD with the
  // double sign-mask constant pool (0x66 prefix selects the PD form).
  enc_class AbsXD_encoding(regXD dst) %{
    address signmask_address=(address)double_signmask_pool;
    // andpd:\tANDPD  $dst,[signconst]
    emit_opcode(cbuf, 0x66);
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x54);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // [disp32] absolute addressing
    emit_d32(cbuf, (int)signmask_address);
  %}
4103 
  // Negate an XMM float: flip the sign bit by XORing with the sign-flip
  // constant pool.
  enc_class NegXF_encoding(regX dst) %{
    address signmask_address=(address)float_signflip_pool;
    // xorps:\tXORPS  $dst,[signconst]
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x57);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // [disp32] absolute addressing
    emit_d32(cbuf, (int)signmask_address);
  %}
4112 
  // Negate an XMM double: flip the sign bit via XORPD with the double
  // sign-flip constant pool (0x66 prefix selects the PD form).
  enc_class NegXD_encoding(regXD dst) %{
    address signmask_address=(address)double_signflip_pool;
    // xorpd:\tXORPD  $dst,[signconst]
    emit_opcode(cbuf, 0x66);
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, 0x57);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // [disp32] absolute addressing
    emit_d32(cbuf, (int)signmask_address);
  %}
4122 
  enc_class FMul_ST_reg( eRegF src1 ) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FMUL   ST,$src  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src1$$reg);
  %}
4129 
  enc_class FAdd_ST_reg( eRegF src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */  (non-popping add into stack top)
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    //could use FADDP  src2,fpST  /* DE C0+i */
  %}
4136 
  enc_class FAddP_reg_ST( eRegF src2 ) %{
    // FADDP  src2,ST  /* DE C0+i */  (add into src2, then pop the FPU stack)
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
  %}
4142 
  // Fused subtract-then-divide on the x87 stack top.
  enc_class subF_divF_encode( eRegF src1, eRegF src2) %{
    // Operand has been loaded into fp ST (stack top)
      // FSUB   ST,$src1  /* D8 E0+i */
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xE0 + $src1$$reg);

      // FDIV   ST,$src2  /* D8 F0+i */
      emit_opcode(cbuf, 0xD8);
      emit_opcode(cbuf, 0xF0 + $src2$$reg);
  %}
4153 
  // Add then multiply on the x87 stack top: ST = (ST + src1) * src2.
  enc_class MulFAddF (eRegF src1, eRegF src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMUL  ST,src2  /* D8 C8+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}
4164 
4165 
  // Add on stack top, then multiply-and-pop into src2: src2 *= (ST + src1).
  enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{
    // Operand was loaded from memory into fp ST (stack top)
    // FADD   ST,$src  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src1$$reg);

    // FMULP  src2,ST  /* DE C8+i */
    emit_opcode(cbuf, 0xDE);
    emit_opcode(cbuf, 0xC8 + $src2$$reg);
  %}
4176 
  // Atomically load the volatile long: FILD m64 (0xDF /5) pulls the 64-bit
  // value onto the FPU stack in one access, then FISTP m64 (0xDF /7) stores
  // it into the destination stack slot.
  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
    emit_opcode(cbuf,0xDF);
    int rm_byte_opcode = 0x05;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
  %}
4189 
  // Atomically load a volatile long into a stack slot using an XMM
  // register as the 64-bit intermediary (SSE2 path).
  enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
    { // Atomic long load
      // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
      int base     = $mem$$base;
      int index    = $mem$$index;
      int scale    = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
    { // MOVSD $dst,$tmp ! atomic long store
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x11);
      int base     = $dst$$base;
      int index    = $dst$$index;
      int scale    = $dst$$scale;
      int displace = $dst$$disp;
      bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
  %}
4215 
  // Atomically load a volatile long into a GPR pair: one 64-bit XMM load,
  // then split the halves out with MOVD / PSRLQ / MOVD.
  enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
    { // Atomic long load
      // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
      int base     = $mem$$base;
      int index    = $mem$$index;
      int scale    = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
    { // MOVD $dst.lo,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
    }
    { // PSRLQ $tmp,32  -- shift the high half down into the low lane
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x73);
      emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
      emit_d8(cbuf, 0x20);
    }
    { // MOVD $dst.hi,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
    }
  %}
4249 
  // Volatile Store Long.  Must be atomic, so move it into
  // the FP TOS and then do a 64-bit FIST.  Has to probe the
  // target address before the store (for null-ptr checks)
  // so the memory operand is used twice in the encoding.
  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );  // FILD m64 from the stack slot
    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
    emit_opcode(cbuf,0xDF);           // FISTP m64 (0xDF /7) does the atomic store
    int rm_byte_opcode = 0x07;
    int base     = $mem$$base;
    int index    = $mem$$index;
    int scale    = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
  %}
4266 
  // Atomically store a volatile long from a stack slot, using an XMM
  // register as the 64-bit intermediary (SSE2 path).
  enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
    { // Atomic long load
      // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
      int base     = $src$$base;
      int index    = $src$$index;
      int scale    = $src$$scale;
      int displace = $src$$disp;
      bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
    cbuf.set_insts_mark();            // Mark start of MOVSD in case $mem has an oop
    { // MOVSD $mem,$tmp ! atomic long store
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x11);
      int base     = $mem$$base;
      int index    = $mem$$index;
      int scale    = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
  %}
4293 
  // Atomically store a volatile long from a GPR pair: assemble the 64-bit
  // value in an XMM register (MOVD lo, MOVD hi, PUNPCKLDQ), then one MOVSD.
  enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
    { // MOVD $tmp,$src.lo
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    }
    { // MOVD $tmp2,$src.hi
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
    }
    { // PUNPCKLDQ $tmp,$tmp2  -- interleave: tmp[63:32] = tmp2[31:0]
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x62);
      emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
    }
    cbuf.set_insts_mark();            // Mark start of MOVSD in case $mem has an oop
    { // MOVSD $mem,$tmp ! atomic long store
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x11);
      int base     = $mem$$base;
      int index    = $mem$$index;
      int scale    = $mem$$scale;
      int displace = $mem$$disp;
      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
    }
  %}
4326 
4327   // Safepoint Poll.  This polls the safepoint page, and causes an
4328   // exception if it is not readable. Unfortunately, it kills the condition code
4329   // in the process
// We currently use TESTL [spp],EDI
4331   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
4332 
  enc_class Safepoint_Poll() %{
    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);  // tag as a poll site
    // TESTL [polling_page],EDI : 0x85 with ModRM mod=00 reg=EDI rm=101 ([disp32])
    emit_opcode(cbuf,0x85);
    emit_rm (cbuf, 0x0, 0x7, 0x5);
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  %}
4339 %}
4340 
4341 
4342 //----------FRAME--------------------------------------------------------------
4343 // Definition of frame structure and management information.
4344 //
4345 //  S T A C K   L A Y O U T    Allocators stack-slot number
4346 //                             |   (to get allocators register number
4347 //  G  Owned by    |        |  v    add OptoReg::stack0())
4348 //  r   CALLER     |        |
4349 //  o     |        +--------+      pad to even-align allocators stack-slot
4350 //  w     V        |  pad0  |        numbers; owned by CALLER
4351 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4352 //  h     ^        |   in   |  5
4353 //        |        |  args  |  4   Holes in incoming args owned by SELF
4354 //  |     |        |        |  3
4355 //  |     |        +--------+
4356 //  V     |        | old out|      Empty on Intel, window on Sparc
4357 //        |    old |preserve|      Must be even aligned.
4358 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
4359 //        |        |   in   |  3   area for Intel ret address
4360 //     Owned by    |preserve|      Empty on Sparc.
4361 //       SELF      +--------+
4362 //        |        |  pad2  |  2   pad to align old SP
4363 //        |        +--------+  1
4364 //        |        | locks  |  0
4365 //        |        +--------+----> OptoReg::stack0(), even aligned
4366 //        |        |  pad1  | 11   pad to align new SP
4367 //        |        +--------+
4368 //        |        |        | 10
4369 //        |        | spills |  9   spills
4370 //        V        |        |  8   (pad0 slot for callee)
4371 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4372 //        ^        |  out   |  7
4373 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4374 //     Owned by    +--------+
4375 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4376 //        |    new |preserve|      Must be even-aligned.
4377 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
4378 //        |        |        |
4379 //
4380 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4381 //         known from SELF's arguments and the Java calling convention.
4382 //         Region 6-7 is determined per call site.
4383 // Note 2: If the calling convention leaves holes in the incoming argument
4384 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
4386 //         incoming area, as the Java calling convention is completely under
4387 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
4389 //         varargs C calling conventions.
4390 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4391 //         even aligned with pad0 as needed.
4392 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4393 //         region 6-11 is even aligned; it may be padded out more so that
4394 //         the region from SP to FP meets the minimum stack alignment.
4395 
4396 frame %{
4397   // What direction does stack grow in (assumed to be same for C & Java)
4398   stack_direction(TOWARDS_LOW);
4399 
4400   // These three registers define part of the calling convention
4401   // between compiled code and the interpreter.
4402   inline_cache_reg(EAX);                // Inline Cache Register
4403   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
4404 
4405   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
4406   cisc_spilling_operand_name(indOffset32);
4407 
4408   // Number of stack slots consumed by locking an object
4409   sync_stack_slots(1);
4410 
4411   // Compiled code's Frame Pointer
4412   frame_pointer(ESP);
4413   // Interpreter stores its frame pointer in a register which is
4414   // stored to the stack by I2CAdaptors.
4415   // I2CAdaptors convert from interpreted java to compiled java.
4416   interpreter_frame_pointer(EBP);
4417 
4418   // Stack alignment requirement
4419   // Alignment size in bytes (128-bit -> 16 bytes)
4420   stack_alignment(StackAlignmentInBytes);
4421 
4422   // Number of stack slots between incoming argument block and the start of
4423   // a new frame.  The PROLOG must add this many slots to the stack.  The
4424   // EPILOG must remove this many slots.  Intel needs one slot for
4425   // return address and one for rbp, (must save rbp)
4426   in_preserve_stack_slots(2+VerifyStackAtCalls);
4427 
4428   // Number of outgoing stack slots killed above the out_preserve_stack_slots
4429   // for calls to C.  Supports the var-args backing area for register parms.
4430   varargs_C_out_slots_killed(0);
4431 
4432   // The after-PROLOG location of the return address.  Location of
4433   // return address specifies a type (REG or STACK) and a number
4434   // representing the register number (i.e. - use a register name) or
4435   // stack slot.
4436   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
4437   // Otherwise, it is above the locks and verification slot and alignment word
4438   return_addr(STACK - 1 +
4439               round_to(1+VerifyStackAtCalls+
4440               Compile::current()->fixed_slots(),
4441               (StackAlignmentInBytes/wordSize)));
4442 
4443   // Body of function which returns an integer array locating
4444   // arguments either in registers or in stack slots.  Passed an array
4445   // of ideal registers called "sig" and a "length" count.  Stack-slot
4446   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4447   // arguments for a CALLEE.  Incoming stack arguments are
4448   // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // Delegate to the shared runtime's Java calling convention.
    // No difference between incoming and outgoing arguments, so pass
    // false for the is_outgoing flag.
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}
4453 
4454 
4455   // Body of function which returns an integer array locating
4456   // arguments either in registers or in stack slots.  Passed an array
4457   // of ideal registers called "sig" and a "length" count.  Stack-slot
4458   // offsets are based on outgoing arguments, i.e. a CALLER setting up
4459   // arguments for a CALLEE.  Incoming stack arguments are
4460   // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // Delegate to the shared runtime's native (C) calling convention.
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
  %}
4465 
4466   // Location of C & interpreter return values
  c_return_value %{
    // Return-register tables indexed by ideal register type:
    // ints in EAX, longs in EDX:EAX, floats/doubles on the x87 stack (FPR1).
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    // NOTE(review): the float case tests UseSSE>=2 here, while return_value
    // below tests UseSSE>=1 -- presumably intentional for the C ABI; confirm.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0a_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0a_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}
4481 
  // Location of compiled Java return values
  return_value %{
    // Same tables as c_return_value: ints in EAX, longs in EDX:EAX,
    // floats/doubles on the x87 stack (FPR1) when SSE is not used.
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    // With SSE enabled, float/double results live in XMM0 instead of FPR1.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0a_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0a_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}
4493 
4494 %}
4495 
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_pc_relative(0);  // Required PC Relative flag
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction
4511 
4512 //----------OPERANDS-----------------------------------------------------------
4513 // Operand definitions must precede instruction definitions for correct parsing
4514 // in the ADLC because operands constitute user defined types which are used in
4515 // instruction definitions.
4516 
4517 //----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate: any 32-bit integer constant
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the constant zero (for test vs zero)
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the constant one (for increment)
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the constant -1 (for decrement)
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4557 
// Integer Immediate: 0..3, the valid scale values for addressing modes
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 8-bit signed value (-128..127)
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 16-bit signed value (-32768..32767)
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}
4584 
// Integer Immediate: the constant 32 (used by long shifts)
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 1..31
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 32..63
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the constant 1
operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the constant 2
operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the constant 3
operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4639 
// Pointer Immediate: any pointer constant
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}
4658 
// Long Immediate: any 64-bit constant
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: -1 (all bits set)
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask (exactly 0xFFFFFFFF)
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: value representable as a signed 32-bit int
// (sign-extending the low 32 bits reproduces the original long)
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}
4718 
// Double Immediate zero (x87 path only)
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate one (x87 path only)
operand immD1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (x87 path only)
operand immD() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 path)
operand immXD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero (SSE2 path); bit-pattern test also excludes -0.0
operand immXD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}
4771 
// Float Immediate zero (x87 path only)
operand immF0() %{
  predicate(UseSSE == 0 && n->getf() == 0.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate one (x87 path only)
operand immF1() %{
  predicate(UseSSE == 0 && n->getf() == 1.0F);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (x87 path only)
operand immF() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE path)
operand immXF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero (SSE path).  Zero and not -0.0 (bit-pattern test)
operand immXF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4821 
// Immediates for special shifts (sign extend)

// Integer Immediate: the constant 16
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the constant 24
operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking (0xFF)
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for short-wide masking (0xFFFF)
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4858 
// Register Operands
// Integer Register: any allocatable general-purpose register (e_reg class)
operand eRegI() %{
  constraint(ALLOC_IN_RC(e_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register: only EAX/EBX/ECX/EDX (x_reg class)
// NOTE(review): presumably the registers with byte-addressable
// subregisters -- confirm against the x_reg class definition.
operand xRegI(eRegI reg) %{
  constraint(ALLOC_IN_RC(x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}
4888 
// Special Registers
// Fixed register: EAX only
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(eRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Fixed register: EBX only
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(eRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Fixed register: ECX only
operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(eRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

// Fixed register: EDX only
operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(eRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

// Fixed register: EDI only
operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(eRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}
4935 
// Integer register drawn from the nax_reg class
// (matches only ECX/EDX/ESI/EDI here; presumably "no EAX" -- confirm
// against the nax_reg register-class definition earlier in the file)
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register drawn from the nadx_reg class
// (matches only EBX/ECX/ESI/EDI; presumably "no EAX/EDX")
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register drawn from the ncx_reg class
// (matches only EAX/EDX/ESI/EDI; presumably "no ECX")
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}
4971 
// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
// Fixed register: ESI only
operand eSIRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(esi_reg));
   match(reg);
   match(eRegI);

   format %{ "ESI" %}
   interface(REG_INTER);
%}
4982 
// Pointer Register
// Pointer in any register of the any_reg class
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer in a general-purpose register (e_reg class)
operand eRegP() %{
  constraint(ALLOC_IN_RC(e_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
// Higher op_cost discourages its use unless required.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(e_reg_no_rbp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register drawn from the nax_reg class
// (matches EBX/EDX/ECX/ESI/EDI; presumably "no EAX")
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register drawn from the nabx_reg class
// (matches ECX/EDX/ESI/EDI; presumably "no EAX/EBX")
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register drawn from the p_reg class
operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}
5059 
// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

// Fixed pointer register: ESI only
operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Fixed pointer register: EBP only (interpreter frame pointer)
operand eBPRegP() %{
  constraint(ALLOC_IN_RC(ebp_reg));
  match(RegP);
  format %{ "EBP" %}
  interface(REG_INTER);
%}
5106 
// Long value in a pair of 32-bit registers (long_reg class)
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Long pinned to the EDX:EAX register pair
operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

// Long pinned to the EBX:ECX register pair
operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply: EDX:EAX pair,
// but the format shows only the low half (EAX)
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}
5140 
// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// Flags register for compares whose unordered case needs no fixup.
// predicate(false): never selected directly by matching.
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
// Condition Code Register used by long compare (EQ/NE variant)
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
// Condition Code Register used by long compare (LE/GT variant)
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}
5187 
// x87 double register operands (used only when SSE2 is unavailable)
operand regD() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

// x87 double pinned to FPR1 (top of FPU stack)
operand regDPR1(regD reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// x87 double pinned to FPR2
operand regDPR2(regD reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

// x87 double register excluding FPR1 (dbl_notreg0 class)
operand regnotDPR1(regD reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}
5222 
// XMM Double register operands (SSE2 and above)
operand regXD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(xdb_reg));
  match(RegD);
  match(regXD6);
  match(regXD7);
  format %{ %}
  interface(REG_INTER);
%}

// XMM6 double register operands
operand regXD6(regXD reg) %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(xdb_reg6));
  match(reg);
  format %{ "XMM6" %}
  interface(REG_INTER);
%}

// XMM7 double register operands
operand regXD7(regXD reg) %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(xdb_reg7));
  match(reg);
  format %{ "XMM7" %}
  interface(REG_INTER);
%}

// x87 float register operands (used only when SSE2 is unavailable)
operand regF() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// x87 float pinned to FPR1 (top of FPU stack)
operand regFPR1(regF reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM float register operands (SSE and above)
operand regX() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(xmm_reg));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}
5279 
5280 
//----------Memory Operands----------------------------------------------------
// Direct Memory Operand: absolute address from a pointer constant
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);  // no base register
    index(0x4);        // no index register
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand: [reg]
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(e_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index register
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand: [reg + imm8]
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index register
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand: [reg + imm32]
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index register
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand: integer register biased by a
// pointer constant (note the operands to AddP are swapped vs indOffset32)
operand indOffset32X(eRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index register
    scale(0x0);
    disp($off);
  %}
%}
5347 
// Indirect Memory Plus Index Register Plus Offset Operand: [reg + ireg + imm32]
operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand: [reg + ireg]
operand indIndex(eRegP reg, eRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}

// // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
// // -------------------------------------------------------------------------
// // Scaled Memory Operands
// // Indirect Memory Times Scale Plus Offset Operand
// operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
//   match(AddP off (LShiftI ireg scale));
//
//   op_cost(10);
//   format %{"[$off + $ireg << $scale]" %}
//   interface(MEMORY_INTER) %{
//     base(0x4);
//     index($ireg);
//     scale($scale);
//     disp($off);
//   %}
// %}

// Indirect Memory Times Scale Plus Index Register: [reg + (ireg << scale)]
operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
  match(AddP reg (LShiftI ireg scale));

  op_cost(10);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand:
// [reg + (ireg << scale) + imm32]
operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}
5421 
//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support: address register pinned to ESI so it cannot
// overlap the destination pair
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{  %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long: [reg], reg restricted as above
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index register
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand: [reg + imm32], reg restricted
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index register
    scale(0x0);
    disp($off);
  %}
%}

// Operand class grouping the safe load-long addressing modes
opclass load_long_memory(load_long_indirect, load_long_indOffset32);
5467 
5468 
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.

// Stack slot holding a pointer
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding an int
operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a float
operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a double
operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a long
operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5532 
//----------Memory Operands - Win95 Implicit Null Variants----------------
// These mirror the regular memory operands above but exclude EBP as a base
// (eRegP_no_EBP) and carry a higher op_cost to discourage their use.

// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(e_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index register
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index register
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);  // no index register
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}
5624 
//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Code (signed; values are x86 condition-code encodings)
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xC, "l");
    greater_equal(0xD, "ge");
    less_equal(0xE, "le");
    greater(0xF, "g");
  %}
%}

// Comparison Code, unsigned compare.  Used by FP also, with
// C2 (unordered) turned into GT or LT already.  The other bits
// C0 and C3 are turned into Carry & Zero flags.
operand cmpOpU() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}

// Floating comparisons that don't require any fixup for the unordered case
// (restricted to lt/ge/le/gt tests)
operand cmpOpUCF() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}


// Floating comparisons that can be fixed up with extra conditional jumps
// (restricted to eq/ne tests)
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}
5705 
5706 // Comparison Code for FP conditional move
5707 operand cmpOp_fcmov() %{
5708   match(Bool);
5709 
5710   format %{ "" %}
5711   interface(COND_INTER) %{
5712     equal        (0x0C8);
5713     not_equal    (0x1C8);
5714     less         (0x0C0);
5715     greater_equal(0x1C0);
5716     less_equal   (0x0D0);
5717     greater      (0x1D0);
5718   %}
5719 %}
5720 
5721 // Comparison Code used in long compares
5722 operand cmpOp_commute() %{
5723   match(Bool);
5724 
5725   format %{ "" %}
5726   interface(COND_INTER) %{
5727     equal(0x4, "e");
5728     not_equal(0x5, "ne");
5729     less(0xF, "g");
5730     greater_equal(0xE, "le");
5731     less_equal(0xD, "ge");
5732     greater(0xC, "l");
5733   %}
5734 %}
5735 
5736 //----------OPERAND CLASSES----------------------------------------------------
5737 // Operand Classes are groups of operands that are used to simplify
5738 // instruction definitions by not requiring the AD writer to specify separate
5739 // instructions for every form of operand when the instruction accepts
5740 // multiple operand types with the same basic encoding and format.  The classic
5741 // case of this is memory operands.
5742 
5743 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
5744                indIndex, indIndexScale, indIndexScaleOffset);
5745 
5746 // Long memory operations are encoded in 2 instructions and a +4 offset.
5747 // This means some kind of offset is always required and you cannot use
5748 // an oop as the offset (done when working on static globals).
5749 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
5750                     indIndex, indIndexScale, indIndexScaleOffset);
5751 
5752 
5753 //----------PIPELINE-----------------------------------------------------------
5754 // Rules which define the behavior of the target architectures pipeline.
5755 pipeline %{
5756 
5757 //----------ATTRIBUTES---------------------------------------------------------
5758 attributes %{
5759   variable_size_instructions;        // Fixed size instructions
5760   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5761   instruction_unit_size = 1;         // An instruction is 1 bytes long
5762   instruction_fetch_unit_size = 16;  // The processor fetches one line
5763   instruction_fetch_units = 1;       // of 16 bytes
5764 
5765   // List of nop instructions
5766   nops( MachNop );
5767 %}
5768 
5769 //----------RESOURCES----------------------------------------------------------
5770 // Resources are the functional units available to the machine
5771 
5772 // Generic P2/P3 pipeline
5773 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5774 // 3 instructions decoded per cycle.
5775 // 2 load/store ops per cycle, 1 branch, 1 FPU,
5776 // 2 ALU op, only ALU0 handles mul/div instructions.
5777 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5778            MS0, MS1, MEM = MS0 | MS1,
5779            BR, FPU,
5780            ALU0, ALU1, ALU = ALU0 | ALU1 );
5781 
5782 //----------PIPELINE DESCRIPTION-----------------------------------------------
5783 // Pipeline Description specifies the stages in the machine's pipeline
5784 
5785 // Generic P2/P3 pipeline
5786 pipe_desc(S0, S1, S2, S3, S4, S5);
5787 
5788 //----------PIPELINE CLASSES---------------------------------------------------
5789 // Pipeline Classes describe the stages in which input and output are
5790 // referenced by the hardware pipeline.
5791 
5792 // Naming convention: ialu or fpu
5793 // Then: _reg
5794 // Then: _reg if there is a 2nd register
5795 // Then: _long if it's a pair of instructions implementing a long
5796 // Then: _fat if it requires the big decoder
5797 //   Or: _mem if it requires the big decoder and a memory unit.
5798 
5799 // Integer ALU reg operation
5800 pipe_class ialu_reg(eRegI dst) %{
5801     single_instruction;
5802     dst    : S4(write);
5803     dst    : S3(read);
5804     DECODE : S0;        // any decoder
5805     ALU    : S3;        // any alu
5806 %}
5807 
5808 // Long ALU reg operation
5809 pipe_class ialu_reg_long(eRegL dst) %{
5810     instruction_count(2);
5811     dst    : S4(write);
5812     dst    : S3(read);
5813     DECODE : S0(2);     // any 2 decoders
5814     ALU    : S3(2);     // both alus
5815 %}
5816 
5817 // Integer ALU reg operation using big decoder
5818 pipe_class ialu_reg_fat(eRegI dst) %{
5819     single_instruction;
5820     dst    : S4(write);
5821     dst    : S3(read);
5822     D0     : S0;        // big decoder only
5823     ALU    : S3;        // any alu
5824 %}
5825 
5826 // Long ALU reg operation using big decoder
5827 pipe_class ialu_reg_long_fat(eRegL dst) %{
5828     instruction_count(2);
5829     dst    : S4(write);
5830     dst    : S3(read);
5831     D0     : S0(2);     // big decoder only; twice
5832     ALU    : S3(2);     // any 2 alus
5833 %}
5834 
5835 // Integer ALU reg-reg operation
5836 pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
5837     single_instruction;
5838     dst    : S4(write);
5839     src    : S3(read);
5840     DECODE : S0;        // any decoder
5841     ALU    : S3;        // any alu
5842 %}
5843 
5844 // Long ALU reg-reg operation
5845 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
5846     instruction_count(2);
5847     dst    : S4(write);
5848     src    : S3(read);
5849     DECODE : S0(2);     // any 2 decoders
5850     ALU    : S3(2);     // both alus
5851 %}
5852 
5853 // Integer ALU reg-reg operation
5854 pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
5855     single_instruction;
5856     dst    : S4(write);
5857     src    : S3(read);
5858     D0     : S0;        // big decoder only
5859     ALU    : S3;        // any alu
5860 %}
5861 
5862 // Long ALU reg-reg operation
5863 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
5864     instruction_count(2);
5865     dst    : S4(write);
5866     src    : S3(read);
5867     D0     : S0(2);     // big decoder only; twice
5868     ALU    : S3(2);     // both alus
5869 %}
5870 
5871 // Integer ALU reg-mem operation
5872 pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
5873     single_instruction;
5874     dst    : S5(write);
5875     mem    : S3(read);
5876     D0     : S0;        // big decoder only
5877     ALU    : S4;        // any alu
5878     MEM    : S3;        // any mem
5879 %}
5880 
5881 // Long ALU reg-mem operation
5882 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
5883     instruction_count(2);
5884     dst    : S5(write);
5885     mem    : S3(read);
5886     D0     : S0(2);     // big decoder only; twice
5887     ALU    : S4(2);     // any 2 alus
5888     MEM    : S3(2);     // both mems
5889 %}
5890 
5891 // Integer mem operation (prefetch)
5892 pipe_class ialu_mem(memory mem)
5893 %{
5894     single_instruction;
5895     mem    : S3(read);
5896     D0     : S0;        // big decoder only
5897     MEM    : S3;        // any mem
5898 %}
5899 
5900 // Integer Store to Memory
5901 pipe_class ialu_mem_reg(memory mem, eRegI src) %{
5902     single_instruction;
5903     mem    : S3(read);
5904     src    : S5(read);
5905     D0     : S0;        // big decoder only
5906     ALU    : S4;        // any alu
5907     MEM    : S3;
5908 %}
5909 
5910 // Long Store to Memory
5911 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
5912     instruction_count(2);
5913     mem    : S3(read);
5914     src    : S5(read);
5915     D0     : S0(2);     // big decoder only; twice
5916     ALU    : S4(2);     // any 2 alus
5917     MEM    : S3(2);     // Both mems
5918 %}
5919 
5920 // Integer Store to Memory
5921 pipe_class ialu_mem_imm(memory mem) %{
5922     single_instruction;
5923     mem    : S3(read);
5924     D0     : S0;        // big decoder only
5925     ALU    : S4;        // any alu
5926     MEM    : S3;
5927 %}
5928 
5929 // Integer ALU0 reg-reg operation
5930 pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
5931     single_instruction;
5932     dst    : S4(write);
5933     src    : S3(read);
5934     D0     : S0;        // Big decoder only
5935     ALU0   : S3;        // only alu0
5936 %}
5937 
5938 // Integer ALU0 reg-mem operation
5939 pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
5940     single_instruction;
5941     dst    : S5(write);
5942     mem    : S3(read);
5943     D0     : S0;        // big decoder only
5944     ALU0   : S4;        // ALU0 only
5945     MEM    : S3;        // any mem
5946 %}
5947 
5948 // Integer ALU reg-reg operation
5949 pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
5950     single_instruction;
5951     cr     : S4(write);
5952     src1   : S3(read);
5953     src2   : S3(read);
5954     DECODE : S0;        // any decoder
5955     ALU    : S3;        // any alu
5956 %}
5957 
5958 // Integer ALU reg-imm operation
5959 pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
5960     single_instruction;
5961     cr     : S4(write);
5962     src1   : S3(read);
5963     DECODE : S0;        // any decoder
5964     ALU    : S3;        // any alu
5965 %}
5966 
5967 // Integer ALU reg-mem operation
5968 pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
5969     single_instruction;
5970     cr     : S4(write);
5971     src1   : S3(read);
5972     src2   : S3(read);
5973     D0     : S0;        // big decoder only
5974     ALU    : S4;        // any alu
5975     MEM    : S3;
5976 %}
5977 
5978 // Conditional move reg-reg
5979 pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
5980     instruction_count(4);
5981     y      : S4(read);
5982     q      : S3(read);
5983     p      : S3(read);
5984     DECODE : S0(4);     // any decoder
5985 %}
5986 
5987 // Conditional move reg-reg
5988 pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
5989     single_instruction;
5990     dst    : S4(write);
5991     src    : S3(read);
5992     cr     : S3(read);
5993     DECODE : S0;        // any decoder
5994 %}
5995 
5996 // Conditional move reg-mem
5997 pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
5998     single_instruction;
5999     dst    : S4(write);
6000     src    : S3(read);
6001     cr     : S3(read);
6002     DECODE : S0;        // any decoder
6003     MEM    : S3;
6004 %}
6005 
6006 // Conditional move reg-reg long
6007 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
6008     single_instruction;
6009     dst    : S4(write);
6010     src    : S3(read);
6011     cr     : S3(read);
6012     DECODE : S0(2);     // any 2 decoders
6013 %}
6014 
6015 // Conditional move double reg-reg
6016 pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
6017     single_instruction;
6018     dst    : S4(write);
6019     src    : S3(read);
6020     cr     : S3(read);
6021     DECODE : S0;        // any decoder
6022 %}
6023 
6024 // Float reg-reg operation
6025 pipe_class fpu_reg(regD dst) %{
6026     instruction_count(2);
6027     dst    : S3(read);
6028     DECODE : S0(2);     // any 2 decoders
6029     FPU    : S3;
6030 %}
6031 
6032 // Float reg-reg operation
6033 pipe_class fpu_reg_reg(regD dst, regD src) %{
6034     instruction_count(2);
6035     dst    : S4(write);
6036     src    : S3(read);
6037     DECODE : S0(2);     // any 2 decoders
6038     FPU    : S3;
6039 %}
6040 
6041 // Float reg-reg operation
6042 pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
6043     instruction_count(3);
6044     dst    : S4(write);
6045     src1   : S3(read);
6046     src2   : S3(read);
6047     DECODE : S0(3);     // any 3 decoders
6048     FPU    : S3(2);
6049 %}
6050 
6051 // Float reg-reg operation
6052 pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
6053     instruction_count(4);
6054     dst    : S4(write);
6055     src1   : S3(read);
6056     src2   : S3(read);
6057     src3   : S3(read);
6058     DECODE : S0(4);     // any 3 decoders
6059     FPU    : S3(2);
6060 %}
6061 
6062 // Float reg-reg operation
6063 pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
6064     instruction_count(4);
6065     dst    : S4(write);
6066     src1   : S3(read);
6067     src2   : S3(read);
6068     src3   : S3(read);
6069     DECODE : S1(3);     // any 3 decoders
6070     D0     : S0;        // Big decoder only
6071     FPU    : S3(2);
6072     MEM    : S3;
6073 %}
6074 
6075 // Float reg-mem operation
6076 pipe_class fpu_reg_mem(regD dst, memory mem) %{
6077     instruction_count(2);
6078     dst    : S5(write);
6079     mem    : S3(read);
6080     D0     : S0;        // big decoder only
6081     DECODE : S1;        // any decoder for FPU POP
6082     FPU    : S4;
6083     MEM    : S3;        // any mem
6084 %}
6085 
6086 // Float reg-mem operation
6087 pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
6088     instruction_count(3);
6089     dst    : S5(write);
6090     src1   : S3(read);
6091     mem    : S3(read);
6092     D0     : S0;        // big decoder only
6093     DECODE : S1(2);     // any decoder for FPU POP
6094     FPU    : S4;
6095     MEM    : S3;        // any mem
6096 %}
6097 
6098 // Float mem-reg operation
6099 pipe_class fpu_mem_reg(memory mem, regD src) %{
6100     instruction_count(2);
6101     src    : S5(read);
6102     mem    : S3(read);
6103     DECODE : S0;        // any decoder for FPU PUSH
6104     D0     : S1;        // big decoder only
6105     FPU    : S4;
6106     MEM    : S3;        // any mem
6107 %}
6108 
6109 pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
6110     instruction_count(3);
6111     src1   : S3(read);
6112     src2   : S3(read);
6113     mem    : S3(read);
6114     DECODE : S0(2);     // any decoder for FPU PUSH
6115     D0     : S1;        // big decoder only
6116     FPU    : S4;
6117     MEM    : S3;        // any mem
6118 %}
6119 
6120 pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
6121     instruction_count(3);
6122     src1   : S3(read);
6123     src2   : S3(read);
6124     mem    : S4(read);
6125     DECODE : S0;        // any decoder for FPU PUSH
6126     D0     : S0(2);     // big decoder only
6127     FPU    : S4;
6128     MEM    : S3(2);     // any mem
6129 %}
6130 
6131 pipe_class fpu_mem_mem(memory dst, memory src1) %{
6132     instruction_count(2);
6133     src1   : S3(read);
6134     dst    : S4(read);
6135     D0     : S0(2);     // big decoder only
6136     MEM    : S3(2);     // any mem
6137 %}
6138 
6139 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
6140     instruction_count(3);
6141     src1   : S3(read);
6142     src2   : S3(read);
6143     dst    : S4(read);
6144     D0     : S0(3);     // big decoder only
6145     FPU    : S4;
6146     MEM    : S3(3);     // any mem
6147 %}
6148 
6149 pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
6150     instruction_count(3);
6151     src1   : S4(read);
6152     mem    : S4(read);
6153     DECODE : S0;        // any decoder for FPU PUSH
6154     D0     : S0(2);     // big decoder only
6155     FPU    : S4;
6156     MEM    : S3(2);     // any mem
6157 %}
6158 
6159 // Float load constant
6160 pipe_class fpu_reg_con(regD dst) %{
6161     instruction_count(2);
6162     dst    : S5(write);
6163     D0     : S0;        // big decoder only for the load
6164     DECODE : S1;        // any decoder for FPU POP
6165     FPU    : S4;
6166     MEM    : S3;        // any mem
6167 %}
6168 
6169 // Float load constant
6170 pipe_class fpu_reg_reg_con(regD dst, regD src) %{
6171     instruction_count(3);
6172     dst    : S5(write);
6173     src    : S3(read);
6174     D0     : S0;        // big decoder only for the load
6175     DECODE : S1(2);     // any decoder for FPU POP
6176     FPU    : S4;
6177     MEM    : S3;        // any mem
6178 %}
6179 
6180 // UnConditional branch
6181 pipe_class pipe_jmp( label labl ) %{
6182     single_instruction;
6183     BR   : S3;
6184 %}
6185 
6186 // Conditional branch
6187 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
6188     single_instruction;
6189     cr    : S1(read);
6190     BR    : S3;
6191 %}
6192 
6193 // Allocation idiom
6194 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
6195     instruction_count(1); force_serialization;
6196     fixed_latency(6);
6197     heap_ptr : S3(read);
6198     DECODE   : S0(3);
6199     D0       : S2;
6200     MEM      : S3;
6201     ALU      : S3(2);
6202     dst      : S5(write);
6203     BR       : S5;
6204 %}
6205 
6206 // Generic big/slow expanded idiom
6207 pipe_class pipe_slow(  ) %{
6208     instruction_count(10); multiple_bundles; force_serialization;
6209     fixed_latency(100);
6210     D0  : S0(2);
6211     MEM : S3(2);
6212 %}
6213 
6214 // The real do-nothing guy
6215 pipe_class empty( ) %{
6216     instruction_count(0);
6217 %}
6218 
6219 // Define the class for the Nop node
6220 define %{
6221    MachNop = empty;
6222 %}
6223 
6224 %}
6225 
6226 //----------INSTRUCTIONS-------------------------------------------------------
6227 //
6228 // match      -- States which machine-independent subtree may be replaced
6229 //               by this instruction.
6230 // ins_cost   -- The estimated cost of this instruction is used by instruction
6231 //               selection to identify a minimum cost tree of machine
6232 //               instructions that matches a tree of machine-independent
6233 //               instructions.
6234 // format     -- A string providing the disassembly for this instruction.
6235 //               The value of an instruction's operand may be inserted
6236 //               by referring to it with a '$' prefix.
6237 // opcode     -- Three instruction opcodes may be provided.  These are referred
6238 //               to within an encode class as $primary, $secondary, and $tertiary
6239 //               respectively.  The primary opcode is commonly used to
6240 //               indicate the type of machine instruction, while secondary
6241 //               and tertiary are often used for prefix options or addressing
6242 //               modes.
6243 // ins_encode -- A list of encode classes with parameters. The encode class
6244 //               name must have been defined in an 'enc_class' specification
6245 //               in the encode section of the architecture description.
6246 
6247 //----------BSWAP-Instruction--------------------------------------------------
6248 instruct bytes_reverse_int(eRegI dst) %{
6249   match(Set dst (ReverseBytesI dst));
6250 
6251   format %{ "BSWAP  $dst" %}
6252   opcode(0x0F, 0xC8);
6253   ins_encode( OpcP, OpcSReg(dst) );
6254   ins_pipe( ialu_reg );
6255 %}
6256 
6257 instruct bytes_reverse_long(eRegL dst) %{
6258   match(Set dst (ReverseBytesL dst));
6259 
6260   format %{ "BSWAP  $dst.lo\n\t"
6261             "BSWAP  $dst.hi\n\t"
6262             "XCHG   $dst.lo $dst.hi" %}
6263 
6264   ins_cost(125);
6265   ins_encode( bswap_long_bytes(dst) );
6266   ins_pipe( ialu_reg_reg);
6267 %}
6268 
6269 instruct bytes_reverse_unsigned_short(eRegI dst) %{
6270   match(Set dst (ReverseBytesUS dst));
6271 
6272   format %{ "BSWAP  $dst\n\t" 
6273             "SHR    $dst,16\n\t" %}
6274   ins_encode %{
6275     __ bswapl($dst$$Register);
6276     __ shrl($dst$$Register, 16); 
6277   %}
6278   ins_pipe( ialu_reg );
6279 %}
6280 
6281 instruct bytes_reverse_short(eRegI dst) %{
6282   match(Set dst (ReverseBytesS dst));
6283 
6284   format %{ "BSWAP  $dst\n\t" 
6285             "SAR    $dst,16\n\t" %}
6286   ins_encode %{
6287     __ bswapl($dst$$Register);
6288     __ sarl($dst$$Register, 16); 
6289   %}
6290   ins_pipe( ialu_reg );
6291 %}
6292 
6293 
6294 //---------- Zeros Count Instructions ------------------------------------------
6295 
6296 instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
6297   predicate(UseCountLeadingZerosInstruction);
6298   match(Set dst (CountLeadingZerosI src));
6299   effect(KILL cr);
6300 
6301   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
6302   ins_encode %{
6303     __ lzcntl($dst$$Register, $src$$Register);
6304   %}
6305   ins_pipe(ialu_reg);
6306 %}
6307 
6308 instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
6309   predicate(!UseCountLeadingZerosInstruction);
6310   match(Set dst (CountLeadingZerosI src));
6311   effect(KILL cr);
6312 
6313   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
6314             "JNZ    skip\n\t"
6315             "MOV    $dst, -1\n"
6316       "skip:\n\t"
6317             "NEG    $dst\n\t"
6318             "ADD    $dst, 31" %}
6319   ins_encode %{
6320     Register Rdst = $dst$$Register;
6321     Register Rsrc = $src$$Register;
6322     Label skip;
6323     __ bsrl(Rdst, Rsrc);
6324     __ jccb(Assembler::notZero, skip);
6325     __ movl(Rdst, -1);
6326     __ bind(skip);
6327     __ negl(Rdst);
6328     __ addl(Rdst, BitsPerInt - 1);
6329   %}
6330   ins_pipe(ialu_reg);
6331 %}
6332 
6333 instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
6334   predicate(UseCountLeadingZerosInstruction);
6335   match(Set dst (CountLeadingZerosL src));
6336   effect(TEMP dst, KILL cr);
6337 
6338   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
6339             "JNC    done\n\t"
6340             "LZCNT  $dst, $src.lo\n\t"
6341             "ADD    $dst, 32\n"
6342       "done:" %}
6343   ins_encode %{
6344     Register Rdst = $dst$$Register;
6345     Register Rsrc = $src$$Register;
6346     Label done;
6347     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
6348     __ jccb(Assembler::carryClear, done);
6349     __ lzcntl(Rdst, Rsrc);
6350     __ addl(Rdst, BitsPerInt);
6351     __ bind(done);
6352   %}
6353   ins_pipe(ialu_reg);
6354 %}
6355 
6356 instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
6357   predicate(!UseCountLeadingZerosInstruction);
6358   match(Set dst (CountLeadingZerosL src));
6359   effect(TEMP dst, KILL cr);
6360 
6361   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
6362             "JZ     msw_is_zero\n\t"
6363             "ADD    $dst, 32\n\t"
6364             "JMP    not_zero\n"
6365       "msw_is_zero:\n\t"
6366             "BSR    $dst, $src.lo\n\t"
6367             "JNZ    not_zero\n\t"
6368             "MOV    $dst, -1\n"
6369       "not_zero:\n\t"
6370             "NEG    $dst\n\t"
6371             "ADD    $dst, 63\n" %}
6372  ins_encode %{
6373     Register Rdst = $dst$$Register;
6374     Register Rsrc = $src$$Register;
6375     Label msw_is_zero;
6376     Label not_zero;
6377     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
6378     __ jccb(Assembler::zero, msw_is_zero);
6379     __ addl(Rdst, BitsPerInt);
6380     __ jmpb(not_zero);
6381     __ bind(msw_is_zero);
6382     __ bsrl(Rdst, Rsrc);
6383     __ jccb(Assembler::notZero, not_zero);
6384     __ movl(Rdst, -1);
6385     __ bind(not_zero);
6386     __ negl(Rdst);
6387     __ addl(Rdst, BitsPerLong - 1);
6388   %}
6389   ins_pipe(ialu_reg);
6390 %}
6391 
6392 instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
6393   match(Set dst (CountTrailingZerosI src));
6394   effect(KILL cr);
6395 
6396   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
6397             "JNZ    done\n\t"
6398             "MOV    $dst, 32\n"
6399       "done:" %}
6400   ins_encode %{
6401     Register Rdst = $dst$$Register;
6402     Label done;
6403     __ bsfl(Rdst, $src$$Register);
6404     __ jccb(Assembler::notZero, done);
6405     __ movl(Rdst, BitsPerInt);
6406     __ bind(done);
6407   %}
6408   ins_pipe(ialu_reg);
6409 %}
6410 
6411 instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
6412   match(Set dst (CountTrailingZerosL src));
6413   effect(TEMP dst, KILL cr);
6414 
6415   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
6416             "JNZ    done\n\t"
6417             "BSF    $dst, $src.hi\n\t"
6418             "JNZ    msw_not_zero\n\t"
6419             "MOV    $dst, 32\n"
6420       "msw_not_zero:\n\t"
6421             "ADD    $dst, 32\n"
6422       "done:" %}
6423   ins_encode %{
6424     Register Rdst = $dst$$Register;
6425     Register Rsrc = $src$$Register;
6426     Label msw_not_zero;
6427     Label done;
6428     __ bsfl(Rdst, Rsrc);
6429     __ jccb(Assembler::notZero, done);
6430     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
6431     __ jccb(Assembler::notZero, msw_not_zero);
6432     __ movl(Rdst, BitsPerInt);
6433     __ bind(msw_not_zero);
6434     __ addl(Rdst, BitsPerInt);
6435     __ bind(done);
6436   %}
6437   ins_pipe(ialu_reg);
6438 %}
6439 
6440 
6441 //---------- Population Count Instructions -------------------------------------
6442 
6443 instruct popCountI(eRegI dst, eRegI src) %{
6444   predicate(UsePopCountInstruction);
6445   match(Set dst (PopCountI src));
6446 
6447   format %{ "POPCNT $dst, $src" %}
6448   ins_encode %{
6449     __ popcntl($dst$$Register, $src$$Register);
6450   %}
6451   ins_pipe(ialu_reg);
6452 %}
6453 
6454 instruct popCountI_mem(eRegI dst, memory mem) %{
6455   predicate(UsePopCountInstruction);
6456   match(Set dst (PopCountI (LoadI mem)));
6457 
6458   format %{ "POPCNT $dst, $mem" %}
6459   ins_encode %{
6460     __ popcntl($dst$$Register, $mem$$Address);
6461   %}
6462   ins_pipe(ialu_reg);
6463 %}
6464 
6465 // Note: Long.bitCount(long) returns an int.
6466 instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
6467   predicate(UsePopCountInstruction);
6468   match(Set dst (PopCountL src));
6469   effect(KILL cr, TEMP tmp, TEMP dst);
6470 
6471   format %{ "POPCNT $dst, $src.lo\n\t"
6472             "POPCNT $tmp, $src.hi\n\t"
6473             "ADD    $dst, $tmp" %}
6474   ins_encode %{
6475     __ popcntl($dst$$Register, $src$$Register);
6476     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
6477     __ addl($dst$$Register, $tmp$$Register);
6478   %}
6479   ins_pipe(ialu_reg);
6480 %}
6481 
6482 // Note: Long.bitCount(long) returns an int.
6483 instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
6484   predicate(UsePopCountInstruction);
6485   match(Set dst (PopCountL (LoadL mem)));
6486   effect(KILL cr, TEMP tmp, TEMP dst);
6487 
6488   format %{ "POPCNT $dst, $mem\n\t"
6489             "POPCNT $tmp, $mem+4\n\t"
6490             "ADD    $dst, $tmp" %}
6491   ins_encode %{
6492     //__ popcntl($dst$$Register, $mem$$Address$$first);
6493     //__ popcntl($tmp$$Register, $mem$$Address$$second);
6494     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
6495     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
6496     __ addl($dst$$Register, $tmp$$Register);
6497   %}
6498   ins_pipe(ialu_reg);
6499 %}
6500 
6501 
6502 //----------Load/Store/Move Instructions---------------------------------------
6503 //----------Load Instructions--------------------------------------------------
6504 // Load Byte (8bit signed)
6505 instruct loadB(xRegI dst, memory mem) %{
6506   match(Set dst (LoadB mem));
6507 
6508   ins_cost(125);
6509   format %{ "MOVSX8 $dst,$mem\t# byte" %}
6510 
6511   ins_encode %{
6512     __ movsbl($dst$$Register, $mem$$Address);
6513   %}
6514 
6515   ins_pipe(ialu_reg_mem);
6516 %}
6517 
6518 // Load Byte (8bit signed) into Long Register
6519 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
6520   match(Set dst (ConvI2L (LoadB mem)));
6521   effect(KILL cr);
6522 
6523   ins_cost(375);
6524   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
6525             "MOV    $dst.hi,$dst.lo\n\t"
6526             "SAR    $dst.hi,7" %}
6527 
6528   ins_encode %{
6529     __ movsbl($dst$$Register, $mem$$Address);
6530     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6531     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
6532   %}
6533 
6534   ins_pipe(ialu_reg_mem);
6535 %}
6536 
6537 // Load Unsigned Byte (8bit UNsigned)
6538 instruct loadUB(xRegI dst, memory mem) %{
6539   match(Set dst (LoadUB mem));
6540 
6541   ins_cost(125);
6542   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
6543 
6544   ins_encode %{
6545     __ movzbl($dst$$Register, $mem$$Address);
6546   %}
6547 
6548   ins_pipe(ialu_reg_mem);
6549 %}
6550 
6551 // Load Unsigned Byte (8 bit UNsigned) into Long Register
6552 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
6553   match(Set dst (ConvI2L (LoadUB mem)));
6554   effect(KILL cr);
6555 
6556   ins_cost(250);
6557   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
6558             "XOR    $dst.hi,$dst.hi" %}
6559 
6560   ins_encode %{
6561     Register Rdst = $dst$$Register;
6562     __ movzbl(Rdst, $mem$$Address);
6563     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6564   %}
6565 
6566   ins_pipe(ialu_reg_mem);
6567 %}
6568 
6569 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
6570 instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
6571   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
6572   effect(KILL cr);
6573 
6574   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
6575             "XOR    $dst.hi,$dst.hi\n\t"
6576             "AND    $dst.lo,$mask" %}
6577   ins_encode %{
6578     Register Rdst = $dst$$Register;
6579     __ movzbl(Rdst, $mem$$Address);
6580     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6581     __ andl(Rdst, $mask$$constant);
6582   %}
6583   ins_pipe(ialu_reg_mem);
6584 %}
6585 
// Load Short (16bit signed)
// Sign-extending 16-bit load into a 32-bit register.
instruct loadS(eRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX  $dst,$mem\t# short" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// The ideal graph expresses "narrow to signed byte" as (x << 24) >> 24;
// matching the whole shift pair lets us emit a single sign-extending
// byte load instead of a load plus two shifts.
instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Short (16bit signed) into Long Register
// Sign-extends into $dst.lo, then materializes the sign in $dst.hi
// with a copy-and-arithmetic-shift.  Kills flags (SAR sets them).
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,15" %}

  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    // After MOVSX the top 17 bits of the 32-bit value all equal the sign
    // bit, so shifting by 15 (rather than 31) already yields all-sign-bits.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
  %}

  ins_pipe(ialu_reg_mem);
%}
6630 
// Load Unsigned Short/Char (16bit unsigned)
// Zero-extending 16-bit load into a 32-bit register.
instruct loadUS(eRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
// Matches the (x << 24) >> 24 narrowing idiom applied to an unsigned
// short load: only the low byte survives, sign-extended, so a single
// MOVSX byte load suffices.
instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
// Zero-extends into $dst.lo; high half is cleared (value is non-negative).
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
// The 0xFF mask keeps only the low byte of the ushort, so the load+mask
// collapses into a single zero-extending byte load (movzbl, not movzwl).
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register
// General masked form: zero-extend the short, clear the high half, then
// apply the immediate mask to the low half.
instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
6705 
// Load Integer
// Plain 32-bit load.
instruct loadI(eRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem\t# int" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Byte (8 bit signed)
// Matches the (x << 24) >> 24 narrowing idiom: emit one sign-extending
// byte load instead of a 32-bit load plus two shifts.
instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
// (LoadI & 0xFF) becomes a single zero-extending byte load.
instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Short (16 bit signed)
// Matches the (x << 16) >> 16 narrowing idiom with a sign-extending
// word load.
instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
  ins_encode %{
    __ movswl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
// (LoadI & 0xFFFF) becomes a single zero-extending word load.
instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    __ movzwl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
6767 
// Load Integer into Long Register
// Loads the int into $dst.lo and derives $dst.hi from the sign bit via
// copy + arithmetic shift by 31.  Flags are killed by SAR.
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,31" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
  %}

  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFF into Long Register
// Load+mask collapses to a zero-extending byte load; result is
// non-negative, so the high half is simply cleared.
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzbl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with mask 0xFFFF into Long Register
// Load+mask collapses to a zero-extending word load.
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movzwl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Integer with 32-bit mask into Long Register
// NOTE(review): this matches an arbitrary immI mask, and clears $dst.hi
// unconditionally — presumably valid because the matcher only selects
// this pattern when the masked int is treated as non-negative; more
// specific mask forms above are preferred by lower cost.
instruct loadI2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV    $dst.lo,$mem\t# int & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,$mask" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ movl(Rdst, $mem$$Address);
    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
    __ andl(Rdst, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}

// Load Unsigned Integer into Long Register
// Zero-extending int-to-long: low half loaded, high half cleared.
instruct loadUI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (LoadUI2L mem));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    __ movl($dst$$Register, $mem$$Address);
    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
  %}

  ins_pipe(ialu_reg_mem);
%}
6850 
// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
// Non-atomic path: two 32-bit loads at $mem and $mem+4.  Only legal when
// the LoadL node does not require atomic access (see predicate).
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
            "MOV    $dst.hi,$mem+4" %}

  ins_encode %{
    // Build the two word addresses explicitly; disp_reloc is false since
    // these are raw displacements derived from the matched operand.
    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false);
    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false);
    __ movl($dst$$Register, Amemlo);
    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
  %}

  ins_pipe(ialu_reg_long_mem);
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
// x87 path (UseSSE<=1): FILD/FISTP perform a single 64-bit memory access.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
            "FISTp  $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 path: one 64-bit MOVSD gives atomicity; result lands in a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD  $dst,$tmp" %}
  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// SSE2 path delivering directly into a GPR pair: MOVD low half out,
// shift the xmm temp right 32 bits, MOVD high half out.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD   $dst.lo,$tmp\n\t"
            "PSRLQ  $tmp,32\n\t"
            "MOVD   $dst.hi,$tmp" %}
  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}
6908 
// Load Range
// Loads an array-length/range value; plain 32-bit MOV (opcode 0x8B is
// MOV r32, r/m32).
instruct loadRange(eRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
// 32-bit pointer load — identical encoding to loadI/loadRange.
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
// Klass pointers are plain 32-bit pointers on this port.
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}
6942 
// Load Double
// x87 path (UseSSE<=1): FLD pushes the m64 value onto the FPU stack,
// FSTP pops it into the destination stack register.
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
// MOVSD (F2 0F 10) clears the upper half of the xmm register, preferred
// when UseXmmLoadAndClearUpper is set.
instruct loadXD(regXD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD  $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// MOVLPD (66 0F 12) loads only the low 64 bits, leaving the upper half
// of the xmm register untouched — used when clearing the upper half is
// not desired.
instruct loadXD_partial(regXD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadX(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS  $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Load Float
// x87 path (UseSSE==0): FLD m32real then pop into the destination.
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}
7000 
// Load Aligned Packed Byte to XMM register
// All five packed loads below share the same 64-bit MOVQ encoding; only
// the matched ideal vector type differs.
instruct loadA8B(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load8B mem));
  ins_cost(125);
  format %{ "MOVQ  $dst,$mem\t! packed8B" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Short to XMM register
instruct loadA4S(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4S mem));
  ins_cost(125);
  format %{ "MOVQ  $dst,$mem\t! packed4S" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Char to XMM register
instruct loadA4C(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4C mem));
  ins_cost(125);
  format %{ "MOVQ  $dst,$mem\t! packed4C" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Integer to XMM register
instruct load2IU(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2I mem));
  ins_cost(125);
  format %{ "MOVQ  $dst,$mem\t! packed2I" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Single to XMM
instruct loadA2F(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2F mem));
  ins_cost(145);
  format %{ "MOVQ  $dst,$mem\t! packed2F" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}
7050 
// Load Effective Address
// The leaP* family matches a bare addressing-mode operand as the source
// and materializes the address with LEA (opcode 0x8D).  One variant per
// addressing form so the matcher can pick the cheapest legal one.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// base + 32-bit displacement
instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// base + index + displacement
instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// base + scaled index
instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

// base + scaled index + displacement
instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}
7101 
// Load Constant
// General 32-bit immediate into a register.
instruct loadConI(eRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV    $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR reg,reg is shorter than MOV reg,0 but clobbers the flags,
// hence the KILL of cr and the lower cost.
instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR    $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

// Pointer constant (includes oop constants; encoded via LdImmP so any
// needed relocation is emitted).
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV    $dst,$src" %}
  opcode(0xB8);  /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Long constant: two 32-bit immediate moves, one per register half.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV    $dst.lo,$src.lo\n\t"
            "MOV    $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Long zero: XOR both halves (cheaper, kills flags).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR    $dst.lo,$dst.lo\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}
7153 
// The instruction usage is guarded by predicate in operand immF().
// General float constant: load from the constant table onto the FPU
// stack, then pop into the destination register.
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld_s($constantaddress($con));
    // fstp_d here is the register-form pop (ST -> ST(i)), not a memory store.
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF0().
// float 0.0f: FLDZ avoids a constant-table access.
instruct loadConF0(regF dst, immF0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immF1().
// float 1.0f: FLD1 avoids a constant-table access.
instruct loadConF1(regF dst, immF1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immXF().
// SSE float constant from the constant table.
instruct loadConX(regX dst, immXF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ movflt($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immXF0().
// SSE float 0.0f: XORPS reg,reg is cheaper than a memory load.
instruct loadConX0(regX dst, immXF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS  $dst,$dst\t# float 0.0" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}
7214 
// The instruction usage is guarded by predicate in operand immD().
// General double constant: load from the constant table onto the FPU
// stack, then pop into the destination register.
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD0().
// double 0.0: FLDZ avoids a constant-table access.
instruct loadConD0(regD dst, immD0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fldz();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immD1().
// double 1.0: FLD1 avoids a constant-table access.
instruct loadConD1(regD dst, immD1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    __ fld1();
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_con);
%}

// The instruction usage is guarded by predicate in operand immXD().
// SSE2 double constant from the constant table.
instruct loadConXD(regXD dst, immXD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ movdbl($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// The instruction usage is guarded by predicate in operand immXD0().
// SSE2 double 0.0: XORPD (66 0F 57) reg,reg is cheaper than a load.
instruct loadConXD0(regXD dst, immXD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD  $dst,$dst\t# double 0.0" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
  ins_pipe( pipe_slow );
%}
7276 
// Load Stack Slot
// Reload a spilled int from its stack slot (MOV r32, r/m32).
instruct loadSSI(eRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV    $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Reload a spilled long: two 32-bit loads for the low and high halves.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot
// Reload a spilled pointer.
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV    $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot
// Reload a spilled float via the FPU stack (stack slots carry no oops,
// hence the _no_oop memory encoding).
instruct loadSSF(regF dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot
// Reload a spilled double via the FPU stack.
instruct loadSSD(regD dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}
7335 
// Prefetch instructions.
// Must be safe to execute with invalid address (cannot fault).

// No prefetch support available: emit nothing (size 0).
instruct prefetchr0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch());
  match(PrefetchRead mem);
  ins_cost(0);
  size(0);
  format %{ "PREFETCHR (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// 3DNow! PREFETCH (0F 0D /0).
// NOTE(review): && binds tighter than ||, so this is
// (UseSSE==0 && supports_3dnow_prefetch()) || ReadPrefetchInstr==3 —
// i.e. ReadPrefetchInstr==3 selects this form regardless of UseSSE.
instruct prefetchr( memory mem ) %{
  predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || ReadPrefetchInstr==3);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
  opcode(0x0F, 0x0d);     /* Opcode 0F 0d /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHNTA (0F 18 /0), selected by ReadPrefetchInstr==0.
instruct prefetchrNTA( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==0);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHT0 (0F 18 /1).
instruct prefetchrT0( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==1);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHT2 (0F 18 /3).
instruct prefetchrT2( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==2);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}
7392 
// Write-prefetch variants, mirroring the read-prefetch family above but
// keyed off AllocatePrefetchInstr.

// No prefetch support available: emit nothing (size 0).
instruct prefetchw0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch());
  match(PrefetchWrite mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// 3DNow! PREFETCHW (0F 0D /1).
// NOTE(review): as with prefetchr, && binds tighter than ||, so
// AllocatePrefetchInstr==3 selects this form regardless of UseSSE.
instruct prefetchw( memory mem ) %{
  predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || AllocatePrefetchInstr==3);
  match( PrefetchWrite mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHNTA (0F 18 /0).
instruct prefetchwNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHT0 (0F 18 /1).
instruct prefetchwT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

// SSE PREFETCHT2 (0F 18 /3).
instruct prefetchwT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}
7446 
7447 //----------Store Instructions-------------------------------------------------
7448 
// Store Byte
// 0x88 is MOV r/m8, r8; xRegI restricts the source to a register with
// a byte-addressable encoding.
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8   $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// 0x66 operand-size prefix + 0x89 gives a 16-bit MOV r/m16, r16.
instruct storeC(memory mem, eRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16  $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
// 0x89 is MOV r/m32, r32.
instruct storeI(memory mem, eRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long
// Non-atomic path: two 32-bit stores at $mem and $mem+4.  Only legal
// when the StoreL node does not require atomic access (see predicate).
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV    $mem,$src.lo\n\t"
            "MOV    $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Store Long to Integer
// (ConvL2I long) stored as int: just store the low half of the pair.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV    $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    __ movl($mem$$Address, $src$$Register);
  %}
  ins_pipe(ialu_mem_reg);
%}
7505 
// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
// The leading CMP $mem,EAX (opcode 0x3B) only touches memory to trigger
// an implicit null check; its compare result is unused (flags killed).
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD   $src\n\t"
            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 path: probe, then a single 64-bit MOVSD store from a stack slot
// bounced through an xmm temporary.
instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD  $tmp,$src\n\t"
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp));
  ins_pipe( pipe_slow );
%}

// SSE2 path from a GPR pair: pack lo/hi into one xmm register with
// MOVD + PUNPCKLDQ, then store atomically with MOVSD.
instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD   $tmp,$src.lo\n\t"
            "MOVD   $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
  ins_pipe( pipe_slow );
%}
7550 
// Store Pointer; for storing unknown oops and raw pointers
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
// C7 /0 is MOV r/m32, imm32.
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV    $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
// 16-bit immediate store (C7 with operand-size prefix); gated on
// UseStoreImmI16 since the form is slow on some CPUs.
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16  $mem,$src" %}
  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV    $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
// C6 /0 is MOV r/m8, imm8.
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8   $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}
7607 
// Store Aligned Packed Byte XMM register to memory.
// All of the packed stores below use a single 64-bit MOVQ via the
// shared movq_st encoding; they differ only in the ideal vector type.
instruct storeA8B(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store8B mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed8B" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Char/Short XMM register to memory
instruct storeA4C(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store4C mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed4C" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Integer XMM register to memory
instruct storeA2I(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store2I mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed2I" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store CMS card-mark Immediate: a plain one-byte immediate store used
// by the garbage collector's card-marking write barrier.
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}
7648 
// Store Double from the x87 stack top (FPR1); only used when SSE2 is
// not available for doubles (UseSSE<=1).
instruct storeD( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D  $mem,$src" %}
  opcode(0xDD);       /* DD /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86: FST to a 64-bit slot narrows the
// 80-bit x87 value, so the explicit RoundDouble node is folded into the
// store itself.
instruct storeD_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D  $mem,$src\t# round" %}
  opcode(0xDD);       /* DD /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction (F2 0F 11)
instruct storeXD(memory mem, regXD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD  $mem,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction (F3 0F 11)
instruct storeX(memory mem, regX src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS  $mem,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Single Float XMM register to memory
instruct storeA2F(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store2F mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed2F" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}
7704 
// Store Float from the x87 stack top (FPR1); only used when SSE is
// disabled entirely (UseSSE==0).
instruct storeF( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S  $mem,$src" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86: FST to a 32-bit slot narrows the
// x87 value, so the RoundFloat node is folded into the store.
instruct storeF_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S  $mem,$src\t# round" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float with double-to-float conversion folded in: FST_S of a
// double on the x87 stack narrows to single precision, implementing
// the ConvD2F as part of the store.
instruct storeF_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S  $mem,$src\t# D-round" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store immediate Float value (it is faster than store from FPU register)
// The float is stored as its raw 32-bit pattern via an integer MOV.
// The instruction usage is guarded by predicate in operand immF().
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV    $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immXF().
instruct storeX_imm( memory mem, immXF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV    $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32XF_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}
7764 
// Store Integer to stack slot
instruct storeSSI(stackSlotI dst, eRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV    $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Pointer to stack slot
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV    $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot: two 32-bit MOVs, low half first, high half
// to the adjacent slot at $dst+4.
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}
7798 
//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

// Acquire barrier: emits no code (size 0) — on x86 the needed ordering
// is provided by the hardware memory model, so only the compiler's
// scheduling is constrained by this node.
instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire ! (empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

// Acquire barrier immediately after a FastLock: the CMPXCHG inside the
// lock already has full-fence semantics, so this is free (cost 0).
instruct membar_acquire_lock() %{
  match(MemBarAcquire);
  predicate(Matcher::prior_fast_lock(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Release barrier: also empty on x86 (stores are not reordered with
// earlier loads or stores by the hardware).
instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release ! (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Release barrier immediately before a FastUnlock, which supplies the
// required fencing itself.
instruct membar_release_lock() %{
  match(MemBarRelease);
  predicate(Matcher::post_fast_unlock(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full StoreLoad barrier.  Emitted via MacroAssembler::membar, which the
// format block documents as a LOCK ADDL to the stack on MP systems and
// nothing on uniprocessors.  Kills flags because of the ADDL.
instruct membar_volatile(eFlagsReg cr) %{
  match(MemBarVolatile);
  effect(KILL cr);
  ins_cost(400);

  format %{
    $$template
    if (os::is_MP()) {
      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
    } else {
      $$emit$$"MEMBAR-volatile ! (empty encoding)"
    }
  %}
  ins_encode %{
    __ membar(Assembler::StoreLoad);
  %}
  ins_pipe(pipe_slow);
%}

// A MemBarVolatile that a prior instruction already satisfies (see
// Matcher::post_store_load_barrier) costs and emits nothing.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
7873 
//----------Move Instructions--------------------------------------------------
// Cast machine-word integer to pointer.  Both operands are constrained
// to EAX (eAXRegP / eAXRegI), so source and destination are the same
// register and the encoding is empty — the cast is free.
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P  $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

// Cast pointer to machine-word integer: a plain register-to-register
// copy (enc_Copy elides it when dst == src — TODO confirm in encoding).
instruct castP2X(eRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV    $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}
7890 
//----------Conditional Move---------------------------------------------------
// Conditional move of an int, signed-compare flags.  CMOVcc is
// 0F 40+cc; enc_cmov merges the condition code into the opcode.
// All CMOV forms require VM_Version::supports_cmov() (P6 and later).
instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same as above for unsigned-compare flags.
instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned carry-free flags variant: expands to the plain unsigned form.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move with a memory source (folds the LoadI).
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move from memory, unsigned flags.
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}

// Conditional move of a pointer, signed flags.
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note:  a CMoveP is generated for  stubs and native wrappers
//        regardless of whether we are on a P6, so we
//        emulate a cmov here with a short inverted branch around a MOV.
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop   skip\n\t"
          "MOV    $dst,$src\t# pointer\n"
      "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move of a pointer, unsigned flags.
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}
7998 
7999 // DISABLED: Requires the ADLC to emit a bottom_type call that
8000 // correctly meets the two pointer arguments; one is an incoming
8001 // register but the other is a memory operand.  ALSO appears to
8002 // be buggy with implicit null checks.
8003 //
8004 //// Conditional move
8005 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
8006 //  predicate(VM_Version::supports_cmov() );
8007 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8008 //  ins_cost(250);
8009 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
8010 //  opcode(0x0F,0x40);
8011 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
8012 //  ins_pipe( pipe_cmov_mem );
8013 //%}
8014 //
8015 //// Conditional move
8016 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
8017 //  predicate(VM_Version::supports_cmov() );
8018 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
8019 //  ins_cost(250);
8020 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
8021 //  opcode(0x0F,0x40);
8022 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
8023 //  ins_pipe( pipe_cmov_mem );
8024 //%}
8025 
// Conditional move of a double on the x87 stack using FCMOVcc (0xDA
// family); FCMOV only understands the unsigned condition codes.
instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_d(cop,src) );
  ins_pipe( pipe_cmovD_reg );
%}

// Conditional move of a float via FCMOVcc; same encoding as the double
// form since x87 registers are typeless.
instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_d(cop,src) );
  ins_pipe( pipe_cmovD_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Emulate the signed case with an inverted short branch around an x87
// register-to-register copy (push $src, then DD D8+i store-pop to $dst).
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOV    $dst,$src\t# double\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovD_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop    skip\n\t"
            "MOV    $dst,$src\t# float\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovD_reg );
%}
8073 
// No CMOVE with SSE/SSE2: XMM registers have no conditional-move
// instruction, so each form below branches around a MOVSS/MOVSD with
// the branch sense inverted relative to the CMOV condition.
instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSS  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2 — double variant.
// (NOTE(review): the format string labels the MOVSD "# float" although
// this moves a double; cosmetic only, shows up in debug disassembly.)
instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSS  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regU(cop, cr, dst, src);
  %}
%}
8163 
// Conditional move of a long: two CMOVs, one per 32-bit half, using the
// same condition code for both.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags variant of the long conditional move.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
8194 
//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------
// Integer Addition Instructions
instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD    $dst,$src" %}
  opcode(0x03);               /* ADD r32, r/m32 */
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add immediate to register; OpcSErm selects the sign-extended 8-bit
// form when the immediate fits, otherwise the full 32-bit form.
instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD    $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add +1 using the one-byte INC (40+r); guarded by UseIncDec since
// INC/DEC only partially update the flags.
instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC    $dst" %}
  opcode(0x40); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA: leaves the flags untouched (no KILL cr),
// so it is preferred when the flags must survive.
instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA    $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer version of the flag-preserving LEA add.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add -1 using the one-byte DEC (48+r); same UseIncDec guard as INC.
instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC    $dst" %}
  opcode(0x48); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}
8262 
// Add an int offset into a pointer register.
instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD    $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Add an immediate offset into a pointer register.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD    $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add a value loaded from memory into a register (folds the LoadI).
instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD    $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Read-modify-write add directly on a memory location.
instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD    $dst,$src" %}
  opcode(0x01);  /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Add Memory with Immediate
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD    $dst,$src" %}
  opcode(0x81);               /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}

// Increment a memory location in place (FF /0).
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC    $dst" %}
  opcode(0xFF);               /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}

// Decrement a memory location in place (FF /1).
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC    $dst" %}
  opcode(0xFF);               /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}
8340 
8341 
8342 instruct checkCastPP( eRegP dst ) %{
8343   match(Set dst (CheckCastPP dst));
8344 
8345   size(0);
8346   format %{ "#checkcastPP of $dst" %}
8347   ins_encode( /*empty encoding*/ );
8348   ins_pipe( empty );
8349 %}
8350 
8351 instruct castPP( eRegP dst ) %{
8352   match(Set dst (CastPP dst));
8353   format %{ "#castPP of $dst" %}
8354   ins_encode( /*empty encoding*/ );
8355   ins_pipe( empty );
8356 %}
8357 
8358 instruct castII( eRegI dst ) %{
8359   match(Set dst (CastII dst));
8360   format %{ "#castII of $dst" %}
8361   ins_encode( /*empty encoding*/ );
8362   ins_cost(0);
8363   ins_pipe( empty );
8364 %}
8365 
8366 
8367 // Load-locked - same as a regular pointer load when used with compare-swap
8368 instruct loadPLocked(eRegP dst, memory mem) %{
8369   match(Set dst (LoadPLocked mem));
8370 
8371   ins_cost(125);
8372   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
8373   opcode(0x8B);
8374   ins_encode( OpcP, RegMem(dst,mem));
8375   ins_pipe( ialu_reg_mem );
8376 %}
8377 
8378 // LoadLong-locked - same as a volatile long load when used with compare-swap
8379 instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
8380   predicate(UseSSE<=1);
8381   match(Set dst (LoadLLocked mem));
8382 
8383   ins_cost(200);
8384   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
8385             "FISTp  $dst" %}
8386   ins_encode(enc_loadL_volatile(mem,dst));
8387   ins_pipe( fpu_reg_mem );
8388 %}
8389 
8390 instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
8391   predicate(UseSSE>=2);
8392   match(Set dst (LoadLLocked mem));
8393   effect(TEMP tmp);
8394   ins_cost(180);
8395   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
8396             "MOVSD  $dst,$tmp" %}
8397   ins_encode(enc_loadLX_volatile(mem, dst, tmp));
8398   ins_pipe( pipe_slow );
8399 %}
8400 
8401 instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
8402   predicate(UseSSE>=2);
8403   match(Set dst (LoadLLocked mem));
8404   effect(TEMP tmp);
8405   ins_cost(160);
8406   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
8407             "MOVD   $dst.lo,$tmp\n\t"
8408             "PSRLQ  $tmp,32\n\t"
8409             "MOVD   $dst.hi,$tmp" %}
8410   ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
8411   ins_pipe( pipe_slow );
8412 %}
8413 
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  // CMPXCHG writes the current memory value into EAX on failure.
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}

// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG   EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high order word of the new value to store but
    //       our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    if( os::is_MP() )
      __ lock();
    __ cmpxchg8($mem$$Address);
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}
8458 
// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// 64-bit CAS via LOCK CMPXCHG8B (EDX:EAX expected, ECX:EBX new);
// enc_flags_ne_to_boolean converts ZF into the 0/1 result in $res.
instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  // On failure CMPXCHG8B writes the current memory value into EDX:EAX.
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV    $res,0\n\t"
            "JNE,s  fail\n\t"
            "MOV    $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Pointer CAS via LOCK CMPXCHG (EAX expected, ECX new).
instruct compareAndSwapP( eRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV    $res,0\n\t"
            "JNE,s  fail\n\t"
            "MOV    $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}

// Int CAS via LOCK CMPXCHG (EAX expected, ECX new).
instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV    $res,0\n\t"
            "JNE,s  fail\n\t"
            "MOV    $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}
8497 
//----------Subtraction Instructions-------------------------------------------
// Integer Subtraction Instructions

// Subtract register from register: SUB r32, r/m32 (opcode 0x2B /r).
instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB    $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Subtract immediate from register.  OpcSErm/Con8or32 select the short
// sign-extended imm8 encoding when the constant fits in 8 bits.
instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB    $dst,$src" %}
  opcode(0x81,0x05);  /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Subtract a memory operand from a register (load folded into SUB r,m).
instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB    $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Subtract a register from memory: load-subtract-store folded into
// a single SUB m,r (opcode 0x29 /r).
instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB    $dst,$src" %}
  opcode(0x29);  /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Subtract from a pointer
// Matches "ptr + (0 - src)", i.e. a pointer decremented by an int index.
instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB    $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Negation: matches "0 - dst" and emits a single NEG instruction.
instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG    $dst" %}
  opcode(0xF7,0x03);  // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}
8566 
8567 
//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register
// Two-operand IMUL r32, r/m32 (two-byte opcode 0x0F 0xAF).
instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL   $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Multiply 32-bit Immediate
// Three-operand IMUL dst,src,imm; OpcSE/Con8or32 select the short imm8
// form when the constant fits in 8 bits.
instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL   $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Load a 32-bit constant into the low word (EAX) of an EDX:EAX long;
// the eADXRegL_low_only operand class ties it to the mulI_imm_high
// patterns below, which only consume the low word.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV    EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}
8606 
// Multiply by 32-bit Immediate, taking the shifted high order results
//  (special case for shift by 32)
// Matches (int)(((long)src1 * con) >> 32).  The predicate verifies the
// long constant actually fits in 32 signed bits (so loadConL_low_only's
// EAX-only load is sufficient).  After the widening IMUL the high word
// of the product is in EDX, which is exactly the result for a 32-shift.
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL   EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply by 32-bit Immediate, taking the shifted high order results
// Same as above but for shift counts 33..63: the high word in EDX is
// additionally arithmetic-shifted right by (cnt - 32).
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL   EDX:EAX,$src1\n\t"
            "SAR    EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}

// Multiply Memory 32-bit Immediate
// IMUL dst, [mem], imm — three-operand form with a memory source.
instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL   $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}

// Multiply Memory
// Two-operand IMUL with a memory source folded in.
instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL   $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}
8662 
// Multiply Register Int to Long
// Signed 32x32->64 multiply: one-operand IMUL leaves the full product
// in EDX:EAX (the eADXRegL dst); one factor must already be in EAX.
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL   $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Unsigned 32x32->64 multiply: the AndL-with-0xffffffff pattern means the
// inputs are zero-extended ints, so the one-operand MUL form applies.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL    $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}
8687 
// Multiply Register Long
// Full 64x64->64 multiply on 32-bit x86: three 32-bit multiplies plus
// adds, with the product accumulated in EDX:EAX (dst).
instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV    $tmp,$src.lo\n\t"
            "IMUL   $tmp,EDX\n\t"
            "MOV    EDX,$src.hi\n\t"
            "IMUL   EDX,EAX\n\t"
            "ADD    $tmp,EDX\n\t"
            "MUL    EDX:EAX,$src.lo\n\t"
            "ADD    EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the left operand's high 32 bits are zero
// Cheaper variant: x_hi == 0 drops one of the cross-product terms.
instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
  format %{ "MOV    $tmp,$src.hi\n\t"
            "IMUL   $tmp,EAX\n\t"
            "MUL    EDX:EAX,$src.lo\n\t"
            "ADD    EDX,$tmp" %}
  ins_encode %{
    // tmp = y_hi * x_lo (x_lo is in EAX)
    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
    __ imull($tmp$$Register, rax);
    // EDX:EAX = x_lo * y_lo, then fold the cross term into the high word
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply Register Long where the right operand's high 32 bits are zero
// Mirror of the case above: y_hi == 0 drops the other cross-product term.
instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
  format %{ "MOV    $tmp,$src.lo\n\t"
            "IMUL   $tmp,EDX\n\t"
            "MUL    EDX:EAX,$src.lo\n\t"
            "ADD    EDX,$tmp" %}
  ins_encode %{
    // tmp = y_lo * x_hi (x_hi is in EDX)
    __ movl($tmp$$Register, $src$$Register);
    __ imull($tmp$$Register, rdx);
    // EDX:EAX = x_lo * y_lo, then fold the cross term into the high word
    __ mull($src$$Register);
    __ addl(rdx, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
8747 
// Multiply Register Long where the left and the right operands' high 32 bits are zero
// Both high words are zero, so a single unsigned MUL of the low words
// produces the full 64-bit result directly in EDX:EAX.
instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
  match(Set dst (MulL dst src));
  effect(KILL cr);
  ins_cost(1*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  // Single-instruction format: no trailing "\n\t" (the stray continuation
  // previously produced a dangling blank line in PrintOptoAssembly output).
  format %{ "MUL    EDX:EAX,$src.lo" %}
  ins_encode %{
    // Unsigned multiply of EAX by src.lo; result in EDX:EAX.
    __ mull($src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
8762 
// Multiply Register Long by small constant
// 64-bit value in EDX:EAX times a constant in [-127,127] (immL_127):
// one three-operand IMUL for the high cross term plus one unsigned MUL.
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
// Basic idea: lo(result) = lo(src * EAX)
//             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL   $tmp,EDX,$src\n\t"
            "MOV    EDX,$src\n\t"
            "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD    EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}
8778 
// Integer DIV with Register
// Signed divide: quotient in EAX, EDX clobbered by CDQ/IDIV.  The
// CMP/JNE prologue (emitted by cdq_enc together with the branches shown
// in the format) special-cases min_jint / -1, whose IDIV would fault,
// returning min_jint with EDX zeroed instead.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP    EAX,0x80000000\n\t"
            "JNE,s  normal\n\t"
            "XOR    EDX,EDX\n\t"
            "CMP    ECX,-1\n\t"
            "JE,s   done\n"
    "normal: CDQ\n\t"
            "IDIV   $div\n\t"
    "done:"        %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Divide Register Long
// 64-bit divide is done out of line: push both operands and call the
// SharedRuntime::ldiv helper; result comes back in EDX:EAX.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH   $src1.hi\n\t"
            "PUSH   $src1.lo\n\t"
            "PUSH   $src2.hi\n\t"
            "PUSH   $src2.lo\n\t"
            "CALL   SharedRuntime::ldiv\n\t"
            "ADD    ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}
8812 
// Integer DIVMOD with Register, both quotient and mod results
// Same code sequence as divI_eReg, but the DivModI node consumes both
// outputs: quotient in EAX and remainder in EDX (so neither is KILLed).
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP    EAX,0x80000000\n\t"
            "JNE,s  normal\n\t"
            "XOR    EDX,EDX\n\t"
            "CMP    ECX,-1\n\t"
            "JE,s   done\n"
    "normal: CDQ\n\t"
            "IDIV   $div\n\t"
    "done:"        %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}

// Integer MOD with Register
// Remainder of signed divide: CDQ sign-extends EAX into EDX, IDIV leaves
// the remainder in EDX (the result register); EAX (quotient) is KILLed.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV   $div" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}

// Remainder Register Long
// 64-bit remainder via out-of-line call to SharedRuntime::lrem,
// mirroring divL_eReg above; result in EDX:EAX.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH   $src1.hi\n\t"
            "PUSH   $src1.lo\n\t"
            "PUSH   $src2.hi\n\t"
            "PUSH   $src2.lo\n\t"
            "CALL   SharedRuntime::lrem\n\t"
            "ADD    ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}
8860 
// Divide Register Long (no special case since divisor != -1)
// Inline 64-bit / 32-bit constant divide.  The dividend is in EDX:EAX;
// the divisor's absolute value goes into $tmp and two chained unsigned
// 32-bit DIVs produce the 64-bit quotient.  The fast path applies when
// |divisor| > high word, so the quotient fits in 32 bits.
instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (DivL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
            "XOR    $tmp2,$tmp2\n\t"
            "CMP    $tmp,EDX\n\t"
            "JA,s   fast\n\t"
            "MOV    $tmp2,EAX\n\t"
            "MOV    EAX,EDX\n\t"
            "MOV    EDX,0\n\t"
            "JLE,s  pos\n\t"
            "LNEG   EAX : $tmp2\n\t"
            "DIV    $tmp # unsigned division\n\t"
            "XCHG   EAX,$tmp2\n\t"
            "DIV    $tmp\n\t"
            "LNEG   $tmp2 : EAX\n\t"
            "JMP,s  done\n"
    "pos:\n\t"
            "DIV    $tmp\n\t"
            "XCHG   EAX,$tmp2\n"
    "fast:\n\t"
            "DIV    $tmp\n"
    "done:\n\t"
            "MOV    EDX,$tmp2\n\t"
            "NEG    EDX:EAX # if $imm < 0" %}
  ins_encode %{
    // The divisor is known non-trivial: the matcher only gets here for
    // constants where no min_jint / -1 / 0 special-casing is needed.
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;   // |divisor|
    Label Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ xorl($tmp2$$Register,$tmp2$$Register);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bit

    // Slow path: divide the high word first, then the low word, using
    // two unsigned 32-bit DIVs (schoolbook long division base 2^32).
    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ xchgl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert result back to negative
    __ lneg($tmp2$$Register, $dst$$Register);
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register); // Use unsigned division
    __ xchgl($dst$$Register, $tmp2$$Register);
    // Fallthrow for final divide, tmp2 has 32 bit hi result

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register); // Use unsigned division

    __ bind(Ldone);
    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
    // A negative constant divisor flips the sign of the quotient.
    if (con < 0) {
      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}
8931 
// Remainder Register Long (remainder fit into 32 bits)
// Inline 64-bit % 32-bit constant.  Same two-step unsigned-DIV scheme as
// divL_eReg_imm32 above, but keeping the remainder (EDX after the final
// DIV) instead of the quotient; the epilogue sign-extends it to a long.
instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
  match(Set dst (ModL dst imm));
  effect( TEMP tmp, TEMP tmp2, KILL cr );
  ins_cost(1000);
  format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
            "CMP    $tmp,EDX\n\t"
            "JA,s   fast\n\t"
            "MOV    $tmp2,EAX\n\t"
            "MOV    EAX,EDX\n\t"
            "MOV    EDX,0\n\t"
            "JLE,s  pos\n\t"
            "LNEG   EAX : $tmp2\n\t"
            "DIV    $tmp # unsigned division\n\t"
            "MOV    EAX,$tmp2\n\t"
            "DIV    $tmp\n\t"
            "NEG    EDX\n\t"
            "JMP,s  done\n"
    "pos:\n\t"
            "DIV    $tmp\n\t"
            "MOV    EAX,$tmp2\n"
    "fast:\n\t"
            "DIV    $tmp\n"
    "done:\n\t"
            "MOV    EAX,EDX\n\t"
            "SAR    EDX,31" %}
  // (The format string previously ended with a stray "\n\t", leaving a
  // dangling blank continuation line in PrintOptoAssembly output.)
  ins_encode %{
    // Only reached for constants needing no 0 / -1 / min_jint special case.
    int con = (int)$imm$$constant;
    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
    int pcon = (con > 0) ? con : -con;   // |divisor|; remainder sign follows dividend
    Label  Lfast, Lpos, Ldone;

    __ movl($tmp$$Register, pcon);
    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit

    // Slow path: divide high word then low word (base-2^32 long division).
    __ movl($tmp2$$Register, $dst$$Register); // save
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
    __ jccb(Assembler::lessEqual, Lpos); // result is positive

    // Negative dividend.
    // convert value to positive to use unsigned division
    __ lneg($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);
    __ divl($tmp$$Register);
    // revert remainder back to negative
    __ negl(HIGH_FROM_LOW($dst$$Register));
    __ jmpb(Ldone);

    __ bind(Lpos);
    __ divl($tmp$$Register);
    __ movl($dst$$Register, $tmp2$$Register);

    __ bind(Lfast);
    // fast path: src is positive
    __ divl($tmp$$Register);

    __ bind(Ldone);
    // Move the 32-bit remainder (EDX) to the low word and sign-extend
    // it into the high word to form the long result.
    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign

  %}
  ins_pipe( pipe_slow );
%}
8998 
// Integer Shift Instructions
// Shift Left by one
// Uses the short one-bit form SHL r/m32, 1 (opcode D1 /4).
instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL    $dst,$shift" %}
  opcode(0xD1, 0x4);  /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Shift Left by 8-bit immediate
// SHL r/m32, imm8 (opcode C1 /4 ib).
instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL    $dst,$shift" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}

// Shift Left by variable
// SHL r/m32, CL (opcode D3 /4); the shift count is pinned to ECX.
instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL    $dst,$shift" %}
  opcode(0xD3, 0x4);  /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
9035 
// Arithmetic shift right by one
// SAR r/m32, 1 (opcode D1 /7).
instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR    $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Arithmetic shift right by one
// Memory form: load-shift-store folded into SAR m32, 1.
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR    $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
// SAR r/m32, imm8 (opcode C1 /7 ib).
// NOTE(review): this register form uses the ialu_mem_imm pipe class,
// which looks copied from the memory variant below; it only affects
// scheduling heuristics, but confirm whether ialu_reg was intended.
instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR    $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by 8-bit immediate
// Memory form: SAR m32, imm8.
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR    $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}

// Arithmetic Shift Right by variable
// SAR r/m32, CL (opcode D3 /7); shift count pinned to ECX.
instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR    $dst,$shift" %}
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
9092 
// Logical shift right by one
// SHR r/m32, 1 (opcode D1 /5).
instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR    $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Logical Shift Right by 8-bit immediate
// SHR r/m32, imm8 (opcode C1 /5 ib).
instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR    $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}


// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
// This idiom is used by the compiler for the i2b bytecode.
// Recognized as a single sign-extending byte move (MOVSX).
// Does not touch flags, hence no eFlagsReg KILL.
instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));

  size(3);
  format %{ "MOVSX  $dst,$src :8" %}
  ins_encode %{
    __ movsbl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
// This idiom is used by the compiler for the i2s bytecode.
// Recognized as a single sign-extending word move (MOVSX).
instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));

  size(3);
  format %{ "MOVSX  $dst,$src :16" %}
  ins_encode %{
    __ movswl($dst$$Register, $src$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}


// Logical Shift Right by variable
// SHR r/m32, CL (opcode D3 /5); shift count pinned to ECX.
instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR    $dst,$shift" %}
  opcode(0xD3, 0x5);  /* D3 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
9156 
9157 
9158 //----------Logical Instructions-----------------------------------------------
9159 //----------Integer Logical Instructions---------------------------------------
9160 // And Instructions
9161 // And Register with Register
9162 instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
9163   match(Set dst (AndI dst src));
9164   effect(KILL cr);
9165 
9166   size(2);
9167   format %{ "AND    $dst,$src" %}
9168   opcode(0x23);
9169   ins_encode( OpcP, RegReg( dst, src) );
9170   ins_pipe( ialu_reg_reg );
9171 %}
9172 
9173 // And Register with Immediate
9174 instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
9175   match(Set dst (AndI dst src));
9176   effect(KILL cr);
9177 
9178   format %{ "AND    $dst,$src" %}
9179   opcode(0x81,0x04);  /* Opcode 81 /4 */
9180   // ins_encode( RegImm( dst, src) );
9181   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
9182   ins_pipe( ialu_reg );
9183 %}
9184 
9185 // And Register with Memory
9186 instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
9187   match(Set dst (AndI dst (LoadI src)));
9188   effect(KILL cr);
9189 
9190   ins_cost(125);
9191   format %{ "AND    $dst,$src" %}
9192   opcode(0x23);
9193   ins_encode( OpcP, RegMem( dst, src) );
9194   ins_pipe( ialu_reg_mem );
9195 %}
9196 
9197 // And Memory with Register
9198 instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
9199   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9200   effect(KILL cr);
9201 
9202   ins_cost(150);
9203   format %{ "AND    $dst,$src" %}
9204   opcode(0x21);  /* Opcode 21 /r */
9205   ins_encode( OpcP, RegMem( src, dst ) );
9206   ins_pipe( ialu_mem_reg );
9207 %}
9208 
9209 // And Memory with Immediate
9210 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
9211   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
9212   effect(KILL cr);
9213 
9214   ins_cost(125);
9215   format %{ "AND    $dst,$src" %}
9216   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
9217   // ins_encode( MemImm( dst, src) );
9218   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
9219   ins_pipe( ialu_mem_imm );
9220 %}
9221 
// Or Instructions
// Or Register with Register
// OR r32, r/m32 (opcode 0x0B /r).
instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR     $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// OR of an int with a pointer's raw bits (CastP2X); same OR encoding,
// the cast is a no-op at machine level.
instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR     $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}


// Or Register with Immediate
// OpcSErm/Con8or32 select the short sign-extended imm8 form when possible.
instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  format %{ "OR     $dst,$src" %}
  opcode(0x81,0x01);  /* Opcode 81 /1 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Or Register with Memory
// Load folded into OR r,m.
instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR     $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Or Memory with Register
// Load-and-store folded into OR m,r (opcode 0x09 /r).
instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR     $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Or Memory with Immediate
// Load-and-store folded into OR m,imm.
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR     $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
9295 
// ROL/ROR
// ROL expand
// Helper instructs with no match rule: they are only instantiated by the
// expand rules below, which recognize the shift-or-shift rotate idioms.
instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL    $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}

// ROL r/m32, imm8.
instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1  /0  */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}

// ROL r/m32, CL; dst is ncxRegI because ECX holds the count.
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL    $dst, $shift" %}
  opcode(0xD3, 0x0);    /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROL expand

// ROL 32bit by one once
// (x << 1) | (x >>> -1) is a rotate-left by one.
instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm1(dst, lshift, cr);
  %}
%}

// ROL 32bit var by imm8 once
// The predicate requires lshift + rshift == 0 (mod 32), i.e. the two
// shift counts form a genuine rotate.
instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}

// ROL 32bit var by var once
// (x << s) | (x >>> (0 - s)) rotates left by s.
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}

// ROL 32bit var by var once
// (x << s) | (x >>> (32 - s)) rotates left by s.
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}
9362 
// ROR expand: internal-use instructions (effect() only, no match rule) that
// the ROR match rules below expand into.  All rotate $dst in place and
// clobber EFLAGS (KILL cr).
instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR    $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}

// Rotate right by an 8-bit immediate count.
instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}

// Rotate right by a variable count; the count must be in ECX (CL), so dst
// is constrained to a non-ECX register (ncxRegI).
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR    $dst, $shift" %}
  opcode(0xD3, 0x1);    /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
// end of ROR expand
9391 
// ROR right once
// Matches (x >>> 1) | (x << -1); the -1 left-shift count behaves as 31
// under x86's 5-bit shift-count masking, so the pair is a 1-bit rotate.
instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm1(dst, rshift, cr);
  %}
%}

// ROR 32bit by immI8 once
// Only matches when the two shift counts sum to 0 mod 32 (a true rotate).
instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));

  expand %{
    rorI_eReg_imm8(dst, rshift, cr);
  %}
%}

// ROR 32bit var by var once
// (x >>> s) | (x << (0 - s)): 0-s and 32-s give the same rotate amount under
// 5-bit count masking.  Variable count lives in ECX; dst must avoid ECX.
instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}

// ROR 32bit var by var once
// Same as above with the complementary count written as (32 - s).
instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));

  expand %{
    rorI_eReg_CL(dst, shift, cr);
  %}
%}
9428 
// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR    $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// x ^ -1 is bitwise complement, so emit the one-operand NOT instead of an
// XOR with a 32-bit immediate.  NOT does not modify EFLAGS, hence no
// KILL cr effect is declared.
instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));

  size(2);
  format %{ "NOT    $dst" %}
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
// Con8or32 presumably selects the short sign-extended imm8 encoding when
// the constant fits in a byte -- verify against the encoding classes.
instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR    $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR    $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
// Read-modify-write form: the load, xor and store fold into one instruction.
instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR    $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR    $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
9501 
//----------Convert Int to Boolean---------------------------------------------

// Helper move with no match rule; used only by the Conv2B expands below.
// NOTE(review): enc_Copy is presumably a copy encoder that can elide the MOV
// when dst == src (hence "nocopy") -- confirm against the encoding section.
instruct movI_nocopy(eRegI dst, eRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV    $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Helper: entered with dst holding a copy of src.  NEG sets CF iff src != 0,
// then ADC computes dst = -src + src + CF = CF, i.e. dst = (src != 0) ? 1 : 0.
instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG    $dst\n\t"
            "ADC    $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Convert int to boolean (0 or 1): copy, then the NEG/ADC trick (see ci2b).
instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Helper move for the pointer flavor of Conv2B (pointer source, int dest).
instruct movP_nocopy(eRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV    $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Helper: same NEG/ADC trick as ci2b, for a pointer source.
instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG    $dst\n\t"
            "ADC    $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Convert pointer to boolean: 0 for null, 1 otherwise.
instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}
9555 
// CmpLTMask: dst = (p < q) ? -1 : 0.  SETlt produces 0/1, NEG turns the 1
// into an all-ones mask.
instruct cmpLTMask( eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr ) %{
  match(Set dst (CmpLTMask p q));
  effect( KILL cr );
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  format %{ "XOR    $dst,$dst\n\t"
            "CMP    $p,$q\n\t"
            "SETlt  $dst\n\t"
            "NEG    $dst" %}
  ins_encode( OpcRegReg(0x33,dst,dst),
              OpcRegReg(0x3B,p,q),
              setLT_reg(dst), neg_reg(dst) );
  ins_pipe( pipe_slow );
%}

// CmpLTMask against zero: an arithmetic shift right by 31 spreads the sign
// bit, yielding -1 when dst is negative and 0 otherwise.
instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
  match(Set dst (CmpLTMask dst zero));
  effect( DEF dst, KILL cr );
  ins_cost(100);

  format %{ "SAR    $dst,31" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, 0x1F ) );
  ins_pipe( ialu_reg );
%}


// Fused form of p = (p - q) + ((p < q) ? y : 0):
// SUB leaves the borrow in CF, SBB ECX,ECX materializes it as an
// all-ones/zero mask, AND selects y through the mask, ADD applies it.
instruct cadd_cmpLTMask( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect( KILL tmp, KILL cr );
  ins_cost(400);
  // annoyingly, $tmp has no edges so you can't ask for it in
  // any format or encoding
  format %{ "SUB    $p,$q\n\t"
            "SBB    ECX,ECX\n\t"
            "AND    ECX,$y\n\t"
            "ADD    $p,ECX" %}
  ins_encode( enc_cmpLTP(p,q,y,tmp) );
  ins_pipe( pipe_cmplt );
%}
9597 
9598 /* If I enable this, I encourage spilling in the inner loop of compress.
9599 instruct cadd_cmpLTMask_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{
9600   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
9601   effect( USE_KILL tmp, KILL cr );
9602   ins_cost(400);
9603 
9604   format %{ "SUB    $p,$q\n\t"
9605             "SBB    ECX,ECX\n\t"
9606             "AND    ECX,$y\n\t"
9607             "ADD    $p,ECX" %}
9608   ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
9609 %}
9610 */
9611 
//----------Long Instructions------------------------------------------------
// On 32-bit x86 a long lives in a register pair; $x.lo and $x.hi name the
// low and high 32-bit halves.

// Add Long Register with Register
// Add the low halves first; ADC then propagates the carry into the highs.
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD    $dst.lo,$src.lo\n\t"
            "ADC    $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD    $dst.lo,$src.lo\n\t"
            "ADC    $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory ($mem is the low word, $mem+4 the high word)
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD    $dst.lo,$mem\n\t"
            "ADC    $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
// SUB the low halves; SBB then propagates the borrow into the highs.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB    $dst.lo,$src.lo\n\t"
            "SBB    $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB    $dst.lo,$src.lo\n\t"
            "SBB    $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB    $dst.lo,$mem\n\t"
            "SBB    $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long (0 - dst): negate both halves, then SBB subtracts the borrow
// generated by the low-half negation out of the high half.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}
9691 
// And Long Register with Register
// Bitwise ops have no cross-word carry, so the two halves are independent.
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND    $dst.lo,$src.lo\n\t"
            "AND    $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND    $dst.lo,$src.lo\n\t"
            "AND    $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory ($mem is the low word, $mem+4 the high word)
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND    $dst.lo,$mem\n\t"
            "AND    $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Or Long Register with Register
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR     $dst.lo,$src.lo\n\t"
            "OR     $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR     $dst.lo,$src.lo\n\t"
            "OR     $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR     $dst.lo,$mem\n\t"
            "OR     $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
9759 
// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// x ^ -1 is bitwise complement: NOT each half.  NOT does not modify EFLAGS,
// so no flags effect is declared.  HIGH_FROM_LOW yields the register holding
// the high word that is paired with the low-word register.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));
  format %{ "NOT    $dst.lo\n\t"
            "NOT    $dst.hi" %}
  ins_encode %{
     __ notl($dst$$Register);
     __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory ($mem is the low word, $mem+4 the high word)
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR    $dst.lo,$mem\n\t"
            "XOR    $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
9805 
// Shift Left Long by 1
// A 64-bit left shift by one is ADD lo,lo (doubling the low word) followed
// by ADC hi,hi, which doubles the high word while shifting in the bit that
// carried out of the low word.  Guarded by the UseNewLongLShift flag.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
// Two applications of the ADD/ADC doubling sequence above.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi\n\t"
            "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
// Three applications of the ADD/ADC doubling sequence.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi\n\t"
            "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi\n\t"
            "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}
9862 
// Shift Left Long by 1-31
// SHLD shifts bits from the low word into the high word, then SHL shifts
// the low word itself.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
            "SHL    $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// The whole low word moves into the high word, which is then shifted by the
// residual cnt-32; the low word is zeroed.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV    $dst.hi,$dst.lo\n"
          "\tSHL    $dst.hi,$cnt-32\n"
          "\tXOR    $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
// Bit 5 of the count distinguishes shifts >= 32: in that case the low word
// is first moved into the high word and cleared, then SHLD/SHL apply the
// residual count (the hardware masks the count to 5 bits).
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST   $shift,32\n\t"
            "JEQ,s  small\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "XOR    $dst.lo,$dst.lo\n"
    "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
            "SHL    $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9903 
// Shift Right Long by 1-31 (logical/unsigned)
// SHRD shifts bits from the high word into the low word, then SHR shifts
// the high word, filling with zeros.
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
            "SHR    $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63 (logical/unsigned)
// The high word moves into the low word and is shifted by cnt-32; the high
// word is zeroed.
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV    $dst.lo,$dst.hi\n"
          "\tSHR    $dst.lo,$cnt-32\n"
          "\tXOR    $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by variable (logical/unsigned)
// Bit 5 of the count selects the >= 32 path: move high into low and zero
// the high word before applying the residual count with SHRD/SHR.
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST   $shift,32\n\t"
            "JEQ,s  small\n\t"
            "MOV    $dst.lo,$dst.hi\n\t"
            "XOR    $dst.hi,$dst.hi\n"
    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
            "SHR    $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9944 
// Shift Right Long by 1-31 (arithmetic/signed)
// Like the unsigned form but the high word uses SAR, replicating the sign.
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
            "SAR    $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right Long by 32-63 (arithmetic/signed)
// High word moves into the low word and is shifted by cnt-32; SAR hi,31
// fills the high word with copies of the sign bit.
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV    $dst.lo,$dst.hi\n"
          "\tSAR    $dst.lo,$cnt-32\n"
          "\tSAR    $dst.hi,31" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
// Bit 5 of the count selects the >= 32 path: move high into low and
// sign-fill the high word before applying the residual count.
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST   $shift,32\n\t"
            "JEQ,s  small\n\t"
            "MOV    $dst.lo,$dst.hi\n\t"
            "SAR    $dst.hi,31\n"
    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
            "SAR    $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9985 
9986 
//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
// FUCOMIP (P6 and later) sets ZF/PF/CF in EFLAGS directly.  An unordered
// result (NaN operand) sets PF, so the fixup after JNP forces CF via
// MOV AH,1 / SAHF, making NaN compare as "less than".  Writing AH is why
// EAX is killed.
instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction\n\t"
            "JNP    exit\n\t"
            "MOV    ah,1       // saw a NaN, set CF\n\t"
            "SAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same compare for a CF-only flags consumer (eFlagsRegUCF): no NaN fixup is
// emitted, so EAX is not touched and the sequence is just FLD + FUCOMIP.
instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
// Pre-P6 sequence: FCOM-with-pop, copy the FPU status word to AX (FNSTSW),
// test the unordered bit, treat unordered as "less than" by setting AH,
// then SAHF moves the condition bits into EFLAGS.  Clobbers EAX.
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD    $src1\n\t"
            "FCOMp  $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST   AX,0x400\n\t"
            "JZ,s   flags\n\t"
            "MOV    AH,1\t# unordered treat as LT\n"
    "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
// FTST (D9 E4) compares ST(0) against +0.0; CmpF_Result then materializes
// the three-way result (-1/0/1) into the integer register $dst.
instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD  $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_D(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
// Three-way double compare; CmpF_Result converts the FPU condition into an
// integer -1/0/1 in $dst.
instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD  $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
10070 
// float compare and set condition codes in EFLAGS by XMM regs
// COMISD (66 0F 2F) sets ZF/PF/CF; the same NaN fixup as the P6 FPU compare
// forces CF on an unordered result (PF set), which is why EAX (AH) is killed.
instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst src));
  effect(KILL rax);
  ins_cost(125);
  format %{ "COMISD $dst,$src\n"
          "\tJNP    exit\n"
          "\tMOV    ah,1       // saw a NaN, set CF\n"
          "\tSAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// CF-only flags consumer (eFlagsRegUCF): bare COMISD, no NaN fixup, no
// EAX kill.
instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst src));
  ins_cost(100);
  format %{ "COMISD $dst,$src" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand variant of cmpXD_cc, with the same NaN fixup.
instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst (LoadD src)));
  effect(KILL rax);
  ins_cost(145);
  format %{ "COMISD $dst,$src\n"
          "\tJNP    exit\n"
          "\tMOV    ah,1       // saw a NaN, set CF\n"
          "\tSAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Memory-operand, CF-only variant: no NaN fixup needed.
instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst (LoadD src)));
  ins_cost(100);
  format %{ "COMISD $dst,$src" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM
// $dst is zeroed with XOR *before* the compare because XOR clobbers flags.
// After COMISD: unordered (NaN) and less-than both take the DEC path (-1),
// equal leaves 0, greater takes the INC path (+1).
instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "XOR    $dst,$dst\n"
          "\tCOMISD $src1,$src2\n"
          "\tJP,s   nan\n"
          "\tJEQ,s  exit\n"
          "\tJA,s   inc\n"
      "nan:\tDEC    $dst\n"
          "\tJMP,s  exit\n"
      "inc:\tINC    $dst\n"
      "exit:"
                %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
             CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
// Same three-way compare with a memory operand.  Here $dst is loaded with 0
// *after* the compare using MOV, which does not change flags (see the
// "do not blow flags" note in the format).
instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD mem)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "COMISD $src1,$mem\n"
          "\tMOV    $dst,0\t\t# do not blow flags\n"
          "\tJP,s   nan\n"
          "\tJEQ,s  exit\n"
          "\tJA,s   inc\n"
      "nan:\tDEC    $dst\n"
          "\tJMP,s  exit\n"
      "inc:\tINC    $dst\n"
      "exit:"
                %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
             LdImmI(dst,0x0), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}
10166 
10167 
// Subtract double registers (x87 path, UseSSE<=1): push $src onto the FPU
// stack, then subtract-with-pop into $dst.
instruct subD_reg(regD dst, regD src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD    $src\n\t"
            "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Subtract with an explicit RoundDouble: the FSTP_D store through a stack
// slot forces the 80-bit x87 result down to 64-bit double precision
// ("D-round").
instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD    $src2\n\t"
            "DSUB   ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_D(src2),
              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Subtract a double loaded from memory.
instruct subD_reg_mem(regD dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD    $src\n\t"
            "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
10208 
// Absolute value via FABS (D9 E1), which operates on the FPU stack top;
// the regDPR1 operand class pins both dst and src to ST(0).
instruct absD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// SSE2 absolute value: clear the sign bit by ANDing with the
// 0x7FFFFFFFFFFFFFFF mask (see AbsXD_encoding).
instruct absXD_reg( regXD dst ) %{
  predicate(UseSSE>=2);
  match(Set dst (AbsD dst));
  format %{ "ANDPD  $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
  ins_encode( AbsXD_encoding(dst));
  ins_pipe( pipe_slow );
%}

// Negate via FCHS (D9 E0) on the FPU stack top.
instruct negD_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// SSE2 negate: flip the sign bit by XORing with the 0x8000000000000000
// constant held in double_signflip_pool.
instruct negXD_reg( regXD dst ) %{
  predicate(UseSSE>=2);
  match(Set dst (NegD dst));
  format %{ "XORPD  $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
  ins_encode %{
     __ xorpd($dst$$XMMRegister,
              ExternalAddress((address)double_signflip_pool));
  %}
  ins_pipe( pipe_slow );
%}
10247 
// Add double registers (x87 path): push $src, add into $dst.
instruct addD_reg(regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD    $src\n\t"
            "DADD   $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Add with an explicit RoundDouble: the FSTP_D store through a stack slot
// forces the 80-bit x87 result to 64-bit double precision ("D-round").
instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD    $src2\n\t"
            "DADD   ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_D(src2),
              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Add a double loaded from memory.
instruct addD_reg_mem(regD dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD    $src\n\t"
            "DADDp  $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory
// Read-modify-write: load the double from $dst, add $src, store back.
// NOTE(review): set_instruction_start before the final store presumably
// resets the instruction-start bookkeeping for the FST -- confirm against
// the encoding-class definitions before relying on this.
instruct addD_mem_reg(memory dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D  $dst\n\t"
            "DADD   ST,$src\n\t"
            "FST_D  $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}

// Add the constant 1.0: FLD1 pushes 1.0 directly, avoiding a constant-table
// load.
instruct addD_reg_imm1(regD dst, immD1 con) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst con));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp  $dst,ST" %}
  ins_encode %{
    __ fld1();
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg);
%}

// Add a general double constant from the constant table.  The predicate
// excludes 0.0 and 1.0 (1.0 is handled by addD_reg_imm1 above).
instruct addD_reg_imm(regD dst, immD con) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst con));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADDp  $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ faddp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}

// Constant add with a rounding store to a stack slot (forces 64-bit
// precision); same 0.0/1.0 exclusion as addD_reg_imm.
instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DADD   ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fadd($src$$reg);
    __ fstp_d(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
10347 
// Add two double precision floating point values in xmm
// Bytes F2 0F 58 /r = ADDSD.
instruct addXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst src));
  format %{ "ADDSD  $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// ADDSD with the constant taken from the constant table.
instruct addXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst con));
  format %{ "ADDSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// ADDSD with a memory operand (LoadD folded in).
instruct addXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst (LoadD mem)));
  format %{ "ADDSD  $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Sub two double precision floating point values in xmm
// Bytes F2 0F 5C /r = SUBSD.
instruct subXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst src));
  format %{ "SUBSD  $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// SUBSD with constant-table operand.
instruct subXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst con));
  format %{ "SUBSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// SUBSD with a memory operand (LoadD folded in).
instruct subXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst (LoadD mem)));
  format %{ "SUBSD  $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Mul two double precision floating point values in xmm
// Bytes F2 0F 59 /r = MULSD.
instruct mulXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst src));
  format %{ "MULSD  $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// MULSD with constant-table operand.
instruct mulXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst con));
  format %{ "MULSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// MULSD with a memory operand (LoadD folded in).
instruct mulXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst (LoadD mem)));
  format %{ "MULSD  $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Div two double precision floating point values in xmm
// Bytes F2 0F 5E /r = DIVSD.
// NOTE(review): the opcode() attribute below looks redundant — the
// ins_encode emits every byte explicitly and never uses OpcP/OpcS;
// sibling XMM rules omit it. Left as-is (behavior-neutral).
instruct divXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst src));
  format %{ "DIVSD  $dst,$src" %}
  opcode(0xF2, 0x0F, 0x5E);
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// DIVSD with constant-table operand.
instruct divXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst con));
  format %{ "DIVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    __ divsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// DIVSD with a memory operand (LoadD folded in).
instruct divXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst (LoadD mem)));
  format %{ "DIVSD  $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10456 
10457 
// x87 (UseSSE<=1) double multiply: push src, FMULP into dst.
instruct mulD_reg(regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD    $src\n\t"
            "DMULp  $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
// ins_cost(1) deliberately undercuts mulD_reg so this rule wins whenever
// the predicate (strict-FP method) holds.
instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (MulD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double multiplies

  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_D(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Multiply by a double constant from the constant table.
// Predicate excludes 0.0 and 1.0 (handled more cheaply elsewhere).
instruct mulD_reg_imm(regD dst, immD con) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst con));
  ins_cost(200);
  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
            "DMULp  $dst,ST" %}
  ins_encode %{
    __ fld_d($constantaddress($con));
    __ fmulp($dst$$reg);
  %}
  ins_pipe(fpu_reg_mem);
%}


// Multiply with a memory operand folded into the load.
instruct mulD_reg_mem(regD dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D  $src\n\t"
            "DMULp  $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D  $mem\n\t"
            "DMUL   ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_F(src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
10538 
10539 
// MACRO3 -- addD a mulD
// This instruction is a '2-address' instruction in that the result goes
// back to src2.  This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DADDp  $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_F(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subD a mulD
// Computes src0*src1 - src2, storing back into src2 via FSUBRP (DE E0+i).
instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_F(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}
10572 
10573 
// x87 (UseSSE<=1) double divide: push src, FDIVP into dst.
instruct divD_reg(regD dst, regD src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD    $src\n\t"
            "FDIVp  $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
10586 
10587 // Strict FP instruction biases argument before division then
10588 // biases result, to avoid double rounding of subnormals.
10589 //
10590 // scale dividend by multiplying dividend by 2^(-15360)
10591 // load divisor
10592 // divide scaled dividend by divisor
10593 // rescale quotient by 2^(15360)
10594 //
// Strict-FP divide: bias the dividend, divide, then un-bias the quotient
// (see the comment block above).  Mirrors strictfp_mulD_reg.
instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
  // Fix: the original carried TWO predicate statements — a weak
  // "UseSSE<=1" plus the full strict-FP one — and an odd "ins_cost(01)".
  // Consolidated to the single strict-FP predicate and ins_cost(1),
  // matching strictfp_mulD_reg.
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides

  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "FDIVp  $dst,ST\n\t"
            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_D(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
10614 
// Non-strict divide with the rounded result stored straight to a stack slot.
// Predicate excludes strict-FP methods (those take the biased rule above).
instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD    $src1\n\t"
            "FDIV   ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
10627 
10628 
// x87 double remainder (ModD) via the emitModD() FPREM loop.
instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_D(dst, src),
              emitModD(),
              Push_Result_Mod_D(src),
              Pop_Reg_D(dst));
  ins_pipe( pipe_slow );
%}

// XMM double remainder: bounce both operands through the x87 stack
// (FPREM loop until C2 clears), then move the result back to XMM.
instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB    ESP,8\t # DMOD\n"
          "\tMOVSD  [ESP+0],$src1\n"
          "\tFLD_D  [ESP+0]\n"
          "\tMOVSD  [ESP+0],$src0\n"
          "\tFLD_D  [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD  $dst,[ESP+0]\n"
          "\tADD    ESP,8\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}
10667 
// x87 sine: operand is already on the FPU stack (regDPR1); D9 FE = FSIN.
instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (SinD src));
  ins_cost(1800);
  format %{ "DSIN   $dst" %}
  opcode(0xD9, 0xFE);
  ins_encode( OpcP, OpcS );
  ins_pipe( pipe_slow );
%}

// XMM sine: spill to the x87 stack, FSIN, move the result back.
instruct sinXD_reg(regXD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (SinD dst));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  ins_cost(1800);
  format %{ "DSIN   $dst" %}
  opcode(0xD9, 0xFE);
  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}

// x87 cosine; D9 FF = FCOS.
instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (CosD src));
  ins_cost(1800);
  format %{ "DCOS   $dst" %}
  opcode(0xD9, 0xFF);
  ins_encode( OpcP, OpcS );
  ins_pipe( pipe_slow );
%}

// XMM cosine via the x87 stack.
instruct cosXD_reg(regXD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (CosD dst));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  ins_cost(1800);
  format %{ "DCOS   $dst" %}
  opcode(0xD9, 0xFF);
  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}

// x87 tangent: FPTAN pushes tan(x) then 1.0; the FSTP pops the extra 1.0.
instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst(TanD src));
  format %{ "DTAN   $dst" %}
  ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
              Opcode(0xDD), Opcode(0xD8));   // fstp st
  ins_pipe( pipe_slow );
%}

// XMM tangent via the x87 stack.
instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(TanD dst));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  format %{ "DTAN   $dst" %}
  ins_encode( Push_SrcXD(dst),
              Opcode(0xD9), Opcode(0xF2),    // fptan
              Opcode(0xDD), Opcode(0xD8),   // fstp st
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}

// x87 two-argument arctangent; D9 F3 = FPATAN.
instruct atanD_reg(regD dst, regD src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA   $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_D(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// XMM arctangent via the x87 stack.
instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  format %{ "DATA   $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcXD(src),
              OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}

// x87 square root.  Note the encode emits OpcS (0xD9) before OpcP (0xFA),
// i.e. bytes D9 FA = FSQRT.
instruct sqrtD_reg(regD dst, regD src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT  $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_D(src),
              OpcS, OpcP, Pop_Reg_D(dst) );
  ins_pipe( pipe_slow );
%}
10761 
// x87 pow: X^Y computed as 2^(Y*log2(X)) via FYL2X + the shared
// pow_exp_core_encoding (frac/int split, F2XM1, manual 2^int(Q) scaling).
// Result is produced in Y's register (FPR1); X, EAX, EBX, ECX are clobbered.
instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  predicate (UseSSE<=1);
  match(Set Y (PowD X Y));  // Raise X to the Yth power
  effect(KILL rax, KILL rbx, KILL rcx);
  format %{ "SUB    ESP,8\t\t# Fast-path POW encoding\n\t"
            "FLD_D  $X\n\t"
            "FYL2X  \t\t\t# Q=Y*ln2(X)\n\t"

            "FDUP   \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP  dword [ESP]\n\t"
            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV    [ESP+0],0\n\t"
            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"

            "ADD    ESP,8"
             %}
  ins_encode( push_stack_temp_qword,
              Push_Reg_D(X),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              pow_exp_core_encoding,
              pop_stack_temp_qword);
  ins_pipe( pipe_slow );
%}
10797 
// XMM pow: both operands are bounced through the x87 stack (via [ESP]),
// then the same FYL2X + pow_exp_core_encoding sequence as powD_reg runs,
// and the result is moved back into the XMM dst.
instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
  predicate (UseSSE>=2);
  match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
  effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
  format %{ "SUB    ESP,8\t\t# Fast-path POW encoding\n\t"
            "MOVSD  [ESP],$src1\n\t"
            "FLD    FPR1,$src1\n\t"
            "MOVSD  [ESP],$src0\n\t"
            "FLD    FPR1,$src0\n\t"
            "FYL2X  \t\t\t# Q=Y*ln2(X)\n\t"

            "FDUP   \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP  dword [ESP]\n\t"
            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV    [ESP+0],0\n\t"
            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"

            "FST_D  [ESP]\n\t"
            "MOVSD  $dst,[ESP]\n\t"
            "ADD    ESP,8"
             %}
  ins_encode( push_stack_temp_qword,
              push_xmm_to_fpr1(src1),
              push_xmm_to_fpr1(src0),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              pow_exp_core_encoding,
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10839 
10840 
// x87 exp: e^x computed as 2^(x*log2(e)) — FLDL2E + FMULP, then the shared
// pow_exp_core_encoding (same core as powD_reg).  Operates in place on FPR1.
instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  predicate (UseSSE<=1);
  match(Set dpr1 (ExpD dpr1));
  effect(KILL rax, KILL rbx, KILL rcx);
  // Fix: the first format line was missing its trailing "\n\t", so the
  // FLDL2E line was concatenated onto the SUB line in disassembly output
  // (compare powD_reg / expXD_reg, whose first lines end with "\n\t").
  format %{ "SUB    ESP,8\t\t# Fast-path EXP encoding\n\t"
            "FLDL2E \t\t\t# Ld log2(e) X\n\t"
            "FMULP  \t\t\t# Q=X*log2(e)\n\t"

            "FDUP   \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP  dword [ESP]\n\t"
            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV    [ESP+0],0\n\t"
            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"

            "ADD    ESP,8"
             %}
  ins_encode( push_stack_temp_qword,
              Opcode(0xD9), Opcode(0xEA),   // fldl2e
              Opcode(0xDE), Opcode(0xC9),   // fmulp
              pow_exp_core_encoding,
              pop_stack_temp_qword);
  ins_pipe( pipe_slow );
%}
10876 
// XMM exp: spill src to the x87 stack, run the FLDL2E/FMULP + core
// sequence, then move the result back into the XMM dst.
instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  predicate (UseSSE>=2);
  match(Set dst (ExpD src));
  effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
  format %{ "SUB    ESP,8\t\t# Fast-path EXP encoding\n\t"
            "MOVSD  [ESP],$src\n\t"
            "FLDL2E \t\t\t# Ld log2(e) X\n\t"
            "FMULP  \t\t\t# Q=X*log2(e) X\n\t"

            "FDUP   \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP  dword [ESP]\n\t"
            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV    [ESP+0],0\n\t"
            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"

            "FST_D  [ESP]\n\t"
            "MOVSD  $dst,[ESP]\n\t"
            "ADD    ESP,8"
             %}
  ins_encode( Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xEA),   // fldl2e
              Opcode(0xDE), Opcode(0xC9),   // fmulp
              pow_exp_core_encoding,
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10915 
10916 
10917 
// x87 log10: log10(x) = log10(2) * log2(x) via FLDLG2 + FYL2X.
instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (Log10D src));
  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
  // fxch         ; swap ST(0) with ST(1)
  // fyl2x        ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FXCH   \n\t"
            "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
              Opcode(0xD9), Opcode(0xC9),   // fxch
              Opcode(0xD9), Opcode(0xF1));  // fyl2x

  ins_pipe( pipe_slow );
%}

// XMM log10: push the constant first, spill src on top of it (so no FXCH
// is needed), FYL2X, then move the result back to XMM.
instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  match(Set dst (Log10D src));
  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
  // fyl2x        ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
              Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              Push_ResultXD(dst));

  ins_pipe( pipe_slow );
%}

// x87 natural log: ln(x) = ln(2) * log2(x) via FLDLN2 + FYL2X.
instruct logD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (LogD src));
  // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
  // fxch         ; swap ST(0) with ST(1)
  // fyl2x        ; compute log_e(2) * log_2(x)
  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
            "FXCH   \n\t"
            "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
              Opcode(0xD9), Opcode(0xC9),   // fxch
              Opcode(0xD9), Opcode(0xF1));  // fyl2x

  ins_pipe( pipe_slow );
%}

// XMM natural log, same constant-first trick as log10XD_reg.
instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  // The source and result Double operands in XMM registers
  match(Set dst (LogD src));
  // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
  // fyl2x        ; compute log_e(2) * log_2(x)
  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
            "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
              Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              Push_ResultXD(dst));
  ins_pipe( pipe_slow );
%}
10987 
10988 //-------------Float Instructions-------------------------------
10989 // Float Math
10990 
10991 // Code for float compare:
10992 //     fcompp();
10993 //     fwait(); fnstsw_ax();
10994 //     sahf();
10995 //     movl(dst, unordered_result);
10996 //     jcc(Assembler::parity, exit);
10997 //     movl(dst, less_result);
10998 //     jcc(Assembler::below, exit);
10999 //     movl(dst, equal_result);
11000 //     jcc(Assembler::equal, exit);
11001 //     movl(dst, greater_result);
11002 //   exit:
11003 
11004 // P6 version of float compare, sets condition codes in EFLAGS
// P6 version of float compare, sets condition codes in EFLAGS
// FUCOMIP (DF E8+i) sets ZF/PF/CF directly; the fixup turns a NaN
// (PF set) into "less-than" by forcing CF via AH/SAHF.
// NOTE(review): Push_Reg_D is used here for a float (regF) operand,
// matching the other float-compare rules in this section — confirm the
// encoding class is operand-size agnostic.
instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction\n\t"
            "JNP    exit\n\t"
            "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Cheaper variant for eFlagsRegUCF consumers: no NaN fixup needed.
instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}
11034 
11035 
11036 // Compare & branch
// Pre-P6 float compare: FCOMP + FNSTSW/SAHF to move FPU flags into EFLAGS,
// with an explicit unordered(NaN)-treated-as-LT fixup.
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD    $src1\n\t"
            "FCOMp  $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST   AX,0x400\n\t"
            "JZ,s   flags\n\t"
            "MOV    AH,1\t# unordered treat as LT\n"
    "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
// FTST (D9 E4) compares ST(0) against +0.0; CmpF_Result materializes
// the -1/0/1 integer.
instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF  $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_D(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF  $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
11083 
11084 // float compare and set condition codes in EFLAGS by XMM regs
// SSE float compare: COMISS (0F 2F) sets EFLAGS directly; the shared
// cmpF_P6_fixup converts the unordered (NaN, PF=1) case into CF=1.
instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst src));
  effect(KILL rax);
  ins_cost(145);
  format %{ "COMISS $dst,$src\n"
          "\tJNP    exit\n"
          "\tMOV    ah,1       // saw a NaN, set CF\n"
          "\tSAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// eFlagsRegUCF consumers tolerate the raw COMISS flags — no fixup.
instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst src));
  ins_cost(100);
  format %{ "COMISS $dst,$src" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpX_cc (LoadF folded in).
instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst (LoadF src)));
  effect(KILL rax);
  ins_cost(165);
  format %{ "COMISS $dst,$src\n"
          "\tJNP    exit\n"
          "\tMOV    ah,1       // saw a NaN, set CF\n"
          "\tSAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Memory-operand form without the NaN fixup.
instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst (LoadF src)));
  ins_cost(100);
  format %{ "COMISS $dst,$src" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(dst, src));
  ins_pipe( pipe_slow );
%}
11135 
// Compare into -1,0,1 in XMM
// SSE three-way compare: zero dst (XOR, before COMISS so the flags set by
// the compare survive), then branch on the COMISS flags.  NaN (PF set) and
// "below" both take the DEC path to -1; "above" INCs to +1; equal leaves 0.
instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "XOR    $dst,$dst\n"
          "\tCOMISS $src1,$src2\n"
          "\tJP,s   nan\n"
          "\tJEQ,s  exit\n"
          "\tJA,s   inc\n"
      "nan:\tDEC    $dst\n"
          "\tJMP,s  exit\n"
      "inc:\tINC    $dst\n"
      "exit:"
                %}
  opcode(0x0F, 0x2F);
  ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
// As cmpX_reg but with the LoadF folded into COMISS.  dst is zeroed AFTER
// the compare, so it must use MOV dst,0 (flag-preserving) instead of XOR.
instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF mem)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "COMISS $src1,$mem\n"
          "\tMOV    $dst,0\t\t# do not blow flags\n"
          "\tJP,s   nan\n"
          "\tJEQ,s  exit\n"
          "\tJA,s   inc\n"
      "nan:\tDEC    $dst\n"
          "\tJMP,s  exit\n"
      "inc:\tINC    $dst\n"
      "exit:"
                %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}
11177 
// Spill to obtain 24-bit precision
// x87 SubF in strict 24-bit mode: compute in the FPU stack, then pop the
// result to a stack slot with a single-precision store, which performs the
// required rounding to float precision.
instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (SubF src1 src2));

  format %{ "FSUB   $dst,$src1 - $src2" %}
  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
  ins_encode( Push_Reg_F(src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Two-address form: dst -= src, with the result staying in an FPU register
// (no rounding store).
instruct subF_reg(regF dst, regF src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (SubF dst src));

  format %{ "FSUB   $dst,$src" %}
  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
  ins_encode( Push_Reg_F(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Spill to obtain 24-bit precision
// NOTE: src2 is pushed and src1 applied as the register operand — the
// operand order is swapped relative to the match, which is fine because
// AddF is commutative.
instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_F(src2),
              OpcReg_F(src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Two-address x87 add: push src, FADDP into dst.
instruct addF_reg(regF dst, regF src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst src));

  format %{ "FLD    $src\n\t"
            "FADDp  $dst,ST" %}
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_F(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
11228 
// Add two single precision floating point values in xmm
// ADDSS = F3 0F 58; two-address SSE scalar add, no x87 stack involved.
instruct addX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst src));
  format %{ "ADDSS  $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Add with a float constant taken from the constant table.
instruct addX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst con));
  format %{ "ADDSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Add with the LoadF folded into the ADDSS memory operand.
instruct addX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst (LoadF mem)));
  format %{ "ADDSS  $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}
11255 
// Subtract two single precision floating point values in xmm
// SUBSS = F3 0F 5C; register, constant-table, and folded-load forms below.
instruct subX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst src));
  format %{ "SUBSS  $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Subtract a float constant taken from the constant table.
instruct subX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst con));
  format %{ "SUBSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Subtract with the LoadF folded into the SUBSS memory operand.
instruct subX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst (LoadF mem)));
  format %{ "SUBSS  $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
11282 
// Multiply two single precision floating point values in xmm
// MULSS = F3 0F 59; register, constant-table, and folded-load forms below.
instruct mulX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst src));
  format %{ "MULSS  $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Multiply by a float constant taken from the constant table.
instruct mulX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst con));
  format %{ "MULSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Multiply with the LoadF folded into the MULSS memory operand.
instruct mulX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst (LoadF mem)));
  format %{ "MULSS  $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
11309 
// Divide two single precision floating point values in xmm
// DIVSS = F3 0F 5E; register, constant-table, and folded-load forms below.
instruct divX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst src));
  format %{ "DIVSS  $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Divide by a float constant taken from the constant table.
instruct divX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst con));
  format %{ "DIVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    __ divss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

// Divide with the LoadF folded into the DIVSS memory operand.
instruct divX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst (LoadF mem)));
  format %{ "DIVSS  $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
11336 
// Get the square root of a single precision floating point values in xmm
// The float sqrt appears in the ideal graph as ConvD2F(SqrtD(ConvF2D x))
// (sqrt is defined on double in the ideal IR); this rule collapses the
// whole tree into a single SQRTSS (F3 0F 51), which is a valid strength
// reduction for this pattern.
instruct sqrtX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  format %{ "SQRTSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// As sqrtX_reg, with the LoadF folded into the SQRTSS memory operand.
instruct sqrtX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
  format %{ "SQRTSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}

// Get the square root of a double precision floating point values in xmm
// SQRTSD = F2 0F 51; requires SSE2.
instruct sqrtXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));
  format %{ "SQRTSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// As sqrtXD_reg, with the LoadD folded into the SQRTSD memory operand.
instruct sqrtXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD mem)));
  format %{ "SQRTSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}
11370 
// x87 absolute value: FABS (D9 E1) operates implicitly on ST(0), hence the
// regFPR1 (top-of-stack) register class for both operands.
instruct absF_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// SSE absolute value: clear the sign bit by ANDPS with a 0x7FFFFFFF mask.
instruct absX_reg(regX dst ) %{
  predicate(UseSSE>=1);
  match(Set dst (AbsF dst));
  format %{ "ANDPS  $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
  ins_encode( AbsXF_encoding(dst));
  ins_pipe( pipe_slow );
%}

// x87 negate: FCHS (D9 E0) operates implicitly on ST(0).
instruct negF_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// SSE negate: flip the sign bit by XORPS with a 0x80000000 mask.
instruct negX_reg( regX dst ) %{
  predicate(UseSSE>=1);
  match(Set dst (NegF dst));
  format %{ "XORPS  $dst,[0x80000000]\t# CHS F by sign flipping" %}
  ins_encode( NegXF_encoding(dst));
  ins_pipe( pipe_slow );
%}
11406 
// Cisc-alternate to addF_reg
// Spill to obtain 24-bit precision
// Loads src2 from memory (D9 /0), adds src1 on the x87 stack, and rounds
// by popping the sum to a single-precision stack slot.
instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD    $src2\n\t"
            "FADD   ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_F(src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// Cisc-alternate to addF_reg
// This instruction does not round to 24-bits
// Folds the LoadF into the memory form of FADD (DE /0 after the D9 load).
instruct addF_reg_mem(regF dst, memory src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF dst (LoadF src)));

  format %{ "FADD   $dst,$src" %}
  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
// Here the memory operand is the LEFT AddF input; matching it directly is
// legal because AddF is commutative.
instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}

// Cisc-spill variant
// Spill to obtain 24-bit precision
// Both inputs in memory: load src2 (D9 /0), add src1 with the memory form
// of FADD (D8 /0, re-anchored via set_instruction_start), round on pop.
instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD   $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}

// Spill to obtain 24-bit precision
// Same memory/memory sequence as the cisc variant, matched without an
// explicit LoadF on src2.
instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
11478 
11479 
// Spill to obtain 24-bit precision
// Add a float constant from the constant table; rounding happens through
// the single-precision store to the stack slot.
instruct addF24_reg_imm(stackSlotF dst, regF src, immF con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD    $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
// Same add-constant sequence, but the result is popped into an FPU
// register (fstp_d to a register form), so no rounding store occurs.
instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src con));
  format %{ "FLD    $src\n\t"
            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fadd_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}
11509 
// Spill to obtain 24-bit precision
// x87 MulF in strict 24-bit mode: multiply on the FPU stack, round by
// popping the product to a single-precision stack slot.
instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD    $src1\n\t"
            "FMUL   $src2\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_F(src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
11524 //
11525 // This instruction does not round to 24-bits
11526 instruct mulF_reg(regF dst, regF src1, regF src2) %{
11527   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11528   match(Set dst (MulF src1 src2));
11529 
11530   format %{ "FLD    $src1\n\t"
11531             "FMUL   $src2\n\t"
11532             "FSTP_S $dst"  %}
11533   opcode(0xD8, 0x1); /* D8 C8+i */
11534   ins_encode( Push_Reg_F(src2),
11535               OpcReg_F(src1),
11536               Pop_Reg_F(dst) );
11537   ins_pipe( fpu_reg_reg_reg );
11538 %}
11539 
11540 
// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
// Loads src2 (D9 /0), multiplies by src1, rounds via the single-precision
// pop to the stack slot.
instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S  $src2\n\t"
            "FMUL   $src1\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_F(src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
//
// This instruction does not round to 24-bits
// Cisc-alternate to reg-reg multiply
// Same load-and-multiply, but the product is popped to a register
// (Pop_Reg_F), so no rounding store occurs.
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FMUL   $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_F(src1),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}

// Spill to obtain 24-bit precision
// Both multiply inputs in memory: load src2, multiply by src1 with the
// memory form of FMUL (D8 /1), round on pop.
instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL   $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
11584 
// Spill to obtain 24-bit precision
// Multiply by a float constant from the constant table; rounding happens
// through the single-precision store to the stack slot.
instruct mulF24_reg_imm(stackSlotF dst, regF src, immF con) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD    $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP_S $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_s(Address(rsp, $dst$$disp));
  %}
  ins_pipe(fpu_mem_reg_con);
%}
//
// This instruction does not round to 24-bits
// Same multiply-by-constant, but the result is popped to a register, so
// no rounding store occurs.
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src con));

  format %{ "FLD    $src\n\t"
            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst"  %}
  ins_encode %{
    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    __ fmul_s($constantaddress($con));
    __ fstp_d($dst$$reg);
  %}
  ins_pipe(fpu_reg_reg_con);
%}
11616 
11617 
11618 //
11619 // MACRO1 -- subsume unshared load into mulF
11620 // This instruction does not round to 24-bits
11621 instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
11622   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11623   match(Set dst (MulF (LoadF mem1) src));
11624 
11625   format %{ "FLD    $mem1    ===MACRO1===\n\t"
11626             "FMUL   ST,$src\n\t"
11627             "FSTP   $dst" %}
11628   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
11629   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
11630               OpcReg_F(src),
11631               Pop_Reg_F(dst) );
11632   ins_pipe( fpu_reg_reg_mem );
11633 %}
11634 //
11635 // MACRO2 -- addF a mulF which subsumed an unshared load
11636 // This instruction does not round to 24-bits
11637 instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
11638   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
11639   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
11640   ins_cost(95);
11641 
11642   format %{ "FLD    $mem1     ===MACRO2===\n\t"
11643             "FMUL   ST,$src1  subsume mulF left load\n\t"
11644             "FADD   ST,$src2\n\t"
11645             "FSTP   $dst" %}
11646   opcode(0xD9); /* LoadF D9 /0 */
11647   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
11648               FMul_ST_reg(src1),
11649               FAdd_ST_reg(src2),
11650               Pop_Reg_F(dst) );
11651   ins_pipe( fpu_reg_mem_reg_reg );
11652 %}
11653 
// MACRO3 -- addF a mulF
// This instruction does not round to 24-bits.  It is a '2-address'
// instruction in that the result goes back to src2.  This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD    $src0     ===MACRO3===\n\t"
            "FMUL   ST,$src1\n\t"
            "FADDP  $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_F(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}

// MACRO4 -- divF subF
// This instruction does not round to 24-bits
// Matches (src2 - src1) / src3 as one x87 sequence: push src2, subtract
// src1 and divide by src3 in the stack, pop the quotient into dst.
instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD    $src2   ===MACRO4===\n\t"
            "FSUB   ST,$src1\n\t"
            "FDIV   ST,$src3\n\t"
            "FSTP  $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_F(src2),
              subF_divF_encode(src1,src3),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}
11689 
// Spill to obtain 24-bit precision
// x87 DivF in strict 24-bit mode: divide on the FPU stack, round by
// popping the quotient to a single-precision stack slot.
instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_F(src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
//
// This instruction does not round to 24-bits
// Two-address form: dst /= src, result stays in an FPU register.
instruct divF_reg(regF dst, regF src) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF dst src));

  format %{ "FDIV   $dst,$src" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_F(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
11714 
11715 
// Spill to obtain 24-bit precision
// x87 ModF via the shared emitModD() FPREM sequence; rounding comes from
// the single-precision pop to the stack slot.
instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS

  format %{ "FMOD   $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_D(src1, src2),
              emitModD(),
              Push_Result_Mod_D(src2),
              Pop_Mem_F(dst));
  ins_pipe( pipe_slow );
%}
//
// This instruction does not round to 24-bits
// Non-rounding variant of the FPREM remainder; result pops to a register.
instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ModF dst src));
  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS

  format %{ "FMOD   $dst,$src" %}
  ins_encode(Push_Reg_Mod_D(dst, src),
              emitModD(),
              Push_Result_Mod_D(src),
              Pop_Reg_F(dst));
  ins_pipe( pipe_slow );
%}

// SSE has no FP remainder instruction, so the XMM operands are bounced
// through the stack onto the x87 stack and FPREM is iterated (FNSTSW/SAHF
// and JP re-enter the loop) until the reduction is complete; the result is
// then stored back and reloaded into the XMM dst.
instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB    ESP,4\t # FMOD\n"
          "\tMOVSS  [ESP+0],$src1\n"
          "\tFLD_S  [ESP+0]\n"
          "\tMOVSS  [ESP+0],$src0\n"
          "\tFLD_S  [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS  $dst,[ESP+0]\n"
          "\tADD    ESP,4\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}
11767 
11768 
//----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
11771 
// Round an x87 value to float precision by storing it to a stack slot with
// a single-precision FST.
instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S  $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_F(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Round an x87 value to double precision by storing it to a stack slot
// with a double-precision FST.
instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D  $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_D(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// Force rounding to 24-bit precision and 6-bit exponent
// ConvD2F on the x87 path is just a rounding store; this rule expands into
// roundFloat_mem_reg rather than emitting its own encoding.
instruct convD2F_reg(stackSlotF dst, regD src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S  $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}
11799 
// Force rounding to 24-bit precision and 6-bit exponent
// UseSSE==1: the double lives in x87 but the float result must end up in
// an XMM register — bounce it through a 4-byte stack temp (FST_S + MOVSS).
instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB    ESP,4\n\t"
            "FST_S  [ESP],$src\t# F-round\n\t"
            "MOVSS  $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode( D2X_encoding(dst, src) );
  ins_pipe( pipe_slow );
%}

// Force rounding double precision to single precision
// UseSSE>=2: direct XMM-to-XMM conversion, CVTSD2SS = F2 0F 5A.
instruct convXD2X_reg(regX dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  opcode(0xF2, 0x0F, 0x5A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// x87 float-to-double widening is a register-to-register move.
// NOTE(review): the format text "FST_S" suggests a single-precision store,
// but the encoding (Pop_Reg_Reg_D) is a register-form move — the listing
// text looks stale; confirm before relying on it.
instruct convF2D_reg_reg(regD dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S  $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_D(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// UseSSE==1 with a stack-slot destination: widen by a double store,
// expands into roundDouble_mem_reg.
instruct convF2D_reg(stackSlotD dst, regF src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D  $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}

// UseSSE==1: float is in XMM but the double result must land in x87 —
// bounce through a 4-byte stack temp (MOVSS + FLD_S), then pop into dst.
instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB    ESP,4\n\t"
            "MOVSS  [ESP] $src\n\t"
            "FLD_S  [ESP]\n\t"
            "ADD    ESP,4\n\t"
            "FSTP   $dst\t# D-round" %}
  ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
  ins_pipe( pipe_slow );
%}

// UseSSE>=2: direct XMM-to-XMM widening, CVTSS2SD = F3 0F 5A.
instruct convX2XD_reg(regXD dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  opcode(0xF3, 0x0F, 0x5A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11861 
// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
// x87 path: switch the FPU to truncating rounding, FISTP to the stack,
// reload, and restore the rounding mode.  The hardware stores the sentinel
// 0x80000000 on overflow/NaN; when seen, the slow-path d2i_wrapper is
// called to produce the Java-mandated result.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD    $src\t# Convert double to int \n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,4\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "CMP    EAX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "FLD_D  $src\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_D(src), D2I_encoding(src) );
  ins_pipe( pipe_slow );
%}

// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
// SSE2 path: CVTTSD2SI (F2 0F 2C) truncates directly; the same 0x80000000
// sentinel check routes overflow/NaN to the d2i_wrapper slow path, which
// receives the value via the x87 stack.
instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP    $dst,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP, 8\n\t"
            "MOVSD  [ESP], $src\n\t"
            "FLD_D  [ESP]\n\t"
            "ADD    ESP, 8\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  opcode(0x1); // double-precision conversion
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
  ins_pipe( pipe_slow );
%}
11900 
// Convert a double to a long (result in EDX:EAX).  As with D2I, the
// truncating FISTP stores min-long (EDX=0x80000000, EAX=0) on overflow/NaN,
// which is detected and routed to the d2l_wrapper slow path.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD    $src\t# Convert double to long\n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,8\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_D(src),  D2L_encoding(src) );
  ins_pipe( pipe_slow );
%}

// XMM lacks a float/double->long conversion, so use the old FPU stack.
// SSE2 operand is spilled to the stack and reloaded into x87 before the
// same truncating FISTP + sentinel-check sequence as convD2L_reg_reg.
instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB    ESP,8\t# Convert double to long\n\t"
            "MOVSD  [ESP],$src\n\t"
            "FLD_D  [ESP]\n\t"
            "FLDCW  trunc mode\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP,8\n\t"
            "MOVSD  [ESP],$src\n\t"
            "FLD_D  [ESP]\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode( XD2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
11948 
// Convert a double to an int.  Java semantics require we perform complex
// manglings in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NaN; we check for this
// and go the slow path if needed.
11955 instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11956   predicate(UseSSE==0);
11957   match(Set dst (ConvF2I src));
11958   effect( KILL tmp, KILL cr );
11959   format %{ "FLD    $src\t# Convert float to int \n\t"
11960             "FLDCW  trunc mode\n\t"
11961             "SUB    ESP,4\n\t"
11962             "FISTp  [ESP + #0]\n\t"
11963             "FLDCW  std/24-bit mode\n\t"
11964             "POP    EAX\n\t"
11965             "CMP    EAX,0x80000000\n\t"
11966             "JNE,s  fast\n\t"
11967             "FLD    $src\n\t"
11968             "CALL   d2i_wrapper\n"
11969       "fast:" %}
11970   // D2I_encoding works for F2I
11971   ins_encode( Push_Reg_F(src), D2I_encoding(src) );
11972   ins_pipe( pipe_slow );
11973 %}
11974 
// Convert a float in xmm to an int reg.
// CVTTSS2SI returns 0x80000000 (the integer-indefinite value) on
// overflow/NaN; that sentinel triggers the d2i_wrapper slow path, which
// re-pushes $src onto the x87 stack via the memory round-trip shown.
instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP    $dst,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP, 4\n\t"
            "MOVSS  [ESP], $src\n\t"
            "FLD    [ESP]\n\t"
            "ADD    ESP, 4\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  opcode(0x0); // single-precision conversion
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
  ins_pipe( pipe_slow );
%}
11993 
// Convert float to long, x87 path (UseSSE==0).  Same sentinel check and
// d2l_wrapper slow path as convD2L_reg_reg above.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD    $src\t# Convert float to long\n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,8\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  // D2L_encoding works for F2L
  ins_encode( Push_Reg_F(src), D2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
12016 
// XMM lacks a float/double->long conversion, so use the old FPU stack.
// Single-precision analogue of convXD2L_reg_reg: spill the XMM float,
// convert on the x87 stack, and call d2l_wrapper on the sentinel result.
instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB    ESP,8\t# Convert float to long\n\t"
            "MOVSS  [ESP],$src\n\t"
            "FLD_S  [ESP]\n\t"
            "FLDCW  trunc mode\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP,4\t# Convert float to long\n\t"
            "MOVSS  [ESP],$src\n\t"
            "FLD_S  [ESP]\n\t"
            "ADD    ESP,4\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode( X2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
12043 
// Convert int to double, x87 path (UseSSE<=1): integer load + FP store.
instruct convI2D_reg(regD dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert int (in a GP register) to double in xmm via CVTSI2SD.
instruct convI2XD_reg(regXD dst, eRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Memory-operand form: fold the LoadI into the CVTSI2SD.
instruct convI2XD_mem(regXD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}

// Alternative i2d when UseXmmI2D: move int into xmm, then CVTDQ2PD.
instruct convXI2XD_reg(regXD dst, eRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2D );
  match(Set dst (ConvI2D src));

  format %{ "MOVD  $dst,$src\n\t"
            "CVTDQ2PD $dst,$dst\t# i2d" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}

// x87 i2d straight from memory; only legal outside 24-bit rounding mode.
instruct convI2D_mem(regD dst, memory mem) %{
  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "FILD   $mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_D(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert a byte to a float; no rounding step needed.
// Matches only when the input is (AndI x 255), i.e. provably 0..255,
// which is exactly representable in a 24-bit float mantissa.
instruct conv24I2F_reg(regF dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}
12108 
// In 24-bit mode, force exponent rounding by storing back out
// (FSTP_S stores single precision, so the result is correctly rounded).
instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD   $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_F(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
// (memory-operand variant; folds the LoadI).
instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD   $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_F(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits
instruct convI2F_reg(regF dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits
instruct convI2F_mem(regF dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD   $mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}

// Convert an int to a float in xmm; no rounding step needed.
instruct convI2X_reg(regX dst, eRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}

  opcode(0xF3, 0x0F, 0x2A);  /* F3 0F 2A /r */
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Alternative i2f when UseXmmI2F: move int into xmm, then CVTDQ2PS.
 instruct convXI2X_reg(regX dst, eRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD  $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
12183 
// Sign-extend int to long: copy to both halves, then SAR hi,31 to
// replicate the sign bit into the high word.
instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);
  ins_cost(375);
  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src\n\t"
            "SAR    $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long
instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV    $dst.lo,$src\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  ins_cost(250);
  format %{ "MOV    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}
12218 
// Convert long to double, x87 path: push both halves, FILD the 64-bit
// integer from the stack, round/store to the destination stack slot.
instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
            "PUSH   $src.lo\n\t"
            "FILD   ST,[ESP + #0]\n\t"
            "ADD    ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_D(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to double into xmm: x87 does the l2d, result is bounced
// through the stack into the XMM register.
instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
            "PUSH   $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD  $dst,[ESP]\n\t"
            "ADD    ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultXD(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to float into xmm: as above, but FSTP_S performs the
// single-precision rounding before the MOVSS load.
instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
            "PUSH   $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS  $dst,[ESP]\n\t"
            "ADD    ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Convert long to float, x87 path (no UseSSE predicate: fallback rule).
instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
            "PUSH   $src.lo\n\t"
            "FILD   ST,[ESP + #0]\n\t"
            "ADD    ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_F(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to int: just take the low half.
instruct convL2I_reg( eRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV    $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}
12283 
12284 
// MoveF2I/MoveI2F: raw bit moves between float and int representations
// (Float.floatToRawIntBits / intBitsToFloat); no conversion is performed.
instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(100);
  format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(125);
  format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
  ins_encode( Pop_Mem_Reg_F(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
  ins_pipe( pipe_slow );
%}

// Direct xmm->GP move via MOVD; cheapest variant, needs SSE2.
instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveF2I src));
  effect( DEF dst, USE src );
  ins_cost(85);
  format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
  ins_encode( MovX2I_reg(dst, src));
  ins_pipe( pipe_slow );
%}

instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}
12337 
12338 
// MoveI2F from a stack slot into an FP/XMM register (bit move, no
// conversion; FLD_S/FSTP just transports the 32 raw bits).
instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}

// Direct GP->xmm move via MOVD; cheapest variant, needs SSE2.
instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode( MovI2X_reg(dst, src) );
  ins_pipe( pipe_slow );
%}
12374 
// MoveD2L/MoveL2D: raw bit moves between double and long representations
// (Double.doubleToRawLongBits / longBitsToDouble); no conversion.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_D(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);

  format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
  ins_pipe( pipe_slow );
%}

// Register-to-register variant: MOVD the low word, shuffle the high
// word down with PSHUFLW, MOVD it out.
instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD   $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode( MovXD2L_reg(dst, src, tmp) );
  ins_pipe( pipe_slow );
%}
12420 
// MoveL2D: raw 64-bit move from long into a double representation.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}


// MOVSD zeroes the upper half of the xmm register on load; only valid
// when UseXmmLoadAndClearUpper says that is the preferred form.
instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}
12459 
// MOVLPD loads only the low 64 bits, leaving the upper half of the xmm
// register untouched (a "partial" load); selected when
// UseXmmLoadAndClearUpper is off.
instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  // Fixed: the disassembly label previously said "MoveL2D_stack_reg_sse",
  // which is the sibling (non-partial) instruct's name.
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}
12470 
// Register-to-register MoveL2D: MOVD each 32-bit half into xmm registers
// and interleave them with PUNPCKLDQ to form the 64-bit value.
instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD   $dst,$src.lo\n\t"
            "MOVD   $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode( MovL2XD_reg(dst, src, tmp) );
  ins_pipe( pipe_slow );
%}
12482 
// Replicate scalar to packed byte (1 byte) values in xmm
instruct Repl8B_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate8B src));
  format %{ "MOVDQA  $dst,$src\n\t"
            "PUNPCKLBW $dst,$dst\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
  ins_encode( pshufd_8x8(dst, src));
  ins_pipe( pipe_slow );
%}

// Replicate scalar to packed byte (1 byte) values in xmm
instruct Repl8B_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate8B src));
  format %{ "MOVD    $dst,$src\n\t"
            "PUNPCKLBW $dst,$dst\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
  ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
  ins_pipe( pipe_slow );
%}

// Replicate scalar zero to packed byte (1 byte) values in xmm
// (PXOR reg,reg is the canonical xmm-zeroing idiom).
instruct Repl8B_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate8B zero));
  format %{ "PXOR  $dst,$dst\t! replicate8B" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed short (2 byte) values in xmm
instruct Repl4S_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4S src));
  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
  ins_encode( pshufd_4x16(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed short (2 byte) values in xmm
instruct Repl4S_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4S src));
  format %{ "MOVD    $dst,$src\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed short (2 byte) values in xmm
instruct Repl4S_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4S zero));
  format %{ "PXOR  $dst,$dst\t! replicate4S" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed char (2 byte) values in xmm
instruct Repl4C_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4C src));
  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
  ins_encode( pshufd_4x16(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed char (2 byte) values in xmm
instruct Repl4C_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4C src));
  format %{ "MOVD    $dst,$src\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed char (2 byte) values in xmm
instruct Repl4C_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4C zero));
  format %{ "PXOR  $dst,$dst\t! replicate4C" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed integer (4 byte) values in xmm
instruct Repl2I_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2I src));
  format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
  ins_encode( pshufd(dst, src, 0x00));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed integer (4 byte) values in xmm
instruct Repl2I_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2I src));
  format %{ "MOVD   $dst,$src\n\t"
            "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
  ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed integer (4 byte) values in xmm
instruct Repl2I_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2I zero));
  format %{ "PXOR  $dst,$dst\t! replicate2I" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed single precision floating point values in xmm
instruct Repl2F_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2F src));
  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
  ins_encode( pshufd(dst, src, 0xe0));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed single precision floating point values in xmm
instruct Repl2F_regX(regXD dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2F src));
  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
  ins_encode( pshufd(dst, src, 0xe0));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed single precision floating point values in xmm
instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2F zero));
  format %{ "PXOR  $dst,$dst\t! replicate2F" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}
12624 
12625 // =======================================================================
12626 // fast clearing of an array
12627 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
12628   match(Set dummy (ClearArray cnt base));
12629   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
12630   format %{ "SHL    ECX,1\t# Convert doublewords to words\n\t"
12631             "XOR    EAX,EAX\n\t"
12632             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
12633   opcode(0,0x4);
12634   ins_encode( Opcode(0xD1), RegOpc(ECX),
12635               OpcRegReg(0x33,EAX,EAX),
12636               Opcode(0xF3), Opcode(0xAB) );
12637   ins_pipe( pipe_slow );
12638 %}
12639 
12640 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
12641                         eAXRegI result, regXD tmp1, eFlagsReg cr) %{
12642   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
12643   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
12644 
12645   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
12646   ins_encode %{
12647     __ string_compare($str1$$Register, $str2$$Register,
12648                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
12649                       $tmp1$$XMMRegister);
12650   %}
12651   ins_pipe( pipe_slow );
12652 %}
12653 
// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
                       regXD tmp1, regXD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // First argument false = compare char[] contents as strings,
    // distinguishing this from the array_equals use of the same helper.
    __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
                          $cnt$$Register, $result$$Register, $tmp3$$Register,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// fast search of substring with known size.
instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
                            eBXRegI result, regXD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
  ins_encode %{
    int icnt2 = (int)$int_cnt2$$constant;
    if (icnt2 >= 8) {
      // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through stack.
      __ string_indexofC8($str1$$Register, $str2$$Register,
                          $cnt1$$Register, $cnt2$$Register,
                          icnt2, $result$$Register,
                          $vec$$XMMRegister, $tmp$$Register);
    } else {
      // Small strings are loaded through stack if they cross page boundary.
      __ string_indexof($str1$$Register, $str2$$Register,
                        $cnt1$$Register, $cnt2$$Register,
                        icnt2, $result$$Register,
                        $vec$$XMMRegister, $tmp$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// General (non-constant substring length) String.indexOf intrinsic.
instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
                        eBXRegI result, regXD vec, eCXRegI tmp, eFlagsReg cr) %{
  predicate(UseSSE42Intrinsics);
  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);

  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
  ins_encode %{
    // -1 signals a non-constant substring length to the assembler stub.
    __ string_indexof($str1$$Register, $str2$$Register,
                      $cnt1$$Register, $cnt2$$Register,
                      (-1), $result$$Register,
                      $vec$$XMMRegister, $tmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                      regXD tmp1, regXD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
  match(Set result (AryEq ary1 ary2));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    // First argument true = treat operands as whole char arrays
    // (lengths are loaded by the stub), unlike the string_equals use.
    __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
                          $tmp3$$Register, $result$$Register, $tmp4$$Register,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
12729 
//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions
instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP    $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP    $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  // OpcSErm picks the short (8-bit immediate) form when op2 fits.
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg
instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP    $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero via TEST reg,reg (shorter than CMP reg,0).
instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST   $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST   $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST   $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}
12789 
// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.  The eFlagsRegU result
// is consumed with the unsigned condition codes (JB/JAE/...).

// Unsigned compare of two integer registers.
instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Unsigned compare of a register against an immediate.
instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
// Unsigned compare of a register with a loaded integer.
instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu   $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare of a register against zero via TEST.
instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu  $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12839 
// Unsigned pointer compare Instructions
// Pointers are compared as unsigned 32-bit values.

instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Pointer vs pointer-immediate compare.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
// Pointer register vs pointer loaded from memory.
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu   $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
// The predicate restricts the match to loads whose type is not an oop
// pointer (checked on the LoadP's address input in the ideal subtree).
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( !n->in(2)->in(2)->bottom_type()->isa_oop_ptr() );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST   $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// TEST [mem],0xFFFFFFFF sets ZF iff the loaded pointer is null.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST   $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);               /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Yanked all unsigned pointer compare operations.
// Pointer compares are done with CmpP which is already unsigned.
12920 
//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version)
// dst = min(dst, src); the compare clobbers the flags register.
instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN    $dst,$src" %}
  // NOTE(review): 0xCC appears to be a placeholder opcode; the real bytes
  // come from the min_enc encoding class -- confirm against the enc section.
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}
12958 
// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version)
// dst = max(dst, src); the compare clobbers the flags register.
instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX    $dst,$src" %}
  // NOTE(review): 0xCC appears to be a placeholder opcode; the real bytes
  // come from the max_enc encoding class -- confirm against the enc section.
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}
12994 
12995 // ============================================================================
12996 // Counted Loop limit node which represents exact final iterator value.
12997 // Note: the resulting value should fit into integer range since
12998 // counted loops have limit check on overflow.
12999 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
13000   match(Set limit (LoopLimit (Binary init limit) stride));
13001   effect(TEMP limit_hi, TEMP tmp, KILL flags);
13002   ins_cost(300);
13003 
13004   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
13005   ins_encode %{
13006     int strd = (int)$stride$$constant;
13007     assert(strd != 1 && strd != -1, "sanity");
13008     int m1 = (strd > 0) ? 1 : -1;
13009     // Convert limit to long (EAX:EDX)
13010     __ cdql();
13011     // Convert init to long (init:tmp)
13012     __ movl($tmp$$Register, $init$$Register);
13013     __ sarl($tmp$$Register, 31);
13014     // $limit - $init
13015     __ subl($limit$$Register, $init$$Register);
13016     __ sbbl($limit_hi$$Register, $tmp$$Register);
13017     // + ($stride - 1)
13018     if (strd > 0) {
13019       __ addl($limit$$Register, (strd - 1));
13020       __ adcl($limit_hi$$Register, 0);
13021       __ movl($tmp$$Register, strd);
13022     } else {
13023       __ addl($limit$$Register, (strd + 1));
13024       __ adcl($limit_hi$$Register, -1);
13025       __ lneg($limit_hi$$Register, $limit$$Register);
13026       __ movl($tmp$$Register, -strd);
13027     }
13028     // signed devision: (EAX:EDX) / pos_stride
13029     __ idivl($tmp$$Register);
13030     if (strd < 0) {
13031       // restore sign
13032       __ negl($tmp$$Register);
13033     }
13034     // (EAX) * stride
13035     __ mull($tmp$$Register);
13036     // + init (ignore upper bits)
13037     __ addl($limit$$Register, $init$$Register);
13038   %}
13039   ins_pipe( pipe_slow );
13040 %}
13041 
13042 // ============================================================================
13043 // Branch Instructions
13044 // Jump Table
13045 instruct jumpXtnd(eRegI switch_val) %{
13046   match(Jump switch_val);
13047   ins_cost(350);
13048   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
13049   ins_encode %{
13050     // Jump to Address(table_base + switch_reg)
13051     Address index(noreg, $switch_val$$Register, Address::times_1);
13052     __ jump(ArrayAddress($constantaddress, index));
13053   %}
13054   ins_pc_relative(1);
13055   ins_pipe(pipe_jmp);
13056 %}
13057 
// Jump Direct - Label defines a relative address from JMP+1
// Unconditional 32-bit-displacement jump (E9 rel32, 5 bytes).
instruct jmpDir(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP    $labl" %}
  size(5);
  opcode(0xE9);
  ins_encode( OpcP, Lbl( labl ) );
  ins_pipe( pipe_jmp );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Signed conditional jump (0F 8x rel32, 6 bytes).
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop    $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Same encoding as jmpCon but matches the back-branch of a counted loop.
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop    $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}
13099 
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Unsigned-flags variant of the counted-loop back-branch.
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u  $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Counted-loop back-branch on carry-flag-style unordered-compare flags
// (cmpOpUCF); cheaper ins_cost makes the matcher prefer it when legal.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u  $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}

// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u  $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode(Jcc(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}

// Conditional jump on carry-flag-style unordered-compare flags.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u  $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode(Jcc(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}
13153 
// EQ/NE conditional jump on unordered-compare flags (cmpOpUCF2).  After an
// unordered FP compare the parity flag (PF) is set when a NaN was involved:
//  - NE: an unordered result means "not equal", so JP jumps to the target
//    too (same label as the Jcc);
//  - EQ: an unordered result must NOT take the branch, so JP skips over the
//    following 6-byte JE.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u   $labl\n\t"
      $$emit$$"J$cop,u   $labl"
    } else {
      $$emit$$"JP,u   done\n\t"
      $$emit$$"J$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(12);
  opcode(0x0F, 0x80);
  ins_encode %{
    Label* l = $labl$$label;
    assert(l != NULL, "need Label");
    // First instruction: JP rel32 (0F 8A), 6 bytes.
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, Assembler::parity);
    int parity_disp = -1;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // JP targets the same label as the Jcc below; the displacement is
      // relative to the end of this 4-byte field.
      parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Skip over the following 6-byte Jcc instruction ("done:" in format).
      parity_disp = 6;
    } else {
      ShouldNotReachHere();
    }
    emit_d32(cbuf, parity_disp);
    // Second instruction: Jcc rel32 to the label, 6 bytes.
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
    int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 4)) : 0;
    emit_d32(cbuf, disp);
  %}
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}
13196 
13197 // ============================================================================
13198 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
13199 // array for an instance of the superklass.  Set a hidden internal cache on a
13200 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
13201 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
13202 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
13203   match(Set result (PartialSubtypeCheck sub super));
13204   effect( KILL rcx, KILL cr );
13205 
13206   ins_cost(1100);  // slightly larger than the next version
13207   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
13208             "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
13209             "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
13210             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
13211             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
13212             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
13213             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
13214      "miss:\t" %}
13215 
13216   opcode(0x1); // Force a XOR of EDI
13217   ins_encode( enc_PartialSubtypeCheck() );
13218   ins_pipe( pipe_slow );
13219 %}
13220 
13221 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
13222   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
13223   effect( KILL rcx, KILL result );
13224 
13225   ins_cost(1000);
13226   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
13227             "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
13228             "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
13229             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
13230             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
13231             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
13232      "miss:\t" %}
13233 
13234   opcode(0x0);  // No need to XOR EDI
13235   ins_encode( enc_PartialSubtypeCheck() );
13236   ins_pipe( pipe_slow );
13237 %}
13238 
13239 // ============================================================================
13240 // Branch Instructions -- short offset versions
13241 //
13242 // These instructions are used to replace jumps of a long offset (the default
13243 // match) with jumps of a shorter offset.  These instructions are all tagged
13244 // with the ins_short_branch attribute, which causes the ADLC to suppress the
13245 // match rules in general matching.  Instead, the ADLC generates a conversion
13246 // method in the MachNode which can be used to do in-place replacement of the
13247 // long variant with the shorter variant.  The compiler will determine if a
13248 // branch can be taken by the is_short_branch_offset() predicate in the machine
13249 // specific code section of the file.
13250 
13251 // Jump Direct - Label defines a relative address from JMP+1
13252 instruct jmpDir_short(label labl) %{
13253   match(Goto);
13254   effect(USE labl);
13255 
13256   ins_cost(300);
13257   format %{ "JMP,s  $labl" %}
13258   size(2);
13259   opcode(0xEB);
13260   ins_encode( OpcP, LblShort( labl ) );
13261   ins_pipe( pipe_jmp );
13262   ins_pc_relative(1);
13263   ins_short_branch(1);
13264 %}
13265 
13266 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13267 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
13268   match(If cop cr);
13269   effect(USE labl);
13270 
13271   ins_cost(300);
13272   format %{ "J$cop,s  $labl" %}
13273   size(2);
13274   opcode(0x70);
13275   ins_encode( JccShort( cop, labl) );
13276   ins_pipe( pipe_jcc );
13277   ins_pc_relative(1);
13278   ins_short_branch(1);
13279 %}
13280 
13281 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13282 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
13283   match(CountedLoopEnd cop cr);
13284   effect(USE labl);
13285 
13286   ins_cost(300);
13287   format %{ "J$cop,s  $labl\t# Loop end" %}
13288   size(2);
13289   opcode(0x70);
13290   ins_encode( JccShort( cop, labl) );
13291   ins_pipe( pipe_jcc );
13292   ins_pc_relative(1);
13293   ins_short_branch(1);
13294 %}
13295 
13296 // Jump Direct Conditional - Label defines a relative address from Jcc+1
13297 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
13298   match(CountedLoopEnd cop cmp);
13299   effect(USE labl);
13300 
13301   ins_cost(300);
13302   format %{ "J$cop,us $labl\t# Loop end" %}
13303   size(2);
13304   opcode(0x70);
13305   ins_encode( JccShort( cop, labl) );
13306   ins_pipe( pipe_jcc );
13307   ins_pc_relative(1);
13308   ins_short_branch(1);
13309 %}
13310 
13311 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
13312   match(CountedLoopEnd cop cmp);
13313   effect(USE labl);
13314 
13315   ins_cost(300);
13316   format %{ "J$cop,us $labl\t# Loop end" %}
13317   size(2);
13318   opcode(0x70);
13319   ins_encode( JccShort( cop, labl) );
13320   ins_pipe( pipe_jcc );
13321   ins_pc_relative(1);
13322   ins_short_branch(1);
13323 %}
13324 
// Jump Direct Conditional - using unsigned comparison
// Short form of jmpConU (7x rel8, 2 bytes).
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}

// Short form of jmpConUCF: conditional jump on unordered-compare flags.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}
13353 
// Short form of jmpConUCF2: EQ/NE branch on unordered-compare flags,
// emitted as two 2-byte short jumps (JP + Jcc, size 4).  See jmpConUCF2
// for the NaN/parity-flag rationale.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s   $labl\n\t"
      $$emit$$"J$cop,u,s   $labl"
    } else {
      $$emit$$"JP,u,s   done\n\t"
      $$emit$$"J$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  opcode(0x70);
  ins_encode %{
    Label* l = $labl$$label;
    assert(l != NULL, "need Label");
    // First instruction: short JP (2 bytes).
    emit_cc(cbuf, $primary, Assembler::parity);
    int parity_disp = -1;
    if ($cop$$cmpcode == Assembler::notEqual) {
      // JP targets the same label as the Jcc below (disp relative to next byte).
      parity_disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Skip over the following 2-byte short Jcc.
      parity_disp = 2;
    } else {
      ShouldNotReachHere();
    }
    emit_d8(cbuf, parity_disp);
    // Second instruction: short Jcc to the label (2 bytes).
    emit_cc(cbuf, $primary, $cop$$cmpcode);
    int disp = l ? (l->loc_pos() - (cbuf.insts_size() + 1)) : 0;
    emit_d8(cbuf, disp);
    // The asserts run after emission; the short-branch replacement machinery
    // is expected to have guaranteed the 8-bit range already.
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
  %}
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
  ins_short_branch(1);
%}
13394 
13395 // ============================================================================
13396 // Long Compare
13397 //
13398 // Currently we hold longs in 2 registers.  Comparing such values efficiently
13399 // is tricky.  The flavor of compare used depends on whether we are testing
13400 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
13401 // The GE test is the negated LT test.  The LE test can be had by commuting
13402 // the operands (yielding a GE test) and then negating; negate again for the
13403 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
13404 // NE test is negated from that.
13405 
13406 // Due to a shortcoming in the ADLC, it mixes up expressions like:
13407 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
13408 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
13409 // are collapsed internally in the ADLC's dfa-gen code.  The match for
13410 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
13411 // foo match ends up with the wrong leaf.  One fix is to not match both
13412 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
13413 // both forms beat the trinary form of long-compare and both are very useful
13414 // on Intel which has so few registers.
13415 
13416 // Manifest a CmpL result in an integer register.  Very painful.
13417 // This is the test to avoid.
13418 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
13419   match(Set dst (CmpL3 src1 src2));
13420   effect( KILL flags );
13421   ins_cost(1000);
13422   format %{ "XOR    $dst,$dst\n\t"
13423             "CMP    $src1.hi,$src2.hi\n\t"
13424             "JLT,s  m_one\n\t"
13425             "JGT,s  p_one\n\t"
13426             "CMP    $src1.lo,$src2.lo\n\t"
13427             "JB,s   m_one\n\t"
13428             "JEQ,s  done\n"
13429     "p_one:\tINC    $dst\n\t"
13430             "JMP,s  done\n"
13431     "m_one:\tDEC    $dst\n"
13432      "done:" %}
13433   ins_encode %{
13434     Label p_one, m_one, done;
13435     __ xorptr($dst$$Register, $dst$$Register);
13436     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
13437     __ jccb(Assembler::less,    m_one);
13438     __ jccb(Assembler::greater, p_one);
13439     __ cmpl($src1$$Register, $src2$$Register);
13440     __ jccb(Assembler::below,   m_one);
13441     __ jccb(Assembler::equal,   done);
13442     __ bind(p_one);
13443     __ incrementl($dst$$Register);
13444     __ jmpb(done);
13445     __ bind(m_one);
13446     __ decrementl($dst$$Register);
13447     __ bind(done);
13448   %}
13449   ins_pipe( pipe_slow );
13450 %}
13451 
13452 //======
13453 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13454 // compares.  Can be used for LE or GT compares by reversing arguments.
13455 // NOT GOOD FOR EQ/NE tests.
13456 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
13457   match( Set flags (CmpL src zero ));
13458   ins_cost(100);
13459   format %{ "TEST   $src.hi,$src.hi" %}
13460   opcode(0x85);
13461   ins_encode( OpcP, RegReg_Hi2( src, src ) );
13462   ins_pipe( ialu_cr_reg_reg );
13463 %}
13464 
13465 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
13466 // compares.  Can be used for LE or GT compares by reversing arguments.
13467 // NOT GOOD FOR EQ/NE tests.
13468 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13469   match( Set flags (CmpL src1 src2 ));
13470   effect( TEMP tmp );
13471   ins_cost(300);
13472   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
13473             "MOV    $tmp,$src1.hi\n\t"
13474             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
13475   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
13476   ins_pipe( ialu_cr_reg_reg );
13477 %}
13478 
13479 // Long compares reg < zero/req OR reg >= zero/req.
13480 // Just a wrapper for a normal branch, plus the predicate test.
13481 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
13482   match(If cmp flags);
13483   effect(USE labl);
13484   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
13485   expand %{
13486     jmpCon(cmp,flags,labl);    // JLT or JGE...
13487   %}
13488 %}
13489 
// Compare 2 longs and CMOVE longs.
// Two CMOVs, one per 32-bit half of the destination long.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Same, with the source long loaded from memory.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Same, with the source int loaded from memory.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
13533 
// Compare 2 longs and CMOVE pointers.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): && binds tighter than ||, so this reads
// (UseSSE<=1 && lt) || ge -- the UseSSE guard does not cover the ge arm.
// Presumably benign because register-class selection excludes the wrong
// FP bank, but confirm the intent was UseSSE<=1 && (lt || ge).
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE doubles
// NOTE(review): same &&/|| precedence question as cmovDD_reg_LTGE above.
instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (x87 variant).
// NOTE(review): same &&/|| precedence question as cmovDD_reg_LTGE above.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}

// Compare 2 longs and CMOVE floats (SSE variant).
// NOTE(review): same &&/|| precedence question as cmovDD_reg_LTGE above.
instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}
13582 
13583 //======
13584 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect(TEMP tmp);   // tmp is clobbered by the MOV/OR sequence below
  ins_cost(200);
  // OR of the two 32-bit halves is zero iff the whole long is zero, so
  // only ZF is meaningful afterwards — hence the EQNE-only flags class.
  format %{ "MOV    $tmp,$src.lo\n\t"
            "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}
13594 
13595 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  // Compare low halves first; only if they are equal do the high halves
  // decide.  Either way only equality information survives, so the result
  // is an EQNE-only flags value.
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s  skip\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
     "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13606 
13607 // Long compare reg == zero/reg OR reg != zero/reg
13608 // Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only EQ/NE branch conditions are valid against EQNE-style long flags.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}
13617 
13618 // Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  // Needs CMOV hardware; restricted to EQ/NE since the flags come from an
  // EQNE-only long compare.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  // A long CMOV is two 32-bit CMOVs, one per half.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);   // CMOVcc base; condition taken from $cmp via enc_cmov
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13629 
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  // Memory-source variant of cmovLL_reg_EQNE; same CMOV + EQ/NE gating.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);   // CMOVcc base; condition taken from $cmp via enc_cmov
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
13640 
13641 // Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
  // Needs CMOV hardware; restricted to EQ/NE since the flags come from an
  // EQNE-only long compare.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);   // CMOVcc base; condition taken from $cmp via enc_cmov
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13651 
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
  // Memory-source variant of cmovII_reg_EQNE.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);   // CMOVcc base; condition taken from $cmp via enc_cmov
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
13661 
// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  // Pointer CMOV; same CMOV-hardware and EQ/NE gating as cmovII_reg_EQNE.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);   // CMOVcc base; condition taken from $cmp via enc_cmov
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13672 
13673 // Compare 2 longs and CMOVE doubles
// Compare 2 longs (EQ/NE flavor) and CMOVE x87 doubles.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  // NOTE: parentheses added around the BoolTest disjunction.  '&&' binds
  // tighter than '||' in C++, so without them the 'ne' arm matched
  // regardless of UseSSE (compare the parenthesized cmovLL_reg_EQNE
  // predicate above).
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expands to the shared x87 conditional-move instruct.
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13682 
13683 // Compare 2 longs and CMOVE doubles
instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
  // NOTE: parentheses added around the BoolTest disjunction.  '&&' binds
  // tighter than '||' in C++, so without them the 'ne' arm matched even
  // when UseSSE<2 (compare the parenthesized cmovLL_reg_EQNE predicate).
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expands to the shared XMM conditional-move instruct.
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}
13692 
// Compare 2 longs (EQ/NE flavor) and CMOVE x87 floats.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  // NOTE: parentheses added around the BoolTest disjunction.  '&&' binds
  // tighter than '||' in C++, so without them the 'ne' arm matched even
  // when UseSSE!=0 (compare the parenthesized cmovLL_reg_EQNE predicate).
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expands to the shared x87 conditional-move instruct.
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13701 
// Compare 2 longs (EQ/NE flavor) and CMOVE XMM floats.
instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
  // NOTE: parentheses added around the BoolTest disjunction.  '&&' binds
  // tighter than '||' in C++, so without them the 'ne' arm matched even
  // when UseSSE==0 (compare the parenthesized cmovLL_reg_EQNE predicate).
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expands to the shared XMM conditional-move instruct.
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}
13710 
13711 //======
13712 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13713 // Same as cmpL_reg_flags_LEGT except must negate src
instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
  match( Set flags (CmpL src zero ));
  effect( TEMP tmp );   // tmp is zeroed then consumed by CMP/SBB
  ins_cost(300);
  // Computes flags for 0 - src (i.e. -src vs 0), so callers must use the
  // commuted condition, as the format note says.
  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3(src, tmp) );
  ins_pipe( ialu_reg_reg_long );
%}
13724 
13725 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13726 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13727 // requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );   // tmp holds src2.hi for the borrow-propagating SBB
  ins_cost(300);
  // CMP/SBB with the operands swapped (src2 - src1); a commuted condition
  // on these flags yields the LE/GT result for src1 vs src2.
  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13738 
// Long compares reg <= zero/reg OR reg > zero/reg.
13740 // Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only GT/LE conditions are valid against LEGT-style long flags; the
  // cmpOp_commute operand supplies the commuted condition code.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}
13750 
13751 // Compare 2 longs and CMOVE longs.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  // Needs CMOV hardware; restricted to LE/GT since the flags come from a
  // swapped-operand (LEGT) long compare.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  // A long CMOV is two 32-bit CMOVs, one per half.
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);   // CMOVcc base; condition taken from $cmp via enc_cmov
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
13762 
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  // Memory-source variant of cmovLL_reg_LEGT; same CMOV + LE/GT gating.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);   // CMOVcc base; condition taken from $cmp via enc_cmov
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
13773 
13774 // Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
  // Needs CMOV hardware; restricted to LE/GT since the flags come from a
  // swapped-operand (LEGT) long compare.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);   // CMOVcc base; condition taken from $cmp via enc_cmov
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13784 
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
  // Memory-source variant of cmovII_reg_LEGT.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);   // CMOVcc base; condition taken from $cmp via enc_cmov
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
13794 
13795 // Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  // Pointer CMOV; same CMOV-hardware and LE/GT gating as cmovII_reg_LEGT.
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);   // CMOVcc base; condition taken from $cmp via enc_cmov
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
13805 
13806 // Compare 2 longs and CMOVE doubles
// Compare 2 longs (LE/GT flavor) and CMOVE x87 doubles.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  // NOTE: parentheses added around the BoolTest disjunction.  '&&' binds
  // tighter than '||' in C++, so without them the 'gt' arm matched
  // regardless of UseSSE (compare the parenthesized cmovLL_reg_LEGT
  // predicate above).
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expands to the shared x87 conditional-move instruct.
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13815 
13816 // Compare 2 longs and CMOVE doubles
instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
  // NOTE: parentheses added around the BoolTest disjunction.  '&&' binds
  // tighter than '||' in C++, so without them the 'gt' arm matched even
  // when UseSSE<2 (compare the parenthesized cmovLL_reg_LEGT predicate).
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expands to the shared XMM conditional-move instruct.
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}
13825 
// Compare 2 longs (LE/GT flavor) and CMOVE x87 floats.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  // NOTE: parentheses added around the BoolTest disjunction.  '&&' binds
  // tighter than '||' in C++, so without them the 'gt' arm matched even
  // when UseSSE!=0 (compare the parenthesized cmovLL_reg_LEGT predicate).
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expands to the shared x87 conditional-move instruct.
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13834 
13835 
// Compare 2 longs (LE/GT flavor) and CMOVE XMM floats.
instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
  // NOTE: parentheses added around the BoolTest disjunction.  '&&' binds
  // tighter than '||' in C++, so without them the 'gt' arm matched even
  // when UseSSE==0 (compare the parenthesized cmovLL_reg_LEGT predicate).
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  // Expands to the shared XMM conditional-move instruct.
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}
13844 
13845 
13846 // ============================================================================
13847 // Procedure Call/Return Instructions
13848 // Call Java Static Instruction
13849 // Note: If this code changes, the corresponding ret_addr_offset() and
13850 //       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  // Method-handle invokes are handled by CallStaticJavaHandle below.
  predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  // pre/post_call_FPU bracket the call with FPU-state bookkeeping
  // (encoding classes defined elsewhere in this file).
  ins_encode( pre_call_FPU,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);   // E8 takes a PC-relative displacement
  ins_alignment(4);
%}
13867 
13868 // Call Java Static Instruction (method handle version)
13869 // Note: If this code changes, the corresponding ret_addr_offset() and
13870 //       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
  match(CallStaticJava);
  // Complement of CallStaticJavaDirect's predicate: method-handle calls only.
  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
  effect(USE meth);
  // EBP is saved by all callees (for interpreter stack correction).
  // We use it here for a similar purpose, in {preserve,restore}_SP.

  ins_cost(300);
  format %{ "CALL,static/MethodHandle " %}
  opcode(0xE8); /* E8 cd */
  // Like CallStaticJavaDirect but with SP preserved/restored around the
  // call via the preserve_SP / restore_SP encodings.
  ins_encode( pre_call_FPU,
              preserve_SP,
              Java_Static_Call( meth ),
              restore_SP,
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);   // E8 takes a PC-relative displacement
  ins_alignment(4);
%}
13891 
13892 // Call Java Dynamic Instruction
13893 // Note: If this code changes, the corresponding ret_addr_offset() and
13894 //       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(300);
  // Per the format: loads a sentinel oop into EAX before the call
  // (inline-cache convention; see Java_Dynamic_Call encoding).
  format %{ "MOV    EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_FPU,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);   // E8 takes a PC-relative displacement
  ins_alignment(4);
%}
13911 
13912 // Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_FPU,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);   // E8 takes a PC-relative displacement
%}
13928 
13929 // Call runtime without safepoint
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  // Same shape as CallRuntimeDirect plus a post-call FPU verification step.
  ins_encode( pre_call_FPU,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);   // E8 takes a PC-relative displacement
%}
13944 
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  // No FPU bookkeeping: the NoFP leaf call skips the pre/post_call_FPU
  // encodings used by the other call instructs above.
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
  ins_pc_relative(1);   // E8 takes a PC-relative displacement
%}
13956 
13957 
13958 // Return Instruction
13959 // Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);   // x86 near RET
  ins_encode(OpcP);
  ins_pipe( pipe_jmp );
%}
13967 
13968 // Tail Call; Jump from runtime stub to Java code.
13969 // Also known as an 'interprocedural jump'.
13970 // Target of jump will eventually return to caller.
13971 // TailJump below removes the return address.
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  // Indirect JMP through jump_target; EBX carries the method oop to the
  // callee (enforced by the eBXRegP operand class).
  format %{ "JMP    $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4);  /* Opcode FF /4 */
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13980 
13981 
13982 // Tail Jump; remove the return address; jump to target.
13983 // TailCall above leaves the return address around.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  // Discards the return address (POP into EDX as a dummy), then jumps
  // indirect; EAX carries the exception oop (eAXRegP operand class).
  format %{ "POP    EDX\t# pop return address into dummy\n\t"
            "JMP    $jump_target " %}
  opcode(0xFF, 0x4);  /* Opcode FF /4 */
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13994 
13995 // Create exception oop: created by stack-crawling runtime code.
13996 // Created exception is now available to this handler, and is setup
13997 // just prior to jumping to this handler.  No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);   // purely a register-allocation artifact — emits no bytes
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();
  ins_pipe( empty );
%}
14008 
14009 
14010 // Rethrow exception:
14011 // The exception oop will come in the first argument position.
14012 // Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP    rethrow_stub" %}
  // Jump (not call) so the stub sees the original caller frame.
  ins_encode(enc_rethrow);
  ins_pipe( pipe_jmp );
%}
14022 
14023 // inlined locking and unlocking
14024 
14025 
instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
  match( Set cr (FastLock object box) );
  effect( TEMP tmp, TEMP scr );   // tmp (EAX) and scr are clobbered
  ins_cost(300);
  // Inlined monitor-enter fast path; result is reported in the flags.
  // The actual code lives in the Fast_Lock encoding class (not visible here).
  format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
  ins_encode( Fast_Lock(object,box,tmp,scr) );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}
14035 
instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match( Set cr (FastUnlock object box) );
  effect( TEMP tmp );   // tmp is clobbered
  ins_cost(300);
  // Inlined monitor-exit fast path; result is reported in the flags.
  // The actual code lives in the Fast_Unlock encoding class (not visible here).
  format %{ "FASTUNLOCK $object, $box, $tmp" %}
  ins_encode( Fast_Unlock(object,box,tmp) );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}
14045 
14046 
14047 
14048 // ============================================================================
14049 // Safepoint Instruction
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);   // the polling test clobbers the flags

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  // Reads the polling page; the VM makes that page unreadable to trap
  // threads at safepoints (see Safepoint_Poll encoding).
  format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;   // fixed 6-byte encoding
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}
14068 
14069 //----------PEEPHOLE RULES-----------------------------------------------------
14070 // These must follow all instruction definitions as they use the names
14071 // defined in the instructions definitions.
14072 //
14073 // peepmatch ( root_instr_name [preceding_instruction]* );
14074 //
14075 // peepconstraint %{
14076 // (instruction_number.operand_name relational_op instruction_number.operand_name
14077 //  [, ...] );
14078 // // instruction numbers are zero-based using left to right order in peepmatch
14079 //
14080 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
14081 // // provide an instruction_number.operand_name for each operand that appears
14082 // // in the replacement instruction's match rule
14083 //
14084 // ---------VM FLAGS---------------------------------------------------------
14085 //
14086 // All peephole optimizations can be turned off using -XX:-OptoPeephole
14087 //
14088 // Each peephole rule is given an identifying number starting with zero and
14089 // increasing by one in the order seen by the parser.  An individual peephole
14090 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
14091 // on the command-line.
14092 //
14093 // ---------CURRENT LIMITATIONS----------------------------------------------
14094 //
14095 // Only match adjacent instructions in same basic block
14096 // Only equality constraints
14097 // Only constraints between operands, not (0.dest_reg == EAX_enc)
14098 // Only one replacement instruction
14099 //
14100 // ---------EXAMPLE----------------------------------------------------------
14101 //
14102 // // pertinent parts of existing instructions in architecture description
14103 // instruct movI(eRegI dst, eRegI src) %{
14104 //   match(Set dst (CopyI src));
14105 // %}
14106 //
14107 // instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
14108 //   match(Set dst (AddI dst src));
14109 //   effect(KILL cr);
14110 // %}
14111 //
14112 // // Change (inc mov) to lea
14113 // peephole %{
14114 //   // increment preceeded by register-register move
14115 //   peepmatch ( incI_eReg movI );
14116 //   // require that the destination register of the increment
14117 //   // match the destination register of the move
14118 //   peepconstraint ( 0.dst == 1.dst );
14119 //   // construct a replacement instruction that sets
14120 //   // the destination to ( move's source register + one )
14121 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14122 // %}
14123 //
14124 // Implementation no longer uses movX instructions since
14125 // machine-independent system no longer uses CopyX nodes.
14126 //
14127 // peephole %{
14128 //   peepmatch ( incI_eReg movI );
14129 //   peepconstraint ( 0.dst == 1.dst );
14130 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14131 // %}
14132 //
14133 // peephole %{
14134 //   peepmatch ( decI_eReg movI );
14135 //   peepconstraint ( 0.dst == 1.dst );
14136 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14137 // %}
14138 //
14139 // peephole %{
14140 //   peepmatch ( addI_eReg_imm movI );
14141 //   peepconstraint ( 0.dst == 1.dst );
14142 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
14143 // %}
14144 //
14145 // peephole %{
14146 //   peepmatch ( addP_eReg_imm movP );
14147 //   peepconstraint ( 0.dst == 1.dst );
14148 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
14149 // %}
14150 
14151 // // Change load of spilled value to only a spill
14152 // instruct storeI(memory mem, eRegI src) %{
14153 //   match(Set mem (StoreI mem src));
14154 // %}
14155 //
14156 // instruct loadI(eRegI dst, memory mem) %{
14157 //   match(Set dst (LoadI mem));
14158 // %}
14159 //
// Peephole: collapse a LoadI that immediately follows a StoreI of the same
// register to the same address ("change load of spilled value to only a
// spill" — see the note above).  Instruction numbers are zero-based,
// left-to-right in peepmatch: 0 = loadI (root), 1 = the preceding storeI.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}
14165 
14166 //----------SMARTSPILL RULES---------------------------------------------------
14167 // These must follow all instruction definitions as they use the names
14168 // defined in the instructions definitions.