1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
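//
// For illustration, the definition of EAX below can be read as: EAX is
// save-on-call under both the Java and C calling conventions, is spilled
// with LoadI/StoreI (Op_RegI), uses encoding 0 in ModR/M register fields,
// and maps onto the VM-level register rax:
//
//   reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());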
  61 
  62 // General Registers
// EBX, ESI, and EDI were previously set as save-on-entry for Java code.
// SOE was turned off in Java code due to frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
// Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
// allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Here is the trick: FPR1 is really st(0), except in the midst of emitting
// assembly for a machnode.  During emission the FPU stack is pushed, making
// FPR1 == st(1) temporarily.  However, at any safepoint the stack will not
// have this extra element, so FPR1 == st(0) from the oopMap viewpoint.  This
// numbering quirk forces the instruction encoding to play games with the
// register encode to correct for the 0/1 issue.  See
// MachSpillCopyNode::implementation, where it does flt->flt moves, for an
// example.
//
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
 108 reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for all registers
 139 reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 140 // Class for general registers
 141 reg_class int_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers which may be used for implicit null checks on Win95.
// Also safe for use by tailjump.  We don't want to allocate in EBP.
 144 reg_class int_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
 145 // Class of "X" registers
 146 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 147 // Class of registers that can appear in an address with no offset.
 148 // EBP and ESP require an extra instruction byte for zero offset.
 149 // Used in fast-unlock
 150 reg_class p_reg(EDX, EDI, ESI, EBX);
 151 // Class for general registers not including ECX
 152 reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
 153 // Class for general registers not including EAX
 154 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 155 // Class for general registers not including EAX or EBX.
 156 reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
 157 // Class of EAX (for multiply and divide operations)
 158 reg_class eax_reg(EAX);
 159 // Class of EBX (for atomic add)
 160 reg_class ebx_reg(EBX);
 161 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 162 reg_class ecx_reg(ECX);
 163 // Class of EDX (for multiply and divide operations)
 164 reg_class edx_reg(EDX);
 165 // Class of EDI (for synchronization)
 166 reg_class edi_reg(EDI);
 167 // Class of ESI (for synchronization)
 168 reg_class esi_reg(ESI);
 169 // Singleton class for interpreter's stack pointer
 170 reg_class ebp_reg(EBP);
 171 // Singleton class for stack pointer
 172 reg_class sp_reg(ESP);
 173 // Singleton class for instruction pointer
 174 // reg_class ip_reg(EIP);
 175 // Class of integer register pairs
 176 reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
 177 // Class of integer register pairs that aligns with calling convention
 178 reg_class eadx_reg( EAX,EDX );
 179 reg_class ebcx_reg( ECX,EBX );
 180 // Not AX or DX, used in divides
 181 reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
 182 
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
 186 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 187 
 188 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 189                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 190                       FPR7L,FPR7H );
 191 
 192 reg_class fp_flt_reg0( FPR1L );
 193 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 194 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 195 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 196                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 197 
 198 %}
 199 
 200 
 201 //----------SOURCE BLOCK-------------------------------------------------------
 202 // This is a block of C++ code which provides values, functions, and
 203 // definitions necessary in the rest of the architecture description
 204 source_hpp %{
 205 // Must be visible to the DFA in dfa_x86_32.cpp
 206 extern bool is_operand_hi32_zero(Node* n);
 207 %}
 208 
 209 source %{
 210 #define   RELOC_IMM32    Assembler::imm_operand
 211 #define   RELOC_DISP32   Assembler::disp32_operand
 212 
 213 #define __ _masm.
 214 
 215 // How to find the high register of a Long pair, given the low register
 216 #define   HIGH_FROM_LOW(x) ((x)+2)
 217 
 218 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 219 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 220 // fast versions of NegF/NegD and AbsF/AbsD.
 221 
// Note: 'double' and 'long long' have only 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value into the 128-bit operand.
 228   operand[0] = lo;
 229   operand[1] = hi;
 230   return operand;
 231 }
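
// Worked example (illustrative addresses): if &fp_signmask_pool[1*2] happened
// to be 0x1018, masking with ~0xF yields the 16-byte aligned address 0x1010;
// rounding down is safe because the pool below reserves one extra 128-bit
// entry as alignment slack.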
 232 
// Buffer for the 128-bit masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128 bits (data) + 128 bits (alignment)
 235 
 236 // Static initialization during VM startup.
 237 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 238 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 239 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 240 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
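
// These pools back the fast AbsF/AbsD and NegF/NegD encodings mentioned
// above: ANDing with a signmask constant clears the IEEE-754 sign bit
// (absolute value), while XORing with a signflip constant toggles it
// (negation).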
 241 
 242 // Offset hacking within calls.
 243 static int pre_call_resets_size() {
 244   int size = 0;
 245   Compile* C = Compile::current();
 246   if (C->in_24_bit_fp_mode()) {
 247     size += 6; // fldcw
 248   }
 249   if (C->max_vector_size() > 16) {
 250     if(UseAVX <= 2) {
 251       size += 3; // vzeroupper
 252     }
 253   }
 254   return size;
 255 }
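
// The byte counts above match the encodings used elsewhere in this file:
// FLDCW with a 32-bit absolute address is 6 bytes (opcode, ModR/M, disp32)
// and VZEROUPPER is 3 bytes (2-byte VEX prefix plus opcode).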
 256 
 257 static int preserve_SP_size() {
 258   return 2;  // op, rm(reg/reg)
 259 }
 260 
// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
 264 int MachCallStaticJavaNode::ret_addr_offset() {
 265   int offset = 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 266   if (_method_handle_invoke)
 267     offset += preserve_SP_size();
 268   return offset;
 269 }
 270 
 271 int MachCallDynamicJavaNode::ret_addr_offset() {
 272   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 273 }
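
// The 10 bytes are, roughly, a 5-byte MOV that loads the inline-cache
// constant (opcode plus 32-bit immediate) followed by the 5-byte CALL;
// compare the "skip MOV instruction" adjustment in
// CallDynamicJavaDirectNode::compute_padding() below.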
 274 
 275 static int sizeof_FFree_Float_Stack_All = -1;
 276 
 277 int MachCallRuntimeNode::ret_addr_offset() {
 278   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 279   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 280 }
 281 
 282 // Indicate if the safepoint node needs the polling page as an input.
 283 // Since x86 does have absolute addressing, it doesn't.
 284 bool SafePointNode::needs_polling_address_input() {
 285   return false;
 286 }
 287 
 288 //
 289 // Compute padding required for nodes which need alignment
 290 //
 291 
 292 // The address of the call instruction needs to be 4-byte aligned to
 293 // ensure that it does not span a cache line so that it can be patched.
 294 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 295   current_offset += pre_call_resets_size();  // skip fldcw, if any
 296   current_offset += 1;      // skip call opcode byte
 297   return round_to(current_offset, alignment_required()) - current_offset;
 298 }
 299 
 300 // The address of the call instruction needs to be 4-byte aligned to
 301 // ensure that it does not span a cache line so that it can be patched.
 302 int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
 303   current_offset += pre_call_resets_size();  // skip fldcw, if any
 304   current_offset += preserve_SP_size();   // skip mov rbp, rsp
 305   current_offset += 1;      // skip call opcode byte
 306   return round_to(current_offset, alignment_required()) - current_offset;
 307 }
 308 
 309 // The address of the call instruction needs to be 4-byte aligned to
 310 // ensure that it does not span a cache line so that it can be patched.
 311 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 312   current_offset += pre_call_resets_size();  // skip fldcw, if any
 313   current_offset += 5;      // skip MOV instruction
 314   current_offset += 1;      // skip call opcode byte
 315   return round_to(current_offset, alignment_required()) - current_offset;
 316 }
 317 
 318 // EMIT_RM()
 319 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 320   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 321   cbuf.insts()->emit_int8(c);
 322 }
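
// For example, emit_rm(cbuf, 0x3, dst, src) packs a register-direct ModR/M
// byte: 0xC0 | (dst << 3) | src.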
 323 
 324 // EMIT_CC()
 325 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 326   unsigned char c = (unsigned char)( f1 | f2 );
 327   cbuf.insts()->emit_int8(c);
 328 }
 329 
 330 // EMIT_OPCODE()
 331 void emit_opcode(CodeBuffer &cbuf, int code) {
 332   cbuf.insts()->emit_int8((unsigned char) code);
 333 }
 334 
 335 // EMIT_OPCODE() w/ relocation information
 336 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 337   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 338   emit_opcode(cbuf, code);
 339 }
 340 
 341 // EMIT_D8()
 342 void emit_d8(CodeBuffer &cbuf, int d8) {
 343   cbuf.insts()->emit_int8((unsigned char) d8);
 344 }
 345 
 346 // EMIT_D16()
 347 void emit_d16(CodeBuffer &cbuf, int d16) {
 348   cbuf.insts()->emit_int16(d16);
 349 }
 350 
 351 // EMIT_D32()
 352 void emit_d32(CodeBuffer &cbuf, int d32) {
 353   cbuf.insts()->emit_int32(d32);
 354 }
 355 
 356 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 357 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 358         int format) {
 359   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 360   cbuf.insts()->emit_int32(d32);
 361 }
 362 
 363 // emit 32 bit value and construct relocation entry from RelocationHolder
 364 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 365         int format) {
 366 #ifdef ASSERT
 367   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 368     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 369   }
 370 #endif
 371   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 372   cbuf.insts()->emit_int32(d32);
 373 }
 374 
 375 // Access stack slot for load or store
 376 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 377   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 378   if( -128 <= disp && disp <= 127 ) {
 379     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 380     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);                      // Displacement
 382   } else {
 383     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 384     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);                      // Displacement
 386   }
 387 }
 388 
// Encode a register/memory operand pair (corresponds to the reg_mem encoding:
// rRegI ereg, memory mem).
 390 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // If there is no index and no scale, use the form without a SIB byte
 392   if ((index == 0x4) &&
 393       (scale == 0) && (base != ESP_enc)) {
 394     // If no displacement, mode is 0x0; unless base is [EBP]
 395     if ( (displace == 0) && (base != EBP_enc) ) {
 396       emit_rm(cbuf, 0x0, reg_encoding, base);
 397     }
 398     else {                    // If 8-bit displacement, mode 0x1
 399       if ((displace >= -128) && (displace <= 127)
 400           && (disp_reloc == relocInfo::none) ) {
 401         emit_rm(cbuf, 0x1, reg_encoding, base);
 402         emit_d8(cbuf, displace);
 403       }
 404       else {                  // If 32-bit displacement
 405         if (base == -1) { // Special flag for absolute address
 406           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 407           // (manual lies; no SIB needed here)
 408           if ( disp_reloc != relocInfo::none ) {
 409             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 410           } else {
 411             emit_d32      (cbuf, displace);
 412           }
 413         }
 414         else {                // Normal base + offset
 415           emit_rm(cbuf, 0x2, reg_encoding, base);
 416           if ( disp_reloc != relocInfo::none ) {
 417             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 418           } else {
 419             emit_d32      (cbuf, displace);
 420           }
 421         }
 422       }
 423     }
 424   }
 425   else {                      // Else, encode with the SIB byte
 426     // If no displacement, mode is 0x0; unless base is [EBP]
 427     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 428       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 429       emit_rm(cbuf, scale, index, base);
 430     }
 431     else {                    // If 8-bit displacement, mode 0x1
 432       if ((displace >= -128) && (displace <= 127)
 433           && (disp_reloc == relocInfo::none) ) {
 434         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 435         emit_rm(cbuf, scale, index, base);
 436         emit_d8(cbuf, displace);
 437       }
 438       else {                  // If 32-bit displacement
 439         if (base == 0x04 ) {
 440           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 441           emit_rm(cbuf, scale, index, 0x04);
 442         } else {
 443           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 444           emit_rm(cbuf, scale, index, base);
 445         }
 446         if ( disp_reloc != relocInfo::none ) {
 447           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 448         } else {
 449           emit_d32      (cbuf, displace);
 450         }
 451       }
 452     }
 453   }
 454 }
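
// Summary of the ModR/M "mod" values emitted above: 0x0 means no
// displacement (with the special cases that a base of EBP forces a
// displacement, rm == 0x4 selects a SIB byte, and mod == 0x0 with
// rm == 0x5 means a 32-bit absolute address); 0x1 means an 8-bit signed
// displacement follows; 0x2 means a 32-bit displacement follows.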
 455 
 456 
 457 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 458   if( dst_encoding == src_encoding ) {
 459     // reg-reg copy, use an empty encoding
 460   } else {
 461     emit_opcode( cbuf, 0x8B );
 462     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 463   }
 464 }
 465 
 466 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 467   Label exit;
 468   __ jccb(Assembler::noParity, exit);
 469   __ pushf();
 470   //
 471   // comiss/ucomiss instructions set ZF,PF,CF flags and
 472   // zero OF,AF,SF for NaN values.
 473   // Fixup flags by zeroing ZF,PF so that compare of NaN
 474   // values returns 'less than' result (CF is set).
 475   // Leave the rest of flags unchanged.
 476   //
 477   //    7 6 5 4 3 2 1 0
 478   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 479   //    0 0 1 0 1 0 1 1   (0x2B)
 480   //
 481   __ andl(Address(rsp, 0), 0xffffff2b);
 482   __ popf();
 483   __ bind(exit);
 484 }
 485 
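// emit_cmpfp3 materializes a three-way floating point compare result in a
// general register: -1 if the compare was 'below' or unordered (NaN sets
// the parity flag), 0 if equal, and +1 otherwise.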
 486 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 487   Label done;
 488   __ movl(dst, -1);
 489   __ jcc(Assembler::parity, done);
 490   __ jcc(Assembler::below, done);
 491   __ setb(Assembler::notEqual, dst);
 492   __ movzbl(dst, dst);
 493   __ bind(done);
 494 }
 495 
 496 
 497 //=============================================================================
 498 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 499 
 500 int Compile::ConstantTable::calculate_table_base_offset() const {
 501   return 0;  // absolute addressing, no offset
 502 }
 503 
 504 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 505 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 506   ShouldNotReachHere();
 507 }
 508 
 509 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 510   // Empty encoding
 511 }
 512 
 513 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 514   return 0;
 515 }
 516 
 517 #ifndef PRODUCT
 518 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 519   st->print("# MachConstantBaseNode (empty encoding)");
 520 }
 521 #endif
 522 
 523 
 524 //=============================================================================
 525 #ifndef PRODUCT
 526 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 527   Compile* C = ra_->C;
 528 
 529   int framesize = C->frame_size_in_bytes();
 530   int bangsize = C->bang_size_in_bytes();
 531   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 532   // Remove wordSize for return addr which is already pushed.
 533   framesize -= wordSize;
 534 
 535   if (C->need_stack_bang(bangsize)) {
 536     framesize -= wordSize;
 537     st->print("# stack bang (%d bytes)", bangsize);
 538     st->print("\n\t");
 539     st->print("PUSH   EBP\t# Save EBP");
 540     if (framesize) {
 541       st->print("\n\t");
 542       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 543     }
 544   } else {
 545     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 546     st->print("\n\t");
 547     framesize -= wordSize;
 548     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 549   }
 550 
 551   if (VerifyStackAtCalls) {
 552     st->print("\n\t");
 553     framesize -= wordSize;
 554     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 555   }
 556 
 557   if( C->in_24_bit_fp_mode() ) {
 558     st->print("\n\t");
 559     st->print("FLDCW  \t# load 24 bit fpu control word");
 560   }
 561   if (UseSSE >= 2 && VerifyFPU) {
 562     st->print("\n\t");
 563     st->print("# verify FPU stack (must be clean on entry)");
 564   }
 565 
 566 #ifdef ASSERT
 567   if (VerifyStackAtCalls) {
 568     st->print("\n\t");
 569     st->print("# stack alignment check");
 570   }
 571 #endif
 572   st->cr();
 573 }
 574 #endif
 575 
 576 
 577 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 578   Compile* C = ra_->C;
 579   MacroAssembler _masm(&cbuf);
 580 
 581   int framesize = C->frame_size_in_bytes();
 582   int bangsize = C->bang_size_in_bytes();
 583 
 584   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 585 
 586   C->set_frame_complete(cbuf.insts_size());
 587 
 588   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because code that uses the
    // constant table might be emitted before MachConstantBaseNode.
 591     Compile::ConstantTable& constant_table = C->constant_table();
 592     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 593   }
 594 }
 595 
 596 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 597   return MachNode::size(ra_); // too many variables; just compute it the hard way
 598 }
 599 
 600 int MachPrologNode::reloc() const {
 601   return 0; // a large enough number
 602 }
 603 
 604 //=============================================================================
 605 #ifndef PRODUCT
 606 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 607   Compile *C = ra_->C;
 608   int framesize = C->frame_size_in_bytes();
 609   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return address and EBP.
  framesize -= 2*wordSize;
 612 
 613   if (C->max_vector_size() > 16) {
 614     st->print("VZEROUPPER");
 615     st->cr(); st->print("\t");
 616   }
 617   if (C->in_24_bit_fp_mode()) {
 618     st->print("FLDCW  standard control word");
 619     st->cr(); st->print("\t");
 620   }
 621   if (framesize) {
 622     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 623     st->cr(); st->print("\t");
 624   }
 625   st->print_cr("POPL   EBP"); st->print("\t");
 626   if (do_polling() && C->is_method_compilation()) {
 627     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 628     st->cr(); st->print("\t");
 629   }
 630 }
 631 #endif
 632 
 633 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 634   Compile *C = ra_->C;
 635 
 636   if (C->max_vector_size() > 16) {
 637     // Clear upper bits of YMM registers when current compiled code uses
 638     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 639     MacroAssembler masm(&cbuf);
 640     masm.vzeroupper();
 641   }
 642   // If method set FPU control word, restore to standard control word
 643   if (C->in_24_bit_fp_mode()) {
 644     MacroAssembler masm(&cbuf);
 645     masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 646   }
 647 
 648   int framesize = C->frame_size_in_bytes();
 649   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return address and EBP.
  framesize -= 2*wordSize;
 652 
 653   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 654 
 655   if (framesize >= 128) {
 656     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 657     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 658     emit_d32(cbuf, framesize);
 659   } else if (framesize) {
 660     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 661     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 662     emit_d8(cbuf, framesize);
 663   }
 664 
 665   emit_opcode(cbuf, 0x58 | EBP_enc);
 666 
 667   if (do_polling() && C->is_method_compilation()) {
 668     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 669     emit_opcode(cbuf,0x85);
 670     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 671     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 672   }
 673 }
 674 
 675 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 676   Compile *C = ra_->C;
 677   // If method set FPU control word, restore to standard control word
 678   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 679   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 680   if (do_polling() && C->is_method_compilation()) size += 6;
 681 
 682   int framesize = C->frame_size_in_bytes();
 683   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return address and EBP.
  framesize -= 2*wordSize;

  size++; // popl EBP
 688 
 689   if (framesize >= 128) {
 690     size += 6;
 691   } else {
 692     size += framesize ? 3 : 0;
 693   }
 694   return size;
 695 }
 696 
 697 int MachEpilogNode::reloc() const {
 698   return 0; // a large enough number
 699 }
 700 
 701 const Pipeline * MachEpilogNode::pipeline() const {
 702   return MachNode::pipeline_class();
 703 }
 704 
 705 int MachEpilogNode::safepoint_offset() const { return 0; }
 706 
 707 //=============================================================================
 708 
 709 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 710 static enum RC rc_class( OptoReg::Name reg ) {
 711 
 712   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 713   if (OptoReg::is_stack(reg)) return rc_stack;
 714 
 715   VMReg r = OptoReg::as_VMReg(reg);
 716   if (r->is_Register()) return rc_int;
 717   if (r->is_FloatRegister()) {
 718     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 719     return rc_float;
 720   }
 721   assert(r->is_XMMRegister(), "must be");
 722   return rc_xmm;
 723 }
 724 
 725 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 726                         int opcode, const char *op_str, int size, outputStream* st ) {
 727   if( cbuf ) {
 728     emit_opcode  (*cbuf, opcode );
 729     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 730 #ifndef PRODUCT
 731   } else if( !do_size ) {
 732     if( size != 0 ) st->print("\n\t");
 733     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 734       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 735       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 736     } else { // FLD, FST, PUSH, POP
 737       st->print("%s [ESP + #%d]",op_str,offset);
 738     }
 739 #endif
 740   }
 741   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 742   return size+3+offset_size;
 743 }
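
// The size returned above is opcode (1) + ModR/M (1) + SIB (1, always
// present because the base register is ESP) + 0, 1, or 4 bytes of
// displacement.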
 744 
 745 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 746 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 747                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 748   int in_size_in_bits = Assembler::EVEX_32bit;
 749   int evex_encoding = 0;
 750   if (reg_lo+1 == reg_hi) {
 751     in_size_in_bits = Assembler::EVEX_64bit;
 752     evex_encoding = Assembler::VEX_W;
 753   }
 754   if (cbuf) {
 755     MacroAssembler _masm(cbuf);
 756     if (reg_lo+1 == reg_hi) { // double move?
 757       if (is_load) {
 758         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 759       } else {
 760         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 761       }
 762     } else {
 763       if (is_load) {
 764         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 765       } else {
 766         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 767       }
 768     }
 769 #ifndef PRODUCT
 770   } else if (!do_size) {
 771     if (size != 0) st->print("\n\t");
 772     if (reg_lo+1 == reg_hi) { // double move?
 773       if (is_load) st->print("%s %s,[ESP + #%d]",
 774                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 775                               Matcher::regName[reg_lo], offset);
 776       else         st->print("MOVSD  [ESP + #%d],%s",
 777                               offset, Matcher::regName[reg_lo]);
 778     } else {
 779       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 780                               Matcher::regName[reg_lo], offset);
 781       else         st->print("MOVSS  [ESP + #%d],%s",
 782                               offset, Matcher::regName[reg_lo]);
 783     }
 784 #endif
 785   }
 786   bool is_single_byte = false;
 787   if ((UseAVX > 2) && (offset != 0)) {
 788     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 789   }
 790   int offset_size = 0;
 791   if (UseAVX > 2 ) {
 792     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 793   } else {
 794     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 795   }
 796   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 797   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 798   return size+5+offset_size;
 799 }
 800 
 801 
 802 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 803                             int src_hi, int dst_hi, int size, outputStream* st ) {
 804   if (cbuf) {
 805     MacroAssembler _masm(cbuf);
 806     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 807       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 808                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 809     } else {
 810       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 811                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 812     }
 813 #ifndef PRODUCT
 814   } else if (!do_size) {
 815     if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
 817       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 818         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 819       } else {
 820         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 821       }
 822     } else {
 823       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 824         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 825       } else {
 826         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 827       }
 828     }
 829 #endif
 830   }
 831   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 832   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 833   int sz = (UseAVX > 2) ? 6 : 4;
 834   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 835       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 836   return size + sz;
 837 }
 838 
 839 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 840                             int src_hi, int dst_hi, int size, outputStream* st ) {
 841   // 32-bit
 842   if (cbuf) {
 843     MacroAssembler _masm(cbuf);
 844     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 845              as_Register(Matcher::_regEncode[src_lo]));
 846 #ifndef PRODUCT
 847   } else if (!do_size) {
 848     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 849 #endif
 850   }
  return (UseAVX > 2) ? 6 : 4;
 852 }
 853 
 854 
 855 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 856                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 857   // 32-bit
 858   if (cbuf) {
 859     MacroAssembler _masm(cbuf);
 860     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 861              as_XMMRegister(Matcher::_regEncode[src_lo]));
 862 #ifndef PRODUCT
 863   } else if (!do_size) {
 864     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 865 #endif
 866   }
  return (UseAVX > 2) ? 6 : 4;
 868 }
 869 
 870 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 871   if( cbuf ) {
 872     emit_opcode(*cbuf, 0x8B );
 873     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 874 #ifndef PRODUCT
 875   } else if( !do_size ) {
 876     if( size != 0 ) st->print("\n\t");
 877     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 878 #endif
 879   }
 880   return size+2;
 881 }
 882 
 883 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 884                                  int offset, int size, outputStream* st ) {
 885   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 886     if( cbuf ) {
 887       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 888       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 889 #ifndef PRODUCT
 890     } else if( !do_size ) {
 891       if( size != 0 ) st->print("\n\t");
 892       st->print("FLD    %s",Matcher::regName[src_lo]);
 893 #endif
 894     }
 895     size += 2;
 896   }
 897 
 898   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 899   const char *op_str;
 900   int op;
 901   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 902     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 903     op = 0xDD;
 904   } else {                   // 32-bit store
 905     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 906     op = 0xD9;
 907     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 908   }
 909 
 910   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 911 }
 912 
 913 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 914 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 915                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 916 
 917 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 918                             int stack_offset, int reg, uint ireg, outputStream* st);
 919 
 920 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 921                                      int dst_offset, uint ireg, outputStream* st) {
 922   int calc_size = 0;
 923   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 924   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 925   switch (ireg) {
 926   case Op_VecS:
 927     calc_size = 3+src_offset_size + 3+dst_offset_size;
 928     break;
 929   case Op_VecD:
 930     calc_size = 3+src_offset_size + 3+dst_offset_size;
 931     src_offset += 4;
 932     dst_offset += 4;
 933     src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 934     dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 935     calc_size += 3+src_offset_size + 3+dst_offset_size;
 936     break;
 937   case Op_VecX:
 938   case Op_VecY:
 939   case Op_VecZ:
 940     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 941     break;
 942   default:
 943     ShouldNotReachHere();
 944   }
 945   if (cbuf) {
 946     MacroAssembler _masm(cbuf);
 947     int offset = __ offset();
 948     switch (ireg) {
 949     case Op_VecS:
 950       __ pushl(Address(rsp, src_offset));
 951       __ popl (Address(rsp, dst_offset));
 952       break;
 953     case Op_VecD:
 954       __ pushl(Address(rsp, src_offset));
 955       __ popl (Address(rsp, dst_offset));
 956       __ pushl(Address(rsp, src_offset+4));
 957       __ popl (Address(rsp, dst_offset+4));
 958       break;
 959     case Op_VecX:
 960       __ movdqu(Address(rsp, -16), xmm0);
 961       __ movdqu(xmm0, Address(rsp, src_offset));
 962       __ movdqu(Address(rsp, dst_offset), xmm0);
 963       __ movdqu(xmm0, Address(rsp, -16));
 964       break;
 965     case Op_VecY:
 966       __ vmovdqu(Address(rsp, -32), xmm0);
 967       __ vmovdqu(xmm0, Address(rsp, src_offset));
 968       __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
 971       __ evmovdqu(Address(rsp, -64), xmm0, 2);
 972       __ evmovdqu(xmm0, Address(rsp, src_offset), 2);
 973       __ evmovdqu(Address(rsp, dst_offset), xmm0, 2);
 974       __ evmovdqu(xmm0, Address(rsp, -64), 2);
 975       break;
 976     default:
 977       ShouldNotReachHere();
 978     }
 979     int size = __ offset() - offset;
    assert(size == calc_size, "incorrect size calculation");
 981     return size;
 982 #ifndef PRODUCT
 983   } else if (!do_size) {
 984     switch (ireg) {
 985     case Op_VecS:
 986       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
 987                 "popl    [rsp + #%d]",
 988                 src_offset, dst_offset);
 989       break;
 990     case Op_VecD:
 991       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 992                 "popq    [rsp + #%d]\n\t"
 993                 "pushl   [rsp + #%d]\n\t"
 994                 "popq    [rsp + #%d]",
 995                 src_offset, dst_offset, src_offset+4, dst_offset+4);
 996       break;
 997      case Op_VecX:
 998       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
 999                 "movdqu  xmm0, [rsp + #%d]\n\t"
1000                 "movdqu  [rsp + #%d], xmm0\n\t"
1001                 "movdqu  xmm0, [rsp - #16]",
1002                 src_offset, dst_offset);
1003       break;
1004     case Op_VecY:
1005       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1006                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1007                 "vmovdqu [rsp + #%d], xmm0\n\t"
1008                 "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
1011       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1012                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1013                 "vmovdqu [rsp + #%d], xmm0\n\t"
1014                 "vmovdqu xmm0, [rsp - #64]",
1015                 src_offset, dst_offset);
1016       break;
1017     default:
1018       ShouldNotReachHere();
1019     }
1020 #endif
1021   }
1022   return calc_size;
1023 }
1024 
1025 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1026   // Get registers to move
1027   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1028   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1029   OptoReg::Name dst_second = ra_->get_reg_second(this );
1030   OptoReg::Name dst_first = ra_->get_reg_first(this );
1031 
1032   enum RC src_second_rc = rc_class(src_second);
1033   enum RC src_first_rc = rc_class(src_first);
1034   enum RC dst_second_rc = rc_class(dst_second);
1035   enum RC dst_first_rc = rc_class(dst_first);
1036 
1037   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1038 
1039   // Generate spill code!
1040   int size = 0;
1041 
1042   if( src_first == dst_first && src_second == dst_second )
1043     return size;            // Self copy, no move
1044 
1045   if (bottom_type()->isa_vect() != NULL) {
1046     uint ireg = ideal_reg();
1047     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1048     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1049     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1050     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1051       // mem -> mem
1052       int src_offset = ra_->reg2offset(src_first);
1053       int dst_offset = ra_->reg2offset(dst_first);
1054       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1055     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1056       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1057     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1058       int stack_offset = ra_->reg2offset(dst_first);
1059       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1060     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1061       int stack_offset = ra_->reg2offset(src_first);
1062       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1063     } else {
1064       ShouldNotReachHere();
1065     }
1066   }
1067 
1068   // --------------------------------------
1069   // Check for mem-mem move.  push/pop to move.
1070   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1071     if( src_second == dst_first ) { // overlapping stack copy ranges
1072       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1073       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1074       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1075       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1076     }
1077     // move low bits
1078     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1079     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1080     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1081       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1082       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1083     }
1084     return size;
1085   }
1086 
1087   // --------------------------------------
1088   // Check for integer reg-reg copy
1089   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1090     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1091 
1092   // Check for integer store
1093   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1094     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1095 
1096   // Check for integer load
1097   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1098     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1099 
1100   // Check for integer reg-xmm reg copy
1101   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1102     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1103             "no 64 bit integer-float reg moves" );
1104     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1105   }
1106   // --------------------------------------
1107   // Check for float reg-reg copy
1108   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1109     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1110             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1111     if( cbuf ) {
1112 
1113       // Note the mucking with the register encode to compensate for the 0/1
1114       // indexing issue mentioned in a comment in the reg_def sections
1115       // for FPR registers many lines above here.
1116 
1117       if( src_first != FPR1L_num ) {
1118         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1119         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1120         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1121         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1122      } else {
1123         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1124         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1125      }
1126 #ifndef PRODUCT
1127     } else if( !do_size ) {
1128       if( size != 0 ) st->print("\n\t");
1129       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1130       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1131 #endif
1132     }
1133     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1134   }
1135 
1136   // Check for float store
1137   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1138     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1139   }
1140 
1141   // Check for float load
1142   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1143     int offset = ra_->reg2offset(src_first);
1144     const char *op_str;
1145     int op;
1146     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1147       op_str = "FLD_D";
1148       op = 0xDD;
1149     } else {                   // 32-bit load
1150       op_str = "FLD_S";
1151       op = 0xD9;
1152       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1153     }
1154     if( cbuf ) {
1155       emit_opcode  (*cbuf, op );
1156       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1157       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1158       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1159 #ifndef PRODUCT
1160     } else if( !do_size ) {
1161       if( size != 0 ) st->print("\n\t");
1162       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1163 #endif
1164     }
1165     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1166     return size + 3+offset_size+2;
1167   }
1168 
1169   // Check for xmm reg-reg copy
1170   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1171     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1172             (src_first+1 == src_second && dst_first+1 == dst_second),
1173             "no non-adjacent float-moves" );
1174     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1175   }
1176 
1177   // Check for xmm reg-integer reg copy
1178   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1179     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1180             "no 64 bit float-integer reg moves" );
1181     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1182   }
1183 
1184   // Check for xmm store
1185   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1186     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1187   }
1188 
1189   // Check for float xmm load
1190   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1191     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1192   }
1193 
1194   // Copy from float reg to xmm reg
1195   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1196     // copy to the top of stack from floating point reg
1197     // and use LEA to preserve flags
1198     if( cbuf ) {
1199       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1200       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1201       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1202       emit_d8(*cbuf,0xF8);
1203 #ifndef PRODUCT
1204     } else if( !do_size ) {
1205       if( size != 0 ) st->print("\n\t");
1206       st->print("LEA    ESP,[ESP-8]");
1207 #endif
1208     }
1209     size += 4;
1210 
1211     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1212 
1213     // Copy from the temp memory to the xmm reg.
1214     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1215 
1216     if( cbuf ) {
1217       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1218       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1219       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1220       emit_d8(*cbuf,0x08);
1221 #ifndef PRODUCT
1222     } else if( !do_size ) {
1223       if( size != 0 ) st->print("\n\t");
1224       st->print("LEA    ESP,[ESP+8]");
1225 #endif
1226     }
1227     size += 4;
1228     return size;
1229   }
1230 
1231   assert( size > 0, "missed a case" );
1232 
1233   // --------------------------------------------------------------------
1234   // Check for second bits still needing moving.
1235   if( src_second == dst_second )
1236     return size;               // Self copy; no move
1237   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1238 
1239   // Check for second word int-int move
1240   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1241     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1242 
1243   // Check for second word integer store
1244   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1245     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1246 
1247   // Check for second word integer load
1248   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1249     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1250 
1251 
1252   Unimplemented();
1253   return 0; // Mute compiler
1254 }
1255 
1256 #ifndef PRODUCT
1257 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1258   implementation( NULL, ra_, false, st );
1259 }
1260 #endif
1261 
1262 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1263   implementation( &cbuf, ra_, false, NULL );
1264 }
1265 
1266 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1267   return implementation( NULL, ra_, true, NULL );
1268 }
1269 
1270 
1271 //=============================================================================
1272 #ifndef PRODUCT
1273 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1274   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1275   int reg = ra_->get_reg_first(this);
1276   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1277 }
1278 #endif
1279 
1280 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1281   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1282   int reg = ra_->get_encode(this);
1283   if( offset >= 128 ) {
1284     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1285     emit_rm(cbuf, 0x2, reg, 0x04);
1286     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1287     emit_d32(cbuf, offset);
1288   }
1289   else {
1290     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1291     emit_rm(cbuf, 0x1, reg, 0x04);
1292     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1293     emit_d8(cbuf, offset);
1294   }
1295 }
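
// LEA with a 32-bit displacement is 7 bytes (opcode, ModR/M, SIB, disp32);
// with an 8-bit displacement it is 4 bytes, which is what size() below
// reports.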
1296 
1297 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1298   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1299   if( offset >= 128 ) {
1300     return 7;
1301   }
1302   else {
1303     return 4;
1304   }
1305 }
1306 
1307 //=============================================================================
1308 #ifndef PRODUCT
1309 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1310   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1311   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1312   st->print_cr("\tNOP");
1313   st->print_cr("\tNOP");
1314   if( !OptoBreakpoint )
1315     st->print_cr("\tNOP");
1316 }
1317 #endif
1318 
1319 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1320   MacroAssembler masm(&cbuf);
1321 #ifdef ASSERT
1322   uint insts_size = cbuf.insts_size();
1323 #endif
1324   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1325   masm.jump_cc(Assembler::notEqual,
1326                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1327   /* WARNING these NOPs are critical so that verified entry point is properly
1328      aligned for patching by NativeJump::patch_verified_entry() */
1329   int nops_cnt = 2;
1330   if( !OptoBreakpoint ) // Leave space for int3
1331      nops_cnt += 1;
1332   masm.nop(nops_cnt);
1333 
1334   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1335 }
1336 
1337 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1338   return OptoBreakpoint ? 11 : 12;
1339 }
1340 
1341 
1342 //=============================================================================
1343 
1344 int Matcher::regnum_to_fpu_offset(int regnum) {
1345   return regnum - 32; // The FP registers are in the second chunk
1346 }
1347 
// True just means this platform has a fast l2f conversion (the hook originated on UltraSparc).
1349 const bool Matcher::convL2FSupported(void) {
1350   return true;
1351 }
1352 
1353 // Is this branch offset short enough that a short branch can be used?
1354 //
1355 // NOTE: If the platform does not provide any short branch variants, then
1356 //       this method should return false for offset 0.
1357 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1361   offset -= br_size;
1362 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly smaller.
1365   if (rule == jmpConUCF2_rule)
1366     return (-126 <= offset && offset <= 125);
1367   return (-128 <= offset && offset <= 127);
1368 }
1369 
1370 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1372   return false;
1373 }
1374 
1375 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1376 const bool Matcher::init_array_count_is_in_bytes = false;
1377 
1378 // Threshold size for cleararray.
1379 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1380 
// Longs need 2 CMOV's, so charge one extra.
1382 const int Matcher::long_cmove_cost() { return 1; }
1383 
1384 // No CMOVF/CMOVD with SSE/SSE2
1385 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1386 
1387 // Does the CPU require late expand (see block.cpp for description of late expand)?
1388 const bool Matcher::require_postalloc_expand = false;
1389 
1390 // Should the Matcher clone shifts on addressing modes, expecting them to
1391 // be subsumed into complex addressing expressions or compute them into
1392 // registers?  True for Intel but false for most RISCs
1393 const bool Matcher::clone_shift_expressions = true;
1394 
1395 // Do we need to mask the count passed to shift instructions or does
1396 // the cpu only look at the lower 5/6 bits anyway?
1397 const bool Matcher::need_masked_shift_count = false;
1398 
1399 bool Matcher::narrow_oop_use_complex_address() {
1400   ShouldNotCallThis();
1401   return true;
1402 }
1403 
1404 bool Matcher::narrow_klass_use_complex_address() {
1405   ShouldNotCallThis();
1406   return true;
1407 }
1408 
1409 
1410 // Is it better to copy float constants, or load them directly from memory?
1411 // Intel can load a float constant from a direct address, requiring no
1412 // extra registers.  Most RISCs will have to materialize an address into a
1413 // register first, so they would do better to copy the constant from stack.
1414 const bool Matcher::rematerialize_float_constants = true;
1415 
1416 // If CPU can load and store mis-aligned doubles directly then no fixup is
1417 // needed.  Else we split the double into 2 integer pieces and move it
1418 // piece-by-piece.  Only happens when passing doubles into C code as the
1419 // Java calling convention forces doubles to be aligned.
1420 const bool Matcher::misaligned_doubles_ok = true;
1421 
1422 
1423 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1424   // Get the memory operand from the node
1425   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1426   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1427   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1428   uint opcnt     = 1;                 // First operand
1429   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1430   while( idx >= skipped+num_edges ) {
1431     skipped += num_edges;
1432     opcnt++;                          // Bump operand count
1433     assert( opcnt < numopnds, "Accessing non-existent operand" );
1434     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1435   }
1436 
1437   MachOper *memory = node->_opnds[opcnt];
1438   MachOper *new_memory = NULL;
1439   switch (memory->opcode()) {
1440   case DIRECT:
1441   case INDOFFSET32X:
1442     // No transformation necessary.
1443     return;
1444   case INDIRECT:
1445     new_memory = new indirect_win95_safeOper( );
1446     break;
1447   case INDOFFSET8:
1448     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1449     break;
1450   case INDOFFSET32:
1451     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1452     break;
1453   case INDINDEXOFFSET:
1454     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1455     break;
1456   case INDINDEXSCALE:
1457     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1458     break;
1459   case INDINDEXSCALEOFFSET:
1460     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1461     break;
1462   case LOAD_LONG_INDIRECT:
1463   case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as the address register; uses { EDX, EBX, EDI, ESI }.
1465     return;
1466   default:
1467     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1468     return;
1469   }
1470   node->_opnds[opcnt] = new_memory;
1471 }
1472 
1473 // Advertise here if the CPU requires explicit rounding operations
1474 // to implement the UseStrictFP mode.
1475 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1476 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On x32 the value is stored with conversion only when the FPU is used for floats.
1479 bool Matcher::float_in_double() { return (UseSSE == 0); }
1480 
1481 // Do ints take an entire long register or just half?
1482 const bool Matcher::int_in_long = false;
1483 
1484 // Return whether or not this register is ever used as an argument.  This
1485 // function is used on startup to build the trampoline stubs in generateOptoStub.
1486 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1488 bool Matcher::can_be_java_arg( int reg ) {
1489   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1490   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1491   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1492   return false;
1493 }
1494 
1495 bool Matcher::is_spillable_arg( int reg ) {
1496   return can_be_java_arg(reg);
1497 }
1498 
1499 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code that uses a multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded because its magnitude does not
  // fit in a positive 32-bit value).
1505   return VM_Version::has_fast_idiv() &&
1506          (divisor == (int)divisor && divisor != min_jint);
1507 }
1508 
1509 // Register for DIVI projection of divmodI
1510 RegMask Matcher::divI_proj_mask() {
1511   return EAX_REG_mask();
1512 }
1513 
1514 // Register for MODI projection of divmodI
1515 RegMask Matcher::modI_proj_mask() {
1516   return EDX_REG_mask();
1517 }
1518 
1519 // Register for DIVL projection of divmodL
1520 RegMask Matcher::divL_proj_mask() {
1521   ShouldNotReachHere();
1522   return RegMask();
1523 }
1524 
1525 // Register for MODL projection of divmodL
1526 RegMask Matcher::modL_proj_mask() {
1527   ShouldNotReachHere();
1528   return RegMask();
1529 }
1530 
1531 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1532   return EBP_REG_mask();
1533 }
1534 
// Returns true if the high 32 bits of the value are known to be zero.
1536 bool is_operand_hi32_zero(Node* n) {
1537   int opc = n->Opcode();
1538   if (opc == Op_AndL) {
1539     Node* o2 = n->in(2);
1540     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1541       return true;
1542     }
1543   }
1544   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1545     return true;
1546   }
1547   return false;
1548 }
1549 
1550 %}
1551 
1552 //----------ENCODING BLOCK-----------------------------------------------------
1553 // This block specifies the encoding classes used by the compiler to output
1554 // byte streams.  Encoding classes generate functions which are called by
1555 // Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1559 // operand to generate a function which returns its register number when
1560 // queried.   CONST_INTER causes an operand to generate a function which
1561 // returns the value of the constant when queried.  MEMORY_INTER causes an
1562 // operand to generate four functions which return the Base Register, the
1563 // Index Register, the Scale Value, and the Offset Value of the operand when
1564 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1566 // associated with each basic boolean condition for a conditional instruction.
1567 // Instructions specify two basic values for encoding.  They use the
1568 // ins_encode keyword to specify their encoding class (which must be one of
1569 // the class names specified in the encoding block), and they use the
1570 // opcode keyword to specify, in order, their primary, secondary, and
1571 // tertiary opcode.  Only the opcode sections which a particular instruction
1572 // needs for encoding need to be specified.
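//
// Illustrative sketch only (not a live rule in this block): an instruction
// typically pairs its opcode bytes with enc_classes defined below, e.g.
//
//   instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     opcode(0x81, 0x00);                  // primary 0x81, /0 = ADD
//     ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
//   %}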
1573 encode %{
1574   // Build emit functions for each basic byte or larger field in the intel
1575   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1576   // code in the enc_class source block.  Emit functions will live in the
1577   // main source block for now.  In future, we can generalize this by
1578   // adding a syntax that specifies the sizes of fields in an order,
1579   // so that the adlc can build the emit functions automagically
1580 
1581   // Emit primary opcode
1582   enc_class OpcP %{
1583     emit_opcode(cbuf, $primary);
1584   %}
1585 
1586   // Emit secondary opcode
1587   enc_class OpcS %{
1588     emit_opcode(cbuf, $secondary);
1589   %}
1590 
1591   // Emit opcode directly
1592   enc_class Opcode(immI d8) %{
1593     emit_opcode(cbuf, $d8$$constant);
1594   %}
1595 
1596   enc_class SizePrefix %{
1597     emit_opcode(cbuf,0x66);
1598   %}
1599 
1600   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1601     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1602   %}
1603 
1604   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1605     emit_opcode(cbuf,$opcode$$constant);
1606     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1607   %}
1608 
1609   enc_class mov_r32_imm0( rRegI dst ) %{
1610     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1611     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1612   %}
1613 
1614   enc_class cdq_enc %{
1615     // Full implementation of Java idiv and irem; checks for
1616     // special case as described in JVM spec., p.243 & p.271.
1617     //
1618     //         normal case                           special case
1619     //
    // input : rax: dividend                          min_int
    //         reg: divisor                           -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
    //
    //  Code sequence:
1627     //
1628     //  81 F8 00 00 00 80    cmp         rax,80000000h
1629     //  0F 85 0B 00 00 00    jne         normal_case
1630     //  33 D2                xor         rdx,edx
1631     //  83 F9 FF             cmp         rcx,0FFh
1632     //  0F 84 03 00 00 00    je          done
1633     //                  normal_case:
1634     //  99                   cdq
1635     //  F7 F9                idiv        rax,ecx
1636     //                  done:
1637     //
1638     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1639     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1640     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1641     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1642     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1643     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1644     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1645     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1646     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1647     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1648     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1649     // normal_case:
1650     emit_opcode(cbuf,0x99);                                         // cdq
1651     // idiv (note: must be emitted by the user of this rule)
1652     // normal:
1653   %}
1654 
1655   // Dense encoding for older common ops
1656   enc_class Opc_plus(immI opcode, rRegI reg) %{
1657     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1658   %}
1659 
1660 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
1662   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1663     // Check for 8-bit immediate, and set sign extend bit in opcode
1664     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1665       emit_opcode(cbuf, $primary | 0x02);
1666     }
1667     else {                          // If 32-bit immediate
1668       emit_opcode(cbuf, $primary);
1669     }
1670   %}
1671 
1672   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1673     // Emit primary opcode and set sign-extend bit
1674     // Check for 8-bit immediate, and set sign extend bit in opcode
1675     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
1677     else {                          // If 32-bit immediate
1678       emit_opcode(cbuf, $primary);
1679     }
1680     // Emit r/m byte with secondary opcode, after primary opcode.
1681     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1682   %}
1683 
1684   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1685     // Check for 8-bit immediate, and set sign extend bit in opcode
1686     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1687       $$$emit8$imm$$constant;
1688     }
1689     else {                          // If 32-bit immediate
1690       // Output immediate
1691       $$$emit32$imm$$constant;
1692     }
1693   %}
1694 
1695   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1696     // Emit primary opcode and set sign-extend bit
1697     // Check for 8-bit immediate, and set sign extend bit in opcode
1698     int con = (int)$imm$$constant; // Throw away top bits
1699     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1700     // Emit r/m byte with secondary opcode, after primary opcode.
1701     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1702     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1703     else                               emit_d32(cbuf,con);
1704   %}
1705 
1706   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1707     // Emit primary opcode and set sign-extend bit
1708     // Check for 8-bit immediate, and set sign extend bit in opcode
1709     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1710     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1711     // Emit r/m byte with tertiary opcode, after primary opcode.
1712     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1713     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1714     else                               emit_d32(cbuf,con);
1715   %}
1716 
1717   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1718     emit_cc(cbuf, $secondary, $dst$$reg );
1719   %}
1720 
1721   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1722     int destlo = $dst$$reg;
1723     int desthi = HIGH_FROM_LOW(destlo);
1724     // bswap lo
1725     emit_opcode(cbuf, 0x0F);
1726     emit_cc(cbuf, 0xC8, destlo);
1727     // bswap hi
1728     emit_opcode(cbuf, 0x0F);
1729     emit_cc(cbuf, 0xC8, desthi);
1730     // xchg lo and hi
1731     emit_opcode(cbuf, 0x87);
1732     emit_rm(cbuf, 0x3, destlo, desthi);
1733   %}
1734 
1735   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1736     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1737   %}
1738 
1739   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1740     $$$emit8$primary;
1741     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1742   %}
1743 
1744   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1745     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1746     emit_d8(cbuf, op >> 8 );
1747     emit_d8(cbuf, op & 255);
1748   %}
1749 
1750   // emulate a CMOV with a conditional branch around a MOV
1751   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1752     // Invert sense of branch from sense of CMOV
1753     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1754     emit_d8( cbuf, $brOffs$$constant );
1755   %}
1756 
1757   enc_class enc_PartialSubtypeCheck( ) %{
1758     Register Redi = as_Register(EDI_enc); // result register
1759     Register Reax = as_Register(EAX_enc); // super class
1760     Register Recx = as_Register(ECX_enc); // killed
1761     Register Resi = as_Register(ESI_enc); // sub class
1762     Label miss;
1763 
1764     MacroAssembler _masm(&cbuf);
1765     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1766                                      NULL, &miss,
1767                                      /*set_cond_codes:*/ true);
1768     if ($primary) {
1769       __ xorptr(Redi, Redi);
1770     }
1771     __ bind(miss);
1772   %}
1773 
1774   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1775     MacroAssembler masm(&cbuf);
1776     int start = masm.offset();
1777     if (UseSSE >= 2) {
1778       if (VerifyFPU) {
1779         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1780       }
1781     } else {
1782       // External c_calling_convention expects the FPU stack to be 'clean'.
1783       // Compiled code leaves it dirty.  Do cleanup now.
1784       masm.empty_FPU_stack();
1785     }
1786     if (sizeof_FFree_Float_Stack_All == -1) {
1787       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1788     } else {
1789       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1790     }
1791   %}
1792 
1793   enc_class Verify_FPU_For_Leaf %{
1794     if( VerifyFPU ) {
1795       MacroAssembler masm(&cbuf);
1796       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1797     }
1798   %}
1799 
1800   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1801     // This is the instruction starting address for relocation info.
1802     cbuf.set_insts_mark();
1803     $$$emit8$primary;
1804     // CALL directly to the runtime
1805     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1806                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1807 
1808     if (UseSSE >= 2) {
1809       MacroAssembler _masm(&cbuf);
1810       BasicType rt = tf()->return_type();
1811 
1812       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1813         // A C runtime call where the return value is unused.  In SSE2+
1814         // mode the result needs to be removed from the FPU stack.  It's
1815         // likely that this function call could be removed by the
1816         // optimizer if the C function is a pure function.
1817         __ ffree(0);
1818       } else if (rt == T_FLOAT) {
1819         __ lea(rsp, Address(rsp, -4));
1820         __ fstp_s(Address(rsp, 0));
1821         __ movflt(xmm0, Address(rsp, 0));
1822         __ lea(rsp, Address(rsp,  4));
1823       } else if (rt == T_DOUBLE) {
1824         __ lea(rsp, Address(rsp, -8));
1825         __ fstp_d(Address(rsp, 0));
1826         __ movdbl(xmm0, Address(rsp, 0));
1827         __ lea(rsp, Address(rsp,  8));
1828       }
1829     }
1830   %}
1831 
1832 
1833   enc_class pre_call_resets %{
1834     // If method sets FPU control word restore it here
1835     debug_only(int off0 = cbuf.insts_size());
1836     if (ra_->C->in_24_bit_fp_mode()) {
1837       MacroAssembler _masm(&cbuf);
1838       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1839     }
1840     if (ra_->C->max_vector_size() > 16) {
1841       // Clear upper bits of YMM registers when current compiled code uses
1842       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1843       MacroAssembler _masm(&cbuf);
1844       __ vzeroupper();
1845     }
1846     debug_only(int off1 = cbuf.insts_size());
1847     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1848   %}
1849 
1850   enc_class post_call_FPU %{
1851     // If method sets FPU control word do it here also
1852     if (Compile::current()->in_24_bit_fp_mode()) {
1853       MacroAssembler masm(&cbuf);
1854       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1855     }
1856   %}
1857 
1858   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1859     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1860     // who we intended to call.
1861     cbuf.set_insts_mark();
1862     $$$emit8$primary;
1863     if (!_method) {
1864       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1865                      runtime_call_Relocation::spec(), RELOC_IMM32 );
1866     } else if (_optimized_virtual) {
1867       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1868                      opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
1869     } else {
1870       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1871                      static_call_Relocation::spec(), RELOC_IMM32 );
1872     }
1873     if (_method) {  // Emit stub for static call.
1874       CompiledStaticCall::emit_to_interp_stub(cbuf);
1875     }
1876   %}
1877 
1878   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1879     MacroAssembler _masm(&cbuf);
1880     __ ic_call((address)$meth$$method);
1881   %}
1882 
1883   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1884     int disp = in_bytes(Method::from_compiled_offset());
1885     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1886 
1887     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1888     cbuf.set_insts_mark();
1889     $$$emit8$primary;
1890     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1891     emit_d8(cbuf, disp);             // Displacement
1892 
1893   %}
1894 
1895 //   Following encoding is no longer used, but may be restored if calling
1896 //   convention changes significantly.
1897 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1898 //
1899 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1900 //     // int ic_reg     = Matcher::inline_cache_reg();
1901 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1902 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1903 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1904 //
1905 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1906 //     // // so we load it immediately before the call
1907 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1908 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1909 //
1910 //     // xor rbp,ebp
1911 //     emit_opcode(cbuf, 0x33);
1912 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1913 //
1914 //     // CALL to interpreter.
1915 //     cbuf.set_insts_mark();
1916 //     $$$emit8$primary;
1917 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1918 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1919 //   %}
1920 
1921   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1922     $$$emit8$primary;
1923     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1924     $$$emit8$shift$$constant;
1925   %}
1926 
1927   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1928     // Load immediate does not have a zero or sign extended version
1929     // for 8-bit immediates
1930     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1931     $$$emit32$src$$constant;
1932   %}
1933 
1934   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1935     // Load immediate does not have a zero or sign extended version
1936     // for 8-bit immediates
1937     emit_opcode(cbuf, $primary + $dst$$reg);
1938     $$$emit32$src$$constant;
1939   %}
1940 
1941   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1942     // Load immediate does not have a zero or sign extended version
1943     // for 8-bit immediates
1944     int dst_enc = $dst$$reg;
1945     int src_con = $src$$constant & 0x0FFFFFFFFL;
1946     if (src_con == 0) {
1947       // xor dst, dst
1948       emit_opcode(cbuf, 0x33);
1949       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1950     } else {
1951       emit_opcode(cbuf, $primary + dst_enc);
1952       emit_d32(cbuf, src_con);
1953     }
1954   %}
1955 
1956   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
1957     // Load immediate does not have a zero or sign extended version
1958     // for 8-bit immediates
1959     int dst_enc = $dst$$reg + 2;
1960     int src_con = ((julong)($src$$constant)) >> 32;
1961     if (src_con == 0) {
1962       // xor dst, dst
1963       emit_opcode(cbuf, 0x33);
1964       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1965     } else {
1966       emit_opcode(cbuf, $primary + dst_enc);
1967       emit_d32(cbuf, src_con);
1968     }
1969   %}
1970 
1971 
1972   // Encode a reg-reg copy.  If it is useless, then empty encoding.
1973   enc_class enc_Copy( rRegI dst, rRegI src ) %{
1974     encode_Copy( cbuf, $dst$$reg, $src$$reg );
1975   %}
1976 
1977   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
1978     encode_Copy( cbuf, $dst$$reg, $src$$reg );
1979   %}
1980 
1981   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1982     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1983   %}
1984 
1985   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
1986     $$$emit8$primary;
1987     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1988   %}
1989 
1990   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
1991     $$$emit8$secondary;
1992     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
1993   %}
1994 
1995   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
1996     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1997   %}
1998 
1999   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2000     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2001   %}
2002 
2003   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2004     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2005   %}
2006 
2007   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2008     // Output immediate
2009     $$$emit32$src$$constant;
2010   %}
2011 
2012   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2013     // Output Float immediate bits
2014     jfloat jf = $src$$constant;
2015     int    jf_as_bits = jint_cast( jf );
2016     emit_d32(cbuf, jf_as_bits);
2017   %}
2018 
2019   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2020     // Output Float immediate bits
2021     jfloat jf = $src$$constant;
2022     int    jf_as_bits = jint_cast( jf );
2023     emit_d32(cbuf, jf_as_bits);
2024   %}
2025 
2026   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2027     // Output immediate
2028     $$$emit16$src$$constant;
2029   %}
2030 
2031   enc_class Con_d32(immI src) %{
2032     emit_d32(cbuf,$src$$constant);
2033   %}
2034 
2035   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2036     // Output immediate memory reference
2037     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2038     emit_d32(cbuf, 0x00);
2039   %}
2040 
2041   enc_class lock_prefix( ) %{
2042     if( os::is_MP() )
2043       emit_opcode(cbuf,0xF0);         // [Lock]
2044   %}
2045 
2046   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
2051   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2052 
2053     // XCHG  rbx,ecx
2054     emit_opcode(cbuf,0x87);
2055     emit_opcode(cbuf,0xD9);
2056     // [Lock]
2057     if( os::is_MP() )
2058       emit_opcode(cbuf,0xF0);
2059     // CMPXCHG8 [Eptr]
2060     emit_opcode(cbuf,0x0F);
2061     emit_opcode(cbuf,0xC7);
2062     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2063     // XCHG  rbx,ecx
2064     emit_opcode(cbuf,0x87);
2065     emit_opcode(cbuf,0xD9);
2066   %}
2067 
2068   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2069     // [Lock]
2070     if( os::is_MP() )
2071       emit_opcode(cbuf,0xF0);
2072 
2073     // CMPXCHG [Eptr]
2074     emit_opcode(cbuf,0x0F);
2075     emit_opcode(cbuf,0xB1);
2076     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2077   %}
2078 
2079   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2080     int res_encoding = $res$$reg;
2081 
2082     // MOV  res,0
2083     emit_opcode( cbuf, 0xB8 + res_encoding);
2084     emit_d32( cbuf, 0 );
2085     // JNE,s  fail
2086     emit_opcode(cbuf,0x75);
2087     emit_d8(cbuf, 5 );
2088     // MOV  res,1
2089     emit_opcode( cbuf, 0xB8 + res_encoding);
2090     emit_d32( cbuf, 1 );
2091     // fail:
2092   %}
2093 
2094   enc_class set_instruction_start( ) %{
2095     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2096   %}
2097 
2098   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2099     int reg_encoding = $ereg$$reg;
2100     int base  = $mem$$base;
2101     int index = $mem$$index;
2102     int scale = $mem$$scale;
2103     int displace = $mem$$disp;
2104     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2105     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2106   %}
2107 
2108   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2109     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2110     int base  = $mem$$base;
2111     int index = $mem$$index;
2112     int scale = $mem$$scale;
2113     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2114     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2115     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2116   %}
2117 
2118   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2119     int r1, r2;
2120     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2121     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2122     emit_opcode(cbuf,0x0F);
2123     emit_opcode(cbuf,$tertiary);
2124     emit_rm(cbuf, 0x3, r1, r2);
2125     emit_d8(cbuf,$cnt$$constant);
2126     emit_d8(cbuf,$primary);
2127     emit_rm(cbuf, 0x3, $secondary, r1);
2128     emit_d8(cbuf,$cnt$$constant);
2129   %}
2130 
2131   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2132     emit_opcode( cbuf, 0x8B ); // Move
2133     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2134     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2135       emit_d8(cbuf,$primary);
2136       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2137       emit_d8(cbuf,$cnt$$constant-32);
2138     }
2139     emit_d8(cbuf,$primary);
2140     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2141     emit_d8(cbuf,31);
2142   %}
2143 
2144   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2145     int r1, r2;
2146     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2147     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2148 
2149     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2150     emit_rm(cbuf, 0x3, r1, r2);
2151     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2152       emit_opcode(cbuf,$primary);
2153       emit_rm(cbuf, 0x3, $secondary, r1);
2154       emit_d8(cbuf,$cnt$$constant-32);
2155     }
2156     emit_opcode(cbuf,0x33);  // XOR r2,r2
2157     emit_rm(cbuf, 0x3, r2, r2);
2158   %}
2159 
2160   // Clone of RegMem but accepts an extra parameter to access each
2161   // half of a double in memory; it never needs relocation info.
2162   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2163     emit_opcode(cbuf,$opcode$$constant);
2164     int reg_encoding = $rm_reg$$reg;
2165     int base     = $mem$$base;
2166     int index    = $mem$$index;
2167     int scale    = $mem$$scale;
2168     int displace = $mem$$disp + $disp_for_half$$constant;
2169     relocInfo::relocType disp_reloc = relocInfo::none;
2170     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2171   %}
2172 
2173   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2174   //
2175   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2176   // and it never needs relocation information.
2177   // Frequently used to move data between FPU's Stack Top and memory.
2178   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2179     int rm_byte_opcode = $rm_opcode$$constant;
2180     int base     = $mem$$base;
2181     int index    = $mem$$index;
2182     int scale    = $mem$$scale;
2183     int displace = $mem$$disp;
2184     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2185     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2186   %}
2187 
2188   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2189     int rm_byte_opcode = $rm_opcode$$constant;
2190     int base     = $mem$$base;
2191     int index    = $mem$$index;
2192     int scale    = $mem$$scale;
2193     int displace = $mem$$disp;
2194     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2195     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2196   %}
2197 
2198   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2199     int reg_encoding = $dst$$reg;
2200     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2201     int index        = 0x04;            // 0x04 indicates no index
2202     int scale        = 0x00;            // 0x00 indicates no scale
2203     int displace     = $src1$$constant; // 0x00 indicates no displacement
2204     relocInfo::relocType disp_reloc = relocInfo::none;
2205     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2206   %}
2207 
2208   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2209     // Compare dst,src
2210     emit_opcode(cbuf,0x3B);
2211     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2212     // jmp dst < src around move
2213     emit_opcode(cbuf,0x7C);
2214     emit_d8(cbuf,2);
2215     // move dst,src
2216     emit_opcode(cbuf,0x8B);
2217     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2218   %}
2219 
2220   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2221     // Compare dst,src
2222     emit_opcode(cbuf,0x3B);
2223     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2224     // jmp dst > src around move
2225     emit_opcode(cbuf,0x7F);
2226     emit_d8(cbuf,2);
2227     // move dst,src
2228     emit_opcode(cbuf,0x8B);
2229     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2230   %}
2231 
2232   enc_class enc_FPR_store(memory mem, regDPR src) %{
2233     // If src is FPR1, we can just FST to store it.
2234     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2235     int reg_encoding = 0x2; // Just store
2236     int base  = $mem$$base;
2237     int index = $mem$$index;
2238     int scale = $mem$$scale;
2239     int displace = $mem$$disp;
2240     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2241     if( $src$$reg != FPR1L_enc ) {
2242       reg_encoding = 0x3;  // Store & pop
2243       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2244       emit_d8( cbuf, 0xC0-1+$src$$reg );
2245     }
2246     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2247     emit_opcode(cbuf,$primary);
2248     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2249   %}
2250 
2251   enc_class neg_reg(rRegI dst) %{
2252     // NEG $dst
2253     emit_opcode(cbuf,0xF7);
2254     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2255   %}
2256 
2257   enc_class setLT_reg(eCXRegI dst) %{
2258     // SETLT $dst
2259     emit_opcode(cbuf,0x0F);
2260     emit_opcode(cbuf,0x9C);
2261     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2262   %}
2263 
2264   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2265     int tmpReg = $tmp$$reg;
2266 
2267     // SUB $p,$q
2268     emit_opcode(cbuf,0x2B);
2269     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2270     // SBB $tmp,$tmp
2271     emit_opcode(cbuf,0x1B);
2272     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2273     // AND $tmp,$y
2274     emit_opcode(cbuf,0x23);
2275     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2276     // ADD $p,$tmp
2277     emit_opcode(cbuf,0x03);
2278     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2279   %}
2280 
2281   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2282     // TEST shift,32
2283     emit_opcode(cbuf,0xF7);
2284     emit_rm(cbuf, 0x3, 0, ECX_enc);
2285     emit_d32(cbuf,0x20);
2286     // JEQ,s small
2287     emit_opcode(cbuf, 0x74);
2288     emit_d8(cbuf, 0x04);
2289     // MOV    $dst.hi,$dst.lo
2290     emit_opcode( cbuf, 0x8B );
2291     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2292     // CLR    $dst.lo
2293     emit_opcode(cbuf, 0x33);
2294     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2295 // small:
2296     // SHLD   $dst.hi,$dst.lo,$shift
2297     emit_opcode(cbuf,0x0F);
2298     emit_opcode(cbuf,0xA5);
2299     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2300     // SHL    $dst.lo,$shift"
2301     emit_opcode(cbuf,0xD3);
2302     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2303   %}
2304 
2305   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2306     // TEST shift,32
2307     emit_opcode(cbuf,0xF7);
2308     emit_rm(cbuf, 0x3, 0, ECX_enc);
2309     emit_d32(cbuf,0x20);
2310     // JEQ,s small
2311     emit_opcode(cbuf, 0x74);
2312     emit_d8(cbuf, 0x04);
2313     // MOV    $dst.lo,$dst.hi
2314     emit_opcode( cbuf, 0x8B );
2315     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2316     // CLR    $dst.hi
2317     emit_opcode(cbuf, 0x33);
2318     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2319 // small:
2320     // SHRD   $dst.lo,$dst.hi,$shift
2321     emit_opcode(cbuf,0x0F);
2322     emit_opcode(cbuf,0xAD);
2323     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2324     // SHR    $dst.hi,$shift"
2325     emit_opcode(cbuf,0xD3);
2326     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2327   %}
2328 
2329   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2330     // TEST shift,32
2331     emit_opcode(cbuf,0xF7);
2332     emit_rm(cbuf, 0x3, 0, ECX_enc);
2333     emit_d32(cbuf,0x20);
2334     // JEQ,s small
2335     emit_opcode(cbuf, 0x74);
2336     emit_d8(cbuf, 0x05);
2337     // MOV    $dst.lo,$dst.hi
2338     emit_opcode( cbuf, 0x8B );
2339     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2340     // SAR    $dst.hi,31
2341     emit_opcode(cbuf, 0xC1);
2342     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2343     emit_d8(cbuf, 0x1F );
2344 // small:
2345     // SHRD   $dst.lo,$dst.hi,$shift
2346     emit_opcode(cbuf,0x0F);
2347     emit_opcode(cbuf,0xAD);
2348     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2349     // SAR    $dst.hi,$shift"
2350     emit_opcode(cbuf,0xD3);
2351     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2352   %}
2353 
2354 
2355   // ----------------- Encodings for floating point unit -----------------
2356   // May leave result in FPU-TOS or FPU reg depending on opcodes
2357   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2358     $$$emit8$primary;
2359     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2360   %}
2361 
2362   // Pop argument in FPR0 with FSTP ST(0)
2363   enc_class PopFPU() %{
2364     emit_opcode( cbuf, 0xDD );
2365     emit_d8( cbuf, 0xD8 );
2366   %}
2367 
2368   // !!!!! equivalent to Pop_Reg_F
2369   enc_class Pop_Reg_DPR( regDPR dst ) %{
2370     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2371     emit_d8( cbuf, 0xD8+$dst$$reg );
2372   %}
2373 
2374   enc_class Push_Reg_DPR( regDPR dst ) %{
2375     emit_opcode( cbuf, 0xD9 );
2376     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2377   %}
2378 
2379   enc_class strictfp_bias1( regDPR dst ) %{
2380     emit_opcode( cbuf, 0xDB );           // FLD m80real
2381     emit_opcode( cbuf, 0x2D );
2382     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2383     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2384     emit_opcode( cbuf, 0xC8+$dst$$reg );
2385   %}
2386 
2387   enc_class strictfp_bias2( regDPR dst ) %{
2388     emit_opcode( cbuf, 0xDB );           // FLD m80real
2389     emit_opcode( cbuf, 0x2D );
2390     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2391     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2392     emit_opcode( cbuf, 0xC8+$dst$$reg );
2393   %}
2394 
2395   // Special case for moving an integer register to a stack slot.
2396   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2397     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2398   %}
2399 
2400   // Special case for moving a register to a stack slot.
2401   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2402     // Opcode already emitted
2403     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2404     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2405     emit_d32(cbuf, $dst$$disp);   // Displacement
2406   %}
2407 
2408   // Push the integer in stackSlot 'src' onto FP-stack
2409   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2410     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2411   %}
2412 
2413   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2414   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2415     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2416   %}
2417 
2418   // Same as Pop_Mem_F except for opcode
2419   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2420   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2421     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2422   %}
2423 
2424   enc_class Pop_Reg_FPR( regFPR dst ) %{
2425     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2426     emit_d8( cbuf, 0xD8+$dst$$reg );
2427   %}
2428 
2429   enc_class Push_Reg_FPR( regFPR dst ) %{
2430     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2431     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2432   %}
2433 
2434   // Push FPU's float to a stack-slot, and pop FPU-stack
2435   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2436     int pop = 0x02;
2437     if ($src$$reg != FPR1L_enc) {
2438       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2439       emit_d8( cbuf, 0xC0-1+$src$$reg );
2440       pop = 0x03;
2441     }
2442     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2443   %}
2444 
2445   // Push FPU's double to a stack-slot, and pop FPU-stack
2446   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2447     int pop = 0x02;
2448     if ($src$$reg != FPR1L_enc) {
2449       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2450       emit_d8( cbuf, 0xC0-1+$src$$reg );
2451       pop = 0x03;
2452     }
2453     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2454   %}
2455 
2456   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2457   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2458     int pop = 0xD0 - 1; // -1 since we skip FLD
2459     if ($src$$reg != FPR1L_enc) {
2460       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2461       emit_d8( cbuf, 0xC0-1+$src$$reg );
2462       pop = 0xD8;
2463     }
2464     emit_opcode( cbuf, 0xDD );
2465     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2466   %}
2467 
2468 
2469   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2470     // load dst in FPR0
2471     emit_opcode( cbuf, 0xD9 );
2472     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2473     if ($src$$reg != FPR1L_enc) {
2474       // fincstp
2475       emit_opcode (cbuf, 0xD9);
2476       emit_opcode (cbuf, 0xF7);
2477       // swap src with FPR1:
2478       // FXCH FPR1 with src
2479       emit_opcode(cbuf, 0xD9);
2480       emit_d8(cbuf, 0xC8-1+$src$$reg );
2481       // fdecstp
2482       emit_opcode (cbuf, 0xD9);
2483       emit_opcode (cbuf, 0xF6);
2484     }
2485   %}
2486 
2487   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2488     MacroAssembler _masm(&cbuf);
2489     __ subptr(rsp, 8);
2490     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2491     __ fld_d(Address(rsp, 0));
2492     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2493     __ fld_d(Address(rsp, 0));
2494   %}
2495 
2496   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2497     MacroAssembler _masm(&cbuf);
2498     __ subptr(rsp, 4);
2499     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2500     __ fld_s(Address(rsp, 0));
2501     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2502     __ fld_s(Address(rsp, 0));
2503   %}
2504 
2505   enc_class Push_ResultD(regD dst) %{
2506     MacroAssembler _masm(&cbuf);
2507     __ fstp_d(Address(rsp, 0));
2508     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2509     __ addptr(rsp, 8);
2510   %}
2511 
2512   enc_class Push_ResultF(regF dst, immI d8) %{
2513     MacroAssembler _masm(&cbuf);
2514     __ fstp_s(Address(rsp, 0));
2515     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2516     __ addptr(rsp, $d8$$constant);
2517   %}
2518 
2519   enc_class Push_SrcD(regD src) %{
2520     MacroAssembler _masm(&cbuf);
2521     __ subptr(rsp, 8);
2522     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2523     __ fld_d(Address(rsp, 0));
2524   %}
2525 
2526   enc_class push_stack_temp_qword() %{
2527     MacroAssembler _masm(&cbuf);
2528     __ subptr(rsp, 8);
2529   %}
2530 
2531   enc_class pop_stack_temp_qword() %{
2532     MacroAssembler _masm(&cbuf);
2533     __ addptr(rsp, 8);
2534   %}
2535 
2536   enc_class push_xmm_to_fpr1(regD src) %{
2537     MacroAssembler _masm(&cbuf);
2538     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2539     __ fld_d(Address(rsp, 0));
2540   %}
2541 
2542   enc_class Push_Result_Mod_DPR( regDPR src) %{
2543     if ($src$$reg != FPR1L_enc) {
2544       // fincstp
2545       emit_opcode (cbuf, 0xD9);
2546       emit_opcode (cbuf, 0xF7);
2547       // FXCH FPR1 with src
2548       emit_opcode(cbuf, 0xD9);
2549       emit_d8(cbuf, 0xC8-1+$src$$reg );
2550       // fdecstp
2551       emit_opcode (cbuf, 0xD9);
2552       emit_opcode (cbuf, 0xF6);
2553     }
2554     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2555     // // FSTP   FPR$dst$$reg
2556     // emit_opcode( cbuf, 0xDD );
2557     // emit_d8( cbuf, 0xD8+$dst$$reg );
2558   %}
2559 
2560   enc_class fnstsw_sahf_skip_parity() %{
2561     // fnstsw ax
2562     emit_opcode( cbuf, 0xDF );
2563     emit_opcode( cbuf, 0xE0 );
2564     // sahf
2565     emit_opcode( cbuf, 0x9E );
2566     // jnp  ::skip
2567     emit_opcode( cbuf, 0x7B );
2568     emit_opcode( cbuf, 0x05 );
2569   %}
2570 
2571   enc_class emitModDPR() %{
2572     // fprem must be iterative
2573     // :: loop
2574     // fprem
2575     emit_opcode( cbuf, 0xD9 );
2576     emit_opcode( cbuf, 0xF8 );
2577     // wait
2578     emit_opcode( cbuf, 0x9b );
2579     // fnstsw ax
2580     emit_opcode( cbuf, 0xDF );
2581     emit_opcode( cbuf, 0xE0 );
2582     // sahf
2583     emit_opcode( cbuf, 0x9E );
2584     // jp  ::loop
2585     emit_opcode( cbuf, 0x0F );
2586     emit_opcode( cbuf, 0x8A );
2587     emit_opcode( cbuf, 0xF4 );
2588     emit_opcode( cbuf, 0xFF );
2589     emit_opcode( cbuf, 0xFF );
2590     emit_opcode( cbuf, 0xFF );
2591   %}
2592 
2593   enc_class fpu_flags() %{
2594     // fnstsw_ax
2595     emit_opcode( cbuf, 0xDF);
2596     emit_opcode( cbuf, 0xE0);
2597     // test ax,0x0400
2598     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2599     emit_opcode( cbuf, 0xA9 );
2600     emit_d16   ( cbuf, 0x0400 );
2601     // // // This sequence works, but stalls for 12-16 cycles on PPro
2602     // // test rax,0x0400
2603     // emit_opcode( cbuf, 0xA9 );
2604     // emit_d32   ( cbuf, 0x00000400 );
2605     //
2606     // jz exit (no unordered comparison)
2607     emit_opcode( cbuf, 0x74 );
2608     emit_d8    ( cbuf, 0x02 );
2609     // mov ah,1 - treat as LT case (set carry flag)
2610     emit_opcode( cbuf, 0xB4 );
2611     emit_d8    ( cbuf, 0x01 );
2612     // sahf
2613     emit_opcode( cbuf, 0x9E);
2614   %}
2615 
2616   enc_class cmpF_P6_fixup() %{
2617     // Fixup the integer flags in case comparison involved a NaN
2618     //
2619     // JNP exit (no unordered comparison, P-flag is set by NaN)
2620     emit_opcode( cbuf, 0x7B );
2621     emit_d8    ( cbuf, 0x03 );
2622     // MOV AH,1 - treat as LT case (set carry flag)
2623     emit_opcode( cbuf, 0xB4 );
2624     emit_d8    ( cbuf, 0x01 );
2625     // SAHF
2626     emit_opcode( cbuf, 0x9E);
2627     // NOP     // target for branch to avoid branch to branch
2628     emit_opcode( cbuf, 0x90);
2629   %}
2630 
2631 //     fnstsw_ax();
2632 //     sahf();
2633 //     movl(dst, nan_result);
2634 //     jcc(Assembler::parity, exit);
2635 //     movl(dst, less_result);
2636 //     jcc(Assembler::below, exit);
2637 //     movl(dst, equal_result);
2638 //     jcc(Assembler::equal, exit);
2639 //     movl(dst, greater_result);
2640 
2641 // less_result     =  1;
2642 // greater_result  = -1;
2643 // equal_result    = 0;
2644 // nan_result      = -1;
2645 
2646   enc_class CmpF_Result(rRegI dst) %{
2647     // fnstsw_ax();
2648     emit_opcode( cbuf, 0xDF);
2649     emit_opcode( cbuf, 0xE0);
2650     // sahf
2651     emit_opcode( cbuf, 0x9E);
2652     // movl(dst, nan_result);
2653     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2654     emit_d32( cbuf, -1 );
2655     // jcc(Assembler::parity, exit);
2656     emit_opcode( cbuf, 0x7A );
2657     emit_d8    ( cbuf, 0x13 );
2658     // movl(dst, less_result);
2659     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2660     emit_d32( cbuf, -1 );
2661     // jcc(Assembler::below, exit);
2662     emit_opcode( cbuf, 0x72 );
2663     emit_d8    ( cbuf, 0x0C );
2664     // movl(dst, equal_result);
2665     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2666     emit_d32( cbuf, 0 );
2667     // jcc(Assembler::equal, exit);
2668     emit_opcode( cbuf, 0x74 );
2669     emit_d8    ( cbuf, 0x05 );
2670     // movl(dst, greater_result);
2671     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2672     emit_d32( cbuf, 1 );
2673   %}
2674 
2675 
2676   // Compare the longs and set flags
2677   // BROKEN!  Do Not use as-is
2678   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2679     // CMP    $src1.hi,$src2.hi
2680     emit_opcode( cbuf, 0x3B );
2681     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2682     // JNE,s  done
2683     emit_opcode(cbuf,0x75);
2684     emit_d8(cbuf, 2 );
2685     // CMP    $src1.lo,$src2.lo
2686     emit_opcode( cbuf, 0x3B );
2687     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2688 // done:
2689   %}
2690 
2691   enc_class convert_int_long( regL dst, rRegI src ) %{
2692     // mov $dst.lo,$src
2693     int dst_encoding = $dst$$reg;
2694     int src_encoding = $src$$reg;
2695     encode_Copy( cbuf, dst_encoding  , src_encoding );
2696     // mov $dst.hi,$src
2697     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2698     // sar $dst.hi,31
2699     emit_opcode( cbuf, 0xC1 );
2700     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2701     emit_d8(cbuf, 0x1F );
2702   %}
2703 
2704   enc_class convert_long_double( eRegL src ) %{
2705     // push $src.hi
2706     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2707     // push $src.lo
2708     emit_opcode(cbuf, 0x50+$src$$reg  );
2709     // fild 64-bits at [SP]
2710     emit_opcode(cbuf,0xdf);
2711     emit_d8(cbuf, 0x6C);
2712     emit_d8(cbuf, 0x24);
2713     emit_d8(cbuf, 0x00);
2714     // pop stack
2715     emit_opcode(cbuf, 0x83); // add  SP, #8
2716     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2717     emit_d8(cbuf, 0x8);
2718   %}
2719 
2720   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2721     // IMUL   EDX:EAX,$src1
2722     emit_opcode( cbuf, 0xF7 );
2723     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2724     // SAR    EDX,$cnt-32
2725     int shift_count = ((int)$cnt$$constant) - 32;
2726     if (shift_count > 0) {
2727       emit_opcode(cbuf, 0xC1);
2728       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2729       emit_d8(cbuf, shift_count);
2730     }
2731   %}
2732 
  // This version doesn't have the trailing 'add sp, 8' to pop the stack.
2734   enc_class convert_long_double2( eRegL src ) %{
2735     // push $src.hi
2736     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2737     // push $src.lo
2738     emit_opcode(cbuf, 0x50+$src$$reg  );
2739     // fild 64-bits at [SP]
2740     emit_opcode(cbuf,0xdf);
2741     emit_d8(cbuf, 0x6C);
2742     emit_d8(cbuf, 0x24);
2743     emit_d8(cbuf, 0x00);
2744   %}
2745 
2746   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2747     // Basic idea: long = (long)int * (long)int
2748     // IMUL EDX:EAX, src
2749     emit_opcode( cbuf, 0xF7 );
2750     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2751   %}
2752 
2753   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2754     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2755     // MUL EDX:EAX, src
2756     emit_opcode( cbuf, 0xF7 );
2757     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2758   %}
2759 
2760   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2761     // Basic idea: lo(result) = lo(x_lo * y_lo)
2762     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2763     // MOV    $tmp,$src.lo
2764     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2765     // IMUL   $tmp,EDX
2766     emit_opcode( cbuf, 0x0F );
2767     emit_opcode( cbuf, 0xAF );
2768     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2769     // MOV    EDX,$src.hi
2770     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2771     // IMUL   EDX,EAX
2772     emit_opcode( cbuf, 0x0F );
2773     emit_opcode( cbuf, 0xAF );
2774     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2775     // ADD    $tmp,EDX
2776     emit_opcode( cbuf, 0x03 );
2777     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2778     // MUL   EDX:EAX,$src.lo
2779     emit_opcode( cbuf, 0xF7 );
2780     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2781     // ADD    EDX,$tmp
2782     emit_opcode( cbuf, 0x03 );
2783     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2784   %}
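
       // A minimal C++ sketch of the 64x64-bit multiply built from 32-bit pieces,
       // matching the comment above (illustrative only; names are hypothetical):
       //   #include <cstdint>
       //   uint64_t mul64(uint32_t x_lo, uint32_t x_hi, uint32_t y_lo, uint32_t y_hi) {
       //     uint64_t p  = (uint64_t)x_lo * y_lo;          // MUL EDX:EAX,$src.lo
       //     uint32_t hi = (uint32_t)(p >> 32)             // hi(x_lo * y_lo)
       //                 + x_hi * y_lo                     // lo(x_hi * y_lo)
       //                 + x_lo * y_hi;                    // lo(x_lo * y_hi)
       //     return ((uint64_t)hi << 32) | (uint32_t)p;
       //   }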
2785 
2786   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2787     // Basic idea: lo(result) = lo(src * y_lo)
2788     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2789     // IMUL   $tmp,EDX,$src
2790     emit_opcode( cbuf, 0x6B );
2791     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2792     emit_d8( cbuf, (int)$src$$constant );
2793     // MOV    EDX,$src
2794     emit_opcode(cbuf, 0xB8 + EDX_enc);
2795     emit_d32( cbuf, (int)$src$$constant );
2796     // MUL   EDX:EAX,EDX
2797     emit_opcode( cbuf, 0xF7 );
2798     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2799     // ADD    EDX,$tmp
2800     emit_opcode( cbuf, 0x03 );
2801     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2802   %}
2803 
2804   enc_class long_div( eRegL src1, eRegL src2 ) %{
2805     // PUSH src1.hi
2806     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2807     // PUSH src1.lo
2808     emit_opcode(cbuf,               0x50+$src1$$reg  );
2809     // PUSH src2.hi
2810     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2811     // PUSH src2.lo
2812     emit_opcode(cbuf,               0x50+$src2$$reg  );
2813     // CALL directly to the runtime
2814     cbuf.set_insts_mark();
2815     emit_opcode(cbuf,0xE8);       // Call into runtime
2816     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2817     // Restore stack
2818     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2819     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2820     emit_d8(cbuf, 4*4);
2821   %}
2822 
2823   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2824     // PUSH src1.hi
2825     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2826     // PUSH src1.lo
2827     emit_opcode(cbuf,               0x50+$src1$$reg  );
2828     // PUSH src2.hi
2829     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2830     // PUSH src2.lo
2831     emit_opcode(cbuf,               0x50+$src2$$reg  );
2832     // CALL directly to the runtime
2833     cbuf.set_insts_mark();
2834     emit_opcode(cbuf,0xE8);       // Call into runtime
2835     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2836     // Restore stack
2837     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2838     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2839     emit_d8(cbuf, 4*4);
2840   %}
2841 
2842   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2843     // MOV   $tmp,$src.lo
2844     emit_opcode(cbuf, 0x8B);
2845     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2846     // OR    $tmp,$src.hi
2847     emit_opcode(cbuf, 0x0B);
2848     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2849   %}
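
       // In C++ terms (a sketch; src_lo/src_hi are hypothetical names for the two
       // halves): the OR leaves ZF set exactly when the whole 64-bit value is zero.
       //   bool is_zero = ((src_lo | src_hi) == 0);   // MOV tmp,lo; OR tmp,hi; test ZF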
2850 
2851   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2852     // CMP    $src1.lo,$src2.lo
2853     emit_opcode( cbuf, 0x3B );
2854     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2855     // JNE,s  skip
2856     emit_cc(cbuf, 0x70, 0x5);
2857     emit_d8(cbuf,2);
2858     // CMP    $src1.hi,$src2.hi
2859     emit_opcode( cbuf, 0x3B );
2860     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2861   %}
2862 
2863   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2864     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2865     emit_opcode( cbuf, 0x3B );
2866     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2867     // MOV    $tmp,$src1.hi
2868     emit_opcode( cbuf, 0x8B );
2869     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2870     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2871     emit_opcode( cbuf, 0x1B );
2872     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2873   %}
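
       // Hedged C++ sketch of the CMP/SBB trick above (illustrative only): the
       // flags end up as if the full 64-bit subtraction src1 - src2 had been done,
       // so the signed conditions are valid for a long compare.
       //   #include <cstdint>
       //   bool signed_less(uint32_t src1_lo, int32_t src1_hi,
       //                    uint32_t src2_lo, int32_t src2_hi) {
       //     uint32_t borrow = (src1_lo < src2_lo);                  // CMP lo,lo sets CF
       //     int64_t  hi     = (int64_t)src1_hi - src2_hi - borrow;  // SBB tmp,$src2.hi
       //     return hi < 0;                                          // signed "less than"
       //   }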
2874 
2875   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2876     // XOR    $tmp,$tmp
2877     emit_opcode(cbuf,0x33);  // XOR
2878     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2879     // CMP    $tmp,$src.lo
2880     emit_opcode( cbuf, 0x3B );
2881     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2882     // SBB    $tmp,$src.hi
2883     emit_opcode( cbuf, 0x1B );
2884     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2885   %}
2886 
2887   // Sniff, sniff... smells like Gnu Superoptimizer
2888   enc_class neg_long( eRegL dst ) %{
2889     emit_opcode(cbuf,0xF7);    // NEG hi
2890     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2891     emit_opcode(cbuf,0xF7);    // NEG lo
2892     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2893     emit_opcode(cbuf,0x83);    // SBB hi,0
2894     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2895     emit_d8    (cbuf,0 );
2896   %}
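
       // What the NEG/NEG/SBB sequence computes, as a hedged C++ sketch (names are
       // illustrative): a two-word negation where the borrow out of the low half is
       // folded into the high half.
       //   #include <cstdint>
       //   uint64_t neg64(uint32_t lo, uint32_t hi) {
       //     uint32_t borrow = (lo != 0);         // NEG lo leaves CF set iff lo was non-zero
       //     lo = (uint32_t)(0 - lo);             // NEG lo
       //     hi = (uint32_t)(0 - hi) - borrow;    // NEG hi; SBB hi,0 folds in the borrow
       //     return ((uint64_t)hi << 32) | lo;
       //   }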
2897 
2898   enc_class enc_pop_rdx() %{
2899     emit_opcode(cbuf,0x5A);
2900   %}
2901 
2902   enc_class enc_rethrow() %{
2903     cbuf.set_insts_mark();
2904     emit_opcode(cbuf, 0xE9);        // jmp    entry
2905     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2906                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2907   %}
2908 
2909 
2910   // Convert a double to an int.  Java semantics require we do complex
2911   // manglelations in the corner cases.  So we set the rounding mode to
2912   // 'zero', store the darned double down as an int, and reset the
2913   // rounding mode to 'nearest'.  The hardware throws an exception which
2914   // patches up the correct value directly to the stack.
2915   enc_class DPR2I_encoding( regDPR src ) %{
2916     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2917     // exceptions here, so that a NaN or other corner-case value will
2918     // throw an exception (but normal values get converted at full speed).
2919     // However, I2C adapters and other float-stack manglers leave pending
2920     // invalid-op exceptions hanging.  We would have to clear them before
2921     // enabling them and that is more expensive than just testing for the
2922     // invalid value Intel stores down in the corner cases.
2923     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2924     emit_opcode(cbuf,0x2D);
2925     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2926     // Allocate a word
2927     emit_opcode(cbuf,0x83);            // SUB ESP,4
2928     emit_opcode(cbuf,0xEC);
2929     emit_d8(cbuf,0x04);
2930     // Encoding assumes a double has been pushed into FPR0.
2931     // Store down the double as an int, popping the FPU stack
2932     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2933     emit_opcode(cbuf,0x1C);
2934     emit_d8(cbuf,0x24);
2935     // Restore the rounding mode; mask the exception
2936     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2937     emit_opcode(cbuf,0x2D);
2938     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2939         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2940         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2941 
2942     // Load the converted int; adjust CPU stack
2943     emit_opcode(cbuf,0x58);       // POP EAX
2944     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2945     emit_d32   (cbuf,0x80000000); //         0x80000000
2946     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2947     emit_d8    (cbuf,0x07);       // Size of slow_call
2948     // Push src onto stack slow-path
2949     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
2950     emit_d8    (cbuf,0xC0-1+$src$$reg );
2951     // CALL directly to the runtime
2952     cbuf.set_insts_mark();
2953     emit_opcode(cbuf,0xE8);       // Call into runtime
2954     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2955     // Carry on here...
2956   %}
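
       // For reference, a hedged C++ sketch of the Java d2i corner cases that the
       // slow path (StubRoutines::d2i_wrapper()) has to honor; illustrative only,
       // not the stub's actual code:
       //   #include <cstdint>
       //   int32_t java_d2i(double d) {
       //     if (d != d)             return 0;          // NaN converts to 0
       //     if (d >=  2147483648.0) return INT32_MAX;  // clamp on positive overflow
       //     if (d <  -2147483648.0) return INT32_MIN;  // clamp on negative overflow
       //     return (int32_t)d;                         // otherwise truncate toward zero
       //   }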
2957 
2958   enc_class DPR2L_encoding( regDPR src ) %{
2959     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2960     emit_opcode(cbuf,0x2D);
2961     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2962     // Allocate two words (8 bytes)
2963     emit_opcode(cbuf,0x83);            // SUB ESP,8
2964     emit_opcode(cbuf,0xEC);
2965     emit_d8(cbuf,0x08);
2966     // Encoding assumes a double has been pushed into FPR0.
2967     // Store down the double as a long, popping the FPU stack
2968     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
2969     emit_opcode(cbuf,0x3C);
2970     emit_d8(cbuf,0x24);
2971     // Restore the rounding mode; mask the exception
2972     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2973     emit_opcode(cbuf,0x2D);
2974     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2975         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2976         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2977 
2978     // Load the converted long; adjust CPU stack
2979     emit_opcode(cbuf,0x58);       // POP EAX
2980     emit_opcode(cbuf,0x5A);       // POP EDX
2981     emit_opcode(cbuf,0x81);       // CMP EDX,imm
2982     emit_d8    (cbuf,0xFA);       // rdx
2983     emit_d32   (cbuf,0x80000000); //         0x80000000
2984     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2985     emit_d8    (cbuf,0x07+4);     // Size of slow_call
2986     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
2987     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
2988     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2989     emit_d8    (cbuf,0x07);       // Size of slow_call
2990     // Push src onto stack slow-path
2991     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
2992     emit_d8    (cbuf,0xC0-1+$src$$reg );
2993     // CALL directly to the runtime
2994     cbuf.set_insts_mark();
2995     emit_opcode(cbuf,0xE8);       // Call into runtime
2996     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2997     // Carry on here...
2998   %}
2999 
3000   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3001     // Operand was loaded from memory into fp ST (stack top)
3002     // FMUL   ST,$src1  /* D8 C8+i */
3003     emit_opcode(cbuf, 0xD8);
3004     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3005   %}
3006 
3007   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3008     // FADD   ST,$src2  /* D8 C0+i */
3009     emit_opcode(cbuf, 0xD8);
3010     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3011     // could use FADDP  src2,fpST  /* DE C0+i */
3012   %}
3013 
3014   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3015     // FADDP  src2,ST  /* DE C0+i */
3016     emit_opcode(cbuf, 0xDE);
3017     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3018   %}
3019 
3020   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3021     // Operand has been loaded into fp ST (stack top)
3022     // FSUB   ST,$src1
3023     emit_opcode(cbuf, 0xD8);
3024     emit_opcode(cbuf, 0xE0 + $src1$$reg);
3025 
3026     // FDIV
3027     emit_opcode(cbuf, 0xD8);
3028     emit_opcode(cbuf, 0xF0 + $src2$$reg);
3029   %}
3030 
3031   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3032     // Operand was loaded from memory into fp ST (stack top)
3033     // FADD   ST,$src1  /* D8 C0+i */
3034     emit_opcode(cbuf, 0xD8);
3035     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3036 
3037     // FMUL   ST,$src2  /* D8 C8+i */
3038     emit_opcode(cbuf, 0xD8);
3039     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3040   %}
3041 
3042 
3043   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3044     // Operand was loaded from memory into fp ST (stack top)
3045     // FADD   ST,$src1  /* D8 C0+i */
3046     emit_opcode(cbuf, 0xD8);
3047     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3048 
3049     // FMULP  src2,ST  /* DE C8+i */
3050     emit_opcode(cbuf, 0xDE);
3051     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3052   %}
3053 
3054   // Atomically load the volatile long
3055   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3056     emit_opcode(cbuf,0xDF);
3057     int rm_byte_opcode = 0x05;
3058     int base     = $mem$$base;
3059     int index    = $mem$$index;
3060     int scale    = $mem$$scale;
3061     int displace = $mem$$disp;
3062     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3063     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3064     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3065   %}
3066 
3067   // Volatile Store Long.  Must be atomic, so move it into
3068   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3069   // target address before the store (for null-ptr checks)
3070   // so the memory operand is used twice in the encoding.
3071   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3072     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3073     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3074     emit_opcode(cbuf,0xDF);
3075     int rm_byte_opcode = 0x07;
3076     int base     = $mem$$base;
3077     int index    = $mem$$index;
3078     int scale    = $mem$$scale;
3079     int displace = $mem$$disp;
3080     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3081     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3082   %}
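
       // Why the FPU round-trip: two separate 32-bit stores are not atomic, so a
       // concurrent reader could observe a torn value.  A hedged sketch of the
       // hazard (illustrative C++; addr/lo/hi are hypothetical names):
       //   *(volatile int32_t*)(addr)     = lo;   // <-- a reader here sees new lo, old hi
       //   *(volatile int32_t*)(addr + 4) = hi;
       // FILD/FISTP moves all 64 bits with a single memory operand instead.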
3083 
3084   // Safepoint Poll.  This polls the safepoint page, and causes an
3085 // exception if it is not readable.  Unfortunately, it kills the condition codes
3086 // in the process.
3087 // We currently use TESTL [spp],EDI
3088   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3089 
3090   enc_class Safepoint_Poll() %{
3091     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3092     emit_opcode(cbuf,0x85);
3093     emit_rm (cbuf, 0x0, 0x7, 0x5);
3094     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3095   %}
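
       // Conceptually the poll is just a read of the polling page: when the VM wants
       // a safepoint it makes the page unreadable, the read faults, and the signal
       // handler brings this thread to a stop.  A hedged C++ sketch (illustrative
       // only, not the emitted code):
       //   volatile int* page = (volatile int*)os::get_polling_page();
       //   (void)*page;   // TESTL [spp],EDI -- faults once the page is protected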
3096 %}
3097 
3098 
3099 //----------FRAME--------------------------------------------------------------
3100 // Definition of frame structure and management information.
3101 //
3102 //  S T A C K   L A Y O U T    Allocators stack-slot number
3103 //                             |   (to get allocators register number
3104 //  G  Owned by    |        |  v    add OptoReg::stack0())
3105 //  r   CALLER     |        |
3106 //  o     |        +--------+      pad to even-align allocators stack-slot
3107 //  w     V        |  pad0  |        numbers; owned by CALLER
3108 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3109 //  h     ^        |   in   |  5
3110 //        |        |  args  |  4   Holes in incoming args owned by SELF
3111 //  |     |        |        |  3
3112 //  |     |        +--------+
3113 //  V     |        | old out|      Empty on Intel, window on Sparc
3114 //        |    old |preserve|      Must be even aligned.
3115 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3116 //        |        |   in   |  3   area for Intel ret address
3117 //     Owned by    |preserve|      Empty on Sparc.
3118 //       SELF      +--------+
3119 //        |        |  pad2  |  2   pad to align old SP
3120 //        |        +--------+  1
3121 //        |        | locks  |  0
3122 //        |        +--------+----> OptoReg::stack0(), even aligned
3123 //        |        |  pad1  | 11   pad to align new SP
3124 //        |        +--------+
3125 //        |        |        | 10
3126 //        |        | spills |  9   spills
3127 //        V        |        |  8   (pad0 slot for callee)
3128 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3129 //        ^        |  out   |  7
3130 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3131 //     Owned by    +--------+
3132 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3133 //        |    new |preserve|      Must be even-aligned.
3134 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3135 //        |        |        |
3136 //
3137 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3138 //         known from SELF's arguments and the Java calling convention.
3139 //         Region 6-7 is determined per call site.
3140 // Note 2: If the calling convention leaves holes in the incoming argument
3141 //         area, those holes are owned by SELF.  Holes in the outgoing area
3142 //         are owned by the CALLEE.  Holes should not be necessary in the
3143 //         incoming area, as the Java calling convention is completely under
3144 //         the control of the AD file.  Doubles can be sorted and packed to
3145 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3146 //         varargs C calling conventions.
3147 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3148 //         even aligned with pad0 as needed.
3149 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3150 //         region 6-11 is even aligned; it may be padded out more so that
3151 //         the region from SP to FP meets the minimum stack alignment.
3152 
3153 frame %{
3154   // What direction does stack grow in (assumed to be same for C & Java)
3155   stack_direction(TOWARDS_LOW);
3156 
3157   // These three registers define part of the calling convention
3158   // between compiled code and the interpreter.
3159   inline_cache_reg(EAX);                // Inline Cache Register
3160   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3161 
3162   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3163   cisc_spilling_operand_name(indOffset32);
3164 
3165   // Number of stack slots consumed by locking an object
3166   sync_stack_slots(1);
3167 
3168   // Compiled code's Frame Pointer
3169   frame_pointer(ESP);
3170   // Interpreter stores its frame pointer in a register which is
3171   // stored to the stack by I2CAdaptors.
3172   // I2CAdaptors convert from interpreted Java to compiled Java.
3173   interpreter_frame_pointer(EBP);
3174 
3175   // Stack alignment requirement
3176   // Alignment size in bytes (128-bit -> 16 bytes)
3177   stack_alignment(StackAlignmentInBytes);
3178 
3179   // Number of stack slots between incoming argument block and the start of
3180   // a new frame.  The PROLOG must add this many slots to the stack.  The
3181   // EPILOG must remove this many slots.  Intel needs one slot for
3182   // the return address and one for rbp (rbp must be saved).
3183   in_preserve_stack_slots(2+VerifyStackAtCalls);
3184 
3185   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3186   // for calls to C.  Supports the var-args backing area for register parms.
3187   varargs_C_out_slots_killed(0);
3188 
3189   // The after-PROLOG location of the return address.  Location of
3190   // return address specifies a type (REG or STACK) and a number
3191   // representing the register number (i.e. - use a register name) or
3192   // stack slot.
3193   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3194   // Otherwise, it is above the locks and verification slot and alignment word
3195   return_addr(STACK - 1 +
3196               round_to((Compile::current()->in_preserve_stack_slots() +
3197                         Compile::current()->fixed_slots()),
3198                        stack_alignment_in_slots()));
3199 
3200   // Body of function which returns an integer array locating
3201   // arguments either in registers or in stack slots.  Passed an array
3202   // of ideal registers called "sig" and a "length" count.  Stack-slot
3203   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3204   // arguments for a CALLEE.  Incoming stack arguments are
3205   // automatically biased by the preserve_stack_slots field above.
3206   calling_convention %{
3207     // No difference between incoming and outgoing, so just pass false
3208     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3209   %}
3210 
3211 
3212   // Body of function which returns an integer array locating
3213   // arguments either in registers or in stack slots.  Passed an array
3214   // of ideal registers called "sig" and a "length" count.  Stack-slot
3215   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3216   // arguments for a CALLEE.  Incoming stack arguments are
3217   // automatically biased by the preserve_stack_slots field above.
3218   c_calling_convention %{
3219     // This is obviously always outgoing
3220     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3221   %}
3222 
3223   // Location of C & interpreter return values
3224   c_return_value %{
3225     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3226     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3227     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3228 
3229     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3230     // that C functions return float and double results in XMM0.
3231     if( ideal_reg == Op_RegD && UseSSE>=2 )
3232       return OptoRegPair(XMM0b_num,XMM0_num);
3233     if( ideal_reg == Op_RegF && UseSSE>=2 )
3234       return OptoRegPair(OptoReg::Bad,XMM0_num);
3235 
3236     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3237   %}
3238 
3239   // Location of return values
3240   return_value %{
3241     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3242     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3243     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3244     if( ideal_reg == Op_RegD && UseSSE>=2 )
3245       return OptoRegPair(XMM0b_num,XMM0_num);
3246     if( ideal_reg == Op_RegF && UseSSE>=1 )
3247       return OptoRegPair(OptoReg::Bad,XMM0_num);
3248     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3249   %}
3250 
3251 %}
3252 
3253 //----------ATTRIBUTES---------------------------------------------------------
3254 //----------Operand Attributes-------------------------------------------------
3255 op_attrib op_cost(0);        // Required cost attribute
3256 
3257 //----------Instruction Attributes---------------------------------------------
3258 ins_attrib ins_cost(100);       // Required cost attribute
3259 ins_attrib ins_size(8);         // Required size attribute (in bits)
3260 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3261                                 // non-matching short branch variant of some
3262                                 // long branch?
3263 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3264                                 // specifies the alignment that some part of the instruction (not
3265                                 // necessarily the start) requires.  If > 1, a compute_padding()
3266                                 // function must be provided for the instruction
3267 
3268 //----------OPERANDS-----------------------------------------------------------
3269 // Operand definitions must precede instruction definitions for correct parsing
3270 // in the ADLC because operands constitute user defined types which are used in
3271 // instruction definitions.
3272 
3273 //----------Simple Operands----------------------------------------------------
3274 // Immediate Operands
3275 // Integer Immediate
3276 operand immI() %{
3277   match(ConI);
3278 
3279   op_cost(10);
3280   format %{ %}
3281   interface(CONST_INTER);
3282 %}
3283 
3284 // Constant for test vs zero
3285 operand immI0() %{
3286   predicate(n->get_int() == 0);
3287   match(ConI);
3288 
3289   op_cost(0);
3290   format %{ %}
3291   interface(CONST_INTER);
3292 %}
3293 
3294 // Constant for increment
3295 operand immI1() %{
3296   predicate(n->get_int() == 1);
3297   match(ConI);
3298 
3299   op_cost(0);
3300   format %{ %}
3301   interface(CONST_INTER);
3302 %}
3303 
3304 // Constant for decrement
3305 operand immI_M1() %{
3306   predicate(n->get_int() == -1);
3307   match(ConI);
3308 
3309   op_cost(0);
3310   format %{ %}
3311   interface(CONST_INTER);
3312 %}
3313 
3314 // Valid scale values for addressing modes
3315 operand immI2() %{
3316   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3317   match(ConI);
3318 
3319   format %{ %}
3320   interface(CONST_INTER);
3321 %}
3322 
3323 operand immI8() %{
3324   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3325   match(ConI);
3326 
3327   op_cost(5);
3328   format %{ %}
3329   interface(CONST_INTER);
3330 %}
3331 
3332 operand immI16() %{
3333   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3334   match(ConI);
3335 
3336   op_cost(10);
3337   format %{ %}
3338   interface(CONST_INTER);
3339 %}
3340 
3341 // Int Immediate non-negative
3342 operand immU31()
3343 %{
3344   predicate(n->get_int() >= 0);
3345   match(ConI);
3346 
3347   op_cost(0);
3348   format %{ %}
3349   interface(CONST_INTER);
3350 %}
3351 
3352 // Constant for long shifts
3353 operand immI_32() %{
3354   predicate( n->get_int() == 32 );
3355   match(ConI);
3356 
3357   op_cost(0);
3358   format %{ %}
3359   interface(CONST_INTER);
3360 %}
3361 
3362 operand immI_1_31() %{
3363   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3364   match(ConI);
3365 
3366   op_cost(0);
3367   format %{ %}
3368   interface(CONST_INTER);
3369 %}
3370 
3371 operand immI_32_63() %{
3372   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3373   match(ConI);
3374   op_cost(0);
3375 
3376   format %{ %}
3377   interface(CONST_INTER);
3378 %}
3379 
3380 operand immI_1() %{
3381   predicate( n->get_int() == 1 );
3382   match(ConI);
3383 
3384   op_cost(0);
3385   format %{ %}
3386   interface(CONST_INTER);
3387 %}
3388 
3389 operand immI_2() %{
3390   predicate( n->get_int() == 2 );
3391   match(ConI);
3392 
3393   op_cost(0);
3394   format %{ %}
3395   interface(CONST_INTER);
3396 %}
3397 
3398 operand immI_3() %{
3399   predicate( n->get_int() == 3 );
3400   match(ConI);
3401 
3402   op_cost(0);
3403   format %{ %}
3404   interface(CONST_INTER);
3405 %}
3406 
3407 // Pointer Immediate
3408 operand immP() %{
3409   match(ConP);
3410 
3411   op_cost(10);
3412   format %{ %}
3413   interface(CONST_INTER);
3414 %}
3415 
3416 // NULL Pointer Immediate
3417 operand immP0() %{
3418   predicate( n->get_ptr() == 0 );
3419   match(ConP);
3420   op_cost(0);
3421 
3422   format %{ %}
3423   interface(CONST_INTER);
3424 %}
3425 
3426 // Long Immediate
3427 operand immL() %{
3428   match(ConL);
3429 
3430   op_cost(20);
3431   format %{ %}
3432   interface(CONST_INTER);
3433 %}
3434 
3435 // Long Immediate zero
3436 operand immL0() %{
3437   predicate( n->get_long() == 0L );
3438   match(ConL);
3439   op_cost(0);
3440 
3441   format %{ %}
3442   interface(CONST_INTER);
3443 %}
3444 
3445 // Long Immediate minus one
3446 operand immL_M1() %{
3447   predicate( n->get_long() == -1L );
3448   match(ConL);
3449   op_cost(0);
3450 
3451   format %{ %}
3452   interface(CONST_INTER);
3453 %}
3454 
3455 // Long immediate from 0 to 127.
3456 // Used for a shorter form of long mul by 10.
3457 operand immL_127() %{
3458   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3459   match(ConL);
3460   op_cost(0);
3461 
3462   format %{ %}
3463   interface(CONST_INTER);
3464 %}
3465 
3466 // Long Immediate: low 32-bit mask
3467 operand immL_32bits() %{
3468   predicate(n->get_long() == 0xFFFFFFFFL);
3469   match(ConL);
3470   op_cost(0);
3471 
3472   format %{ %}
3473   interface(CONST_INTER);
3474 %}
3475 
3476 // Long Immediate: value fits in a signed 32-bit int
3477 operand immL32() %{
3478   predicate(n->get_long() == (int)(n->get_long()));
3479   match(ConL);
3480   op_cost(20);
3481 
3482   format %{ %}
3483   interface(CONST_INTER);
3484 %}
3485 
3486 // Double Immediate zero
3487 operand immDPR0() %{
3488   // Do additional (and counter-intuitive) test against NaN to work around VC++
3489   // bug that generates code such that NaNs compare equal to 0.0
3490   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3491   match(ConD);
3492 
3493   op_cost(5);
3494   format %{ %}
3495   interface(CONST_INTER);
3496 %}
3497 
3498 // Double Immediate one
3499 operand immDPR1() %{
3500   predicate( UseSSE<=1 && n->getd() == 1.0 );
3501   match(ConD);
3502 
3503   op_cost(5);
3504   format %{ %}
3505   interface(CONST_INTER);
3506 %}
3507 
3508 // Double Immediate
3509 operand immDPR() %{
3510   predicate(UseSSE<=1);
3511   match(ConD);
3512 
3513   op_cost(5);
3514   format %{ %}
3515   interface(CONST_INTER);
3516 %}
3517 
3518 operand immD() %{
3519   predicate(UseSSE>=2);
3520   match(ConD);
3521 
3522   op_cost(5);
3523   format %{ %}
3524   interface(CONST_INTER);
3525 %}
3526 
3527 // Double Immediate zero
3528 operand immD0() %{
3529   // Do additional (and counter-intuitive) test against NaN to work around VC++
3530   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3531   // compare equal to -0.0.
3532   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3533   match(ConD);
3534 
3535   format %{ %}
3536   interface(CONST_INTER);
3537 %}
3538 
3539 // Float Immediate zero
3540 operand immFPR0() %{
3541   predicate(UseSSE == 0 && n->getf() == 0.0F);
3542   match(ConF);
3543 
3544   op_cost(5);
3545   format %{ %}
3546   interface(CONST_INTER);
3547 %}
3548 
3549 // Float Immediate one
3550 operand immFPR1() %{
3551   predicate(UseSSE == 0 && n->getf() == 1.0F);
3552   match(ConF);
3553 
3554   op_cost(5);
3555   format %{ %}
3556   interface(CONST_INTER);
3557 %}
3558 
3559 // Float Immediate
3560 operand immFPR() %{
3561   predicate( UseSSE == 0 );
3562   match(ConF);
3563 
3564   op_cost(5);
3565   format %{ %}
3566   interface(CONST_INTER);
3567 %}
3568 
3569 // Float Immediate
3570 operand immF() %{
3571   predicate(UseSSE >= 1);
3572   match(ConF);
3573 
3574   op_cost(5);
3575   format %{ %}
3576   interface(CONST_INTER);
3577 %}
3578 
3579 // Float Immediate zero.  Zero and not -0.0
3580 operand immF0() %{
3581   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3582   match(ConF);
3583 
3584   op_cost(5);
3585   format %{ %}
3586   interface(CONST_INTER);
3587 %}
3588 
3589 // Immediates for special shifts (sign extend)
3590 
3591 // Constants for sign-extending shift amounts
3592 operand immI_16() %{
3593   predicate( n->get_int() == 16 );
3594   match(ConI);
3595 
3596   format %{ %}
3597   interface(CONST_INTER);
3598 %}
3599 
3600 operand immI_24() %{
3601   predicate( n->get_int() == 24 );
3602   match(ConI);
3603 
3604   format %{ %}
3605   interface(CONST_INTER);
3606 %}
3607 
3608 // Constant for byte-wide masking
3609 operand immI_255() %{
3610   predicate( n->get_int() == 255 );
3611   match(ConI);
3612 
3613   format %{ %}
3614   interface(CONST_INTER);
3615 %}
3616 
3617 // Constant for short-wide masking
3618 operand immI_65535() %{
3619   predicate(n->get_int() == 65535);
3620   match(ConI);
3621 
3622   format %{ %}
3623   interface(CONST_INTER);
3624 %}
3625 
3626 // Register Operands
3627 // Integer Register
3628 operand rRegI() %{
3629   constraint(ALLOC_IN_RC(int_reg));
3630   match(RegI);
3631   match(xRegI);
3632   match(eAXRegI);
3633   match(eBXRegI);
3634   match(eCXRegI);
3635   match(eDXRegI);
3636   match(eDIRegI);
3637   match(eSIRegI);
3638 
3639   format %{ %}
3640   interface(REG_INTER);
3641 %}
3642 
3643 // Subset of Integer Register
3644 operand xRegI(rRegI reg) %{
3645   constraint(ALLOC_IN_RC(int_x_reg));
3646   match(reg);
3647   match(eAXRegI);
3648   match(eBXRegI);
3649   match(eCXRegI);
3650   match(eDXRegI);
3651 
3652   format %{ %}
3653   interface(REG_INTER);
3654 %}
3655 
3656 // Special Registers
3657 operand eAXRegI(xRegI reg) %{
3658   constraint(ALLOC_IN_RC(eax_reg));
3659   match(reg);
3660   match(rRegI);
3661 
3662   format %{ "EAX" %}
3663   interface(REG_INTER);
3664 %}
3665 
3666 // Special Registers
3667 operand eBXRegI(xRegI reg) %{
3668   constraint(ALLOC_IN_RC(ebx_reg));
3669   match(reg);
3670   match(rRegI);
3671 
3672   format %{ "EBX" %}
3673   interface(REG_INTER);
3674 %}
3675 
3676 operand eCXRegI(xRegI reg) %{
3677   constraint(ALLOC_IN_RC(ecx_reg));
3678   match(reg);
3679   match(rRegI);
3680 
3681   format %{ "ECX" %}
3682   interface(REG_INTER);
3683 %}
3684 
3685 operand eDXRegI(xRegI reg) %{
3686   constraint(ALLOC_IN_RC(edx_reg));
3687   match(reg);
3688   match(rRegI);
3689 
3690   format %{ "EDX" %}
3691   interface(REG_INTER);
3692 %}
3693 
3694 operand eDIRegI(xRegI reg) %{
3695   constraint(ALLOC_IN_RC(edi_reg));
3696   match(reg);
3697   match(rRegI);
3698 
3699   format %{ "EDI" %}
3700   interface(REG_INTER);
3701 %}
3702 
3703 operand naxRegI() %{
3704   constraint(ALLOC_IN_RC(nax_reg));
3705   match(RegI);
3706   match(eCXRegI);
3707   match(eDXRegI);
3708   match(eSIRegI);
3709   match(eDIRegI);
3710 
3711   format %{ %}
3712   interface(REG_INTER);
3713 %}
3714 
3715 operand nadxRegI() %{
3716   constraint(ALLOC_IN_RC(nadx_reg));
3717   match(RegI);
3718   match(eBXRegI);
3719   match(eCXRegI);
3720   match(eSIRegI);
3721   match(eDIRegI);
3722 
3723   format %{ %}
3724   interface(REG_INTER);
3725 %}
3726 
3727 operand ncxRegI() %{
3728   constraint(ALLOC_IN_RC(ncx_reg));
3729   match(RegI);
3730   match(eAXRegI);
3731   match(eDXRegI);
3732   match(eSIRegI);
3733   match(eDIRegI);
3734 
3735   format %{ %}
3736   interface(REG_INTER);
3737 %}
3738 
3739 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3740 // //
3741 operand eSIRegI(xRegI reg) %{
3742    constraint(ALLOC_IN_RC(esi_reg));
3743    match(reg);
3744    match(rRegI);
3745 
3746    format %{ "ESI" %}
3747    interface(REG_INTER);
3748 %}
3749 
3750 // Pointer Register
3751 operand anyRegP() %{
3752   constraint(ALLOC_IN_RC(any_reg));
3753   match(RegP);
3754   match(eAXRegP);
3755   match(eBXRegP);
3756   match(eCXRegP);
3757   match(eDIRegP);
3758   match(eRegP);
3759 
3760   format %{ %}
3761   interface(REG_INTER);
3762 %}
3763 
3764 operand eRegP() %{
3765   constraint(ALLOC_IN_RC(int_reg));
3766   match(RegP);
3767   match(eAXRegP);
3768   match(eBXRegP);
3769   match(eCXRegP);
3770   match(eDIRegP);
3771 
3772   format %{ %}
3773   interface(REG_INTER);
3774 %}
3775 
3776 // On Windows 95, EBP is not safe to use for implicit null tests.
3777 operand eRegP_no_EBP() %{
3778   constraint(ALLOC_IN_RC(int_reg_no_rbp));
3779   match(RegP);
3780   match(eAXRegP);
3781   match(eBXRegP);
3782   match(eCXRegP);
3783   match(eDIRegP);
3784 
3785   op_cost(100);
3786   format %{ %}
3787   interface(REG_INTER);
3788 %}
3789 
3790 operand naxRegP() %{
3791   constraint(ALLOC_IN_RC(nax_reg));
3792   match(RegP);
3793   match(eBXRegP);
3794   match(eDXRegP);
3795   match(eCXRegP);
3796   match(eSIRegP);
3797   match(eDIRegP);
3798 
3799   format %{ %}
3800   interface(REG_INTER);
3801 %}
3802 
3803 operand nabxRegP() %{
3804   constraint(ALLOC_IN_RC(nabx_reg));
3805   match(RegP);
3806   match(eCXRegP);
3807   match(eDXRegP);
3808   match(eSIRegP);
3809   match(eDIRegP);
3810 
3811   format %{ %}
3812   interface(REG_INTER);
3813 %}
3814 
3815 operand pRegP() %{
3816   constraint(ALLOC_IN_RC(p_reg));
3817   match(RegP);
3818   match(eBXRegP);
3819   match(eDXRegP);
3820   match(eSIRegP);
3821   match(eDIRegP);
3822 
3823   format %{ %}
3824   interface(REG_INTER);
3825 %}
3826 
3827 // Special Registers
3828 // Return a pointer value
3829 operand eAXRegP(eRegP reg) %{
3830   constraint(ALLOC_IN_RC(eax_reg));
3831   match(reg);
3832   format %{ "EAX" %}
3833   interface(REG_INTER);
3834 %}
3835 
3836 // Used in AtomicAdd
3837 operand eBXRegP(eRegP reg) %{
3838   constraint(ALLOC_IN_RC(ebx_reg));
3839   match(reg);
3840   format %{ "EBX" %}
3841   interface(REG_INTER);
3842 %}
3843 
3844 // Tail-call (interprocedural jump) to interpreter
3845 operand eCXRegP(eRegP reg) %{
3846   constraint(ALLOC_IN_RC(ecx_reg));
3847   match(reg);
3848   format %{ "ECX" %}
3849   interface(REG_INTER);
3850 %}
3851 
3852 operand eSIRegP(eRegP reg) %{
3853   constraint(ALLOC_IN_RC(esi_reg));
3854   match(reg);
3855   format %{ "ESI" %}
3856   interface(REG_INTER);
3857 %}
3858 
3859 // Used in rep stosw
3860 operand eDIRegP(eRegP reg) %{
3861   constraint(ALLOC_IN_RC(edi_reg));
3862   match(reg);
3863   format %{ "EDI" %}
3864   interface(REG_INTER);
3865 %}
3866 
3867 operand eBPRegP() %{
3868   constraint(ALLOC_IN_RC(ebp_reg));
3869   match(RegP);
3870   format %{ "EBP" %}
3871   interface(REG_INTER);
3872 %}
3873 
3874 operand eRegL() %{
3875   constraint(ALLOC_IN_RC(long_reg));
3876   match(RegL);
3877   match(eADXRegL);
3878 
3879   format %{ %}
3880   interface(REG_INTER);
3881 %}
3882 
3883 operand eADXRegL( eRegL reg ) %{
3884   constraint(ALLOC_IN_RC(eadx_reg));
3885   match(reg);
3886 
3887   format %{ "EDX:EAX" %}
3888   interface(REG_INTER);
3889 %}
3890 
3891 operand eBCXRegL( eRegL reg ) %{
3892   constraint(ALLOC_IN_RC(ebcx_reg));
3893   match(reg);
3894 
3895   format %{ "EBX:ECX" %}
3896   interface(REG_INTER);
3897 %}
3898 
3899 // Special case for integer high multiply
3900 operand eADXRegL_low_only() %{
3901   constraint(ALLOC_IN_RC(eadx_reg));
3902   match(RegL);
3903 
3904   format %{ "EAX" %}
3905   interface(REG_INTER);
3906 %}
3907 
3908 // Flags register, used as output of compare instructions
3909 operand eFlagsReg() %{
3910   constraint(ALLOC_IN_RC(int_flags));
3911   match(RegFlags);
3912 
3913   format %{ "EFLAGS" %}
3914   interface(REG_INTER);
3915 %}
3916 
3917 // Flags register, used as output of FLOATING POINT compare instructions
3918 operand eFlagsRegU() %{
3919   constraint(ALLOC_IN_RC(int_flags));
3920   match(RegFlags);
3921 
3922   format %{ "EFLAGS_U" %}
3923   interface(REG_INTER);
3924 %}
3925 
3926 operand eFlagsRegUCF() %{
3927   constraint(ALLOC_IN_RC(int_flags));
3928   match(RegFlags);
3929   predicate(false);
3930 
3931   format %{ "EFLAGS_U_CF" %}
3932   interface(REG_INTER);
3933 %}
3934 
3935 // Condition Code Register used by long compare
3936 operand flagsReg_long_LTGE() %{
3937   constraint(ALLOC_IN_RC(int_flags));
3938   match(RegFlags);
3939   format %{ "FLAGS_LTGE" %}
3940   interface(REG_INTER);
3941 %}
3942 operand flagsReg_long_EQNE() %{
3943   constraint(ALLOC_IN_RC(int_flags));
3944   match(RegFlags);
3945   format %{ "FLAGS_EQNE" %}
3946   interface(REG_INTER);
3947 %}
3948 operand flagsReg_long_LEGT() %{
3949   constraint(ALLOC_IN_RC(int_flags));
3950   match(RegFlags);
3951   format %{ "FLAGS_LEGT" %}
3952   interface(REG_INTER);
3953 %}
3954 
3955 // Float register operands
3956 operand regDPR() %{
3957   predicate( UseSSE < 2 );
3958   constraint(ALLOC_IN_RC(fp_dbl_reg));
3959   match(RegD);
3960   match(regDPR1);
3961   match(regDPR2);
3962   format %{ %}
3963   interface(REG_INTER);
3964 %}
3965 
3966 operand regDPR1(regDPR reg) %{
3967   predicate( UseSSE < 2 );
3968   constraint(ALLOC_IN_RC(fp_dbl_reg0));
3969   match(reg);
3970   format %{ "FPR1" %}
3971   interface(REG_INTER);
3972 %}
3973 
3974 operand regDPR2(regDPR reg) %{
3975   predicate( UseSSE < 2 );
3976   constraint(ALLOC_IN_RC(fp_dbl_reg1));
3977   match(reg);
3978   format %{ "FPR2" %}
3979   interface(REG_INTER);
3980 %}
3981 
3982 operand regnotDPR1(regDPR reg) %{
3983   predicate( UseSSE < 2 );
3984   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
3985   match(reg);
3986   format %{ %}
3987   interface(REG_INTER);
3988 %}
3989 
3990 // Float register operands
3991 operand regFPR() %{
3992   predicate( UseSSE < 2 );
3993   constraint(ALLOC_IN_RC(fp_flt_reg));
3994   match(RegF);
3995   match(regFPR1);
3996   format %{ %}
3997   interface(REG_INTER);
3998 %}
3999 
4000 // Float register operands
4001 operand regFPR1(regFPR reg) %{
4002   predicate( UseSSE < 2 );
4003   constraint(ALLOC_IN_RC(fp_flt_reg0));
4004   match(reg);
4005   format %{ "FPR1" %}
4006   interface(REG_INTER);
4007 %}
4008 
4009 // XMM Float register operands
4010 operand regF() %{
4011   predicate( UseSSE>=1 );
4012   constraint(ALLOC_IN_RC(float_reg_legacy));
4013   match(RegF);
4014   format %{ %}
4015   interface(REG_INTER);
4016 %}
4017 
4018 // XMM Double register operands
4019 operand regD() %{
4020   predicate( UseSSE>=2 );
4021   constraint(ALLOC_IN_RC(double_reg_legacy));
4022   match(RegD);
4023   format %{ %}
4024   interface(REG_INTER);
4025 %}
4026 
4027 // Vectors
4028 operand vecS() %{
4029   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4030   match(VecS);
4031 
4032   format %{ %}
4033   interface(REG_INTER);
4034 %}
4035 
4036 operand vecD() %{
4037   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4038   match(VecD);
4039 
4040   format %{ %}
4041   interface(REG_INTER);
4042 %}
4043 
4044 operand vecX() %{
4045   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4046   match(VecX);
4047 
4048   format %{ %}
4049   interface(REG_INTER);
4050 %}
4051 
4052 operand vecY() %{
4053   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4054   match(VecY);
4055 
4056   format %{ %}
4057   interface(REG_INTER);
4058 %}
4059 
4060 //----------Memory Operands----------------------------------------------------
4061 // Direct Memory Operand
4062 operand direct(immP addr) %{
4063   match(addr);
4064 
4065   format %{ "[$addr]" %}
4066   interface(MEMORY_INTER) %{
4067     base(0xFFFFFFFF);
4068     index(0x4);
4069     scale(0x0);
4070     disp($addr);
4071   %}
4072 %}
4073 
4074 // Indirect Memory Operand
4075 operand indirect(eRegP reg) %{
4076   constraint(ALLOC_IN_RC(int_reg));
4077   match(reg);
4078 
4079   format %{ "[$reg]" %}
4080   interface(MEMORY_INTER) %{
4081     base($reg);
4082     index(0x4);
4083     scale(0x0);
4084     disp(0x0);
4085   %}
4086 %}
4087 
4088 // Indirect Memory Plus Short Offset Operand
4089 operand indOffset8(eRegP reg, immI8 off) %{
4090   match(AddP reg off);
4091 
4092   format %{ "[$reg + $off]" %}
4093   interface(MEMORY_INTER) %{
4094     base($reg);
4095     index(0x4);
4096     scale(0x0);
4097     disp($off);
4098   %}
4099 %}
4100 
4101 // Indirect Memory Plus Long Offset Operand
4102 operand indOffset32(eRegP reg, immI off) %{
4103   match(AddP reg off);
4104 
4105   format %{ "[$reg + $off]" %}
4106   interface(MEMORY_INTER) %{
4107     base($reg);
4108     index(0x4);
4109     scale(0x0);
4110     disp($off);
4111   %}
4112 %}
4113 
4114 // Indirect Memory Plus Long Offset Operand
4115 operand indOffset32X(rRegI reg, immP off) %{
4116   match(AddP off reg);
4117 
4118   format %{ "[$reg + $off]" %}
4119   interface(MEMORY_INTER) %{
4120     base($reg);
4121     index(0x4);
4122     scale(0x0);
4123     disp($off);
4124   %}
4125 %}
4126 
4127 // Indirect Memory Plus Index Register Plus Offset Operand
4128 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4129   match(AddP (AddP reg ireg) off);
4130 
4131   op_cost(10);
4132   format %{"[$reg + $off + $ireg]" %}
4133   interface(MEMORY_INTER) %{
4134     base($reg);
4135     index($ireg);
4136     scale(0x0);
4137     disp($off);
4138   %}
4139 %}
4140 
4141 // Indirect Memory Plus Index Register Plus Offset Operand
4142 operand indIndex(eRegP reg, rRegI ireg) %{
4143   match(AddP reg ireg);
4144 
4145   op_cost(10);
4146   format %{"[$reg + $ireg]" %}
4147   interface(MEMORY_INTER) %{
4148     base($reg);
4149     index($ireg);
4150     scale(0x0);
4151     disp(0x0);
4152   %}
4153 %}
4154 
4155 // // -------------------------------------------------------------------------
4156 // // 486 architecture doesn't support "scale * index + offset" without a base
4157 // // -------------------------------------------------------------------------
4158 // // Scaled Memory Operands
4159 // // Indirect Memory Times Scale Plus Offset Operand
4160 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4161 //   match(AddP off (LShiftI ireg scale));
4162 //
4163 //   op_cost(10);
4164 //   format %{"[$off + $ireg << $scale]" %}
4165 //   interface(MEMORY_INTER) %{
4166 //     base(0x4);
4167 //     index($ireg);
4168 //     scale($scale);
4169 //     disp($off);
4170 //   %}
4171 // %}
4172 
4173 // Indirect Memory Times Scale Plus Index Register
4174 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4175   match(AddP reg (LShiftI ireg scale));
4176 
4177   op_cost(10);
4178   format %{"[$reg + $ireg << $scale]" %}
4179   interface(MEMORY_INTER) %{
4180     base($reg);
4181     index($ireg);
4182     scale($scale);
4183     disp(0x0);
4184   %}
4185 %}
4186 
4187 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4188 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4189   match(AddP (AddP reg (LShiftI ireg scale)) off);
4190 
4191   op_cost(10);
4192   format %{"[$reg + $off + $ireg << $scale]" %}
4193   interface(MEMORY_INTER) %{
4194     base($reg);
4195     index($ireg);
4196     scale($scale);
4197     disp($off);
4198   %}
4199 %}
4200 
4201 //----------Load Long Memory Operands------------------------------------------
4202 // The load-long idiom will use its address expression again after loading
4203 // the first word of the long.  If the load-long destination overlaps with
4204 // registers used in the addressing expression, the 2nd half will be loaded
4205 // from a clobbered address.  Fix this by requiring that load-long use
4206 // address registers that do not overlap with the load-long target.
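
     // A hedged illustration of the overlap hazard (C++-flavored; dst_lo, base and
     // disp are hypothetical names): if the low half of the destination aliases the
     // base register, the second access goes through a clobbered address.
     //   dst_lo = *(int32_t*)(base + disp);       // if dst_lo is the same register as base...
     //   dst_hi = *(int32_t*)(base + disp + 4);   // ...this reads through the loaded value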
4207 
4208 // load-long support
4209 operand load_long_RegP() %{
4210   constraint(ALLOC_IN_RC(esi_reg));
4211   match(RegP);
4212   match(eSIRegP);
4213   op_cost(100);
4214   format %{  %}
4215   interface(REG_INTER);
4216 %}
4217 
4218 // Indirect Memory Operand Long
4219 operand load_long_indirect(load_long_RegP reg) %{
4220   constraint(ALLOC_IN_RC(esi_reg));
4221   match(reg);
4222 
4223   format %{ "[$reg]" %}
4224   interface(MEMORY_INTER) %{
4225     base($reg);
4226     index(0x4);
4227     scale(0x0);
4228     disp(0x0);
4229   %}
4230 %}
4231 
4232 // Indirect Memory Plus Long Offset Operand
4233 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4234   match(AddP reg off);
4235 
4236   format %{ "[$reg + $off]" %}
4237   interface(MEMORY_INTER) %{
4238     base($reg);
4239     index(0x4);
4240     scale(0x0);
4241     disp($off);
4242   %}
4243 %}
4244 
4245 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4246 
4247 
4248 //----------Special Memory Operands--------------------------------------------
4249 // Stack Slot Operand - This operand is used for loading and storing temporary
4250 //                      values on the stack where a match requires a value to
4251 //                      flow through memory.
4252 operand stackSlotP(sRegP reg) %{
4253   constraint(ALLOC_IN_RC(stack_slots));
4254   // No match rule because this operand is only generated in matching
4255   format %{ "[$reg]" %}
4256   interface(MEMORY_INTER) %{
4257     base(0x4);   // ESP
4258     index(0x4);  // No Index
4259     scale(0x0);  // No Scale
4260     disp($reg);  // Stack Offset
4261   %}
4262 %}
4263 
4264 operand stackSlotI(sRegI reg) %{
4265   constraint(ALLOC_IN_RC(stack_slots));
4266   // No match rule because this operand is only generated in matching
4267   format %{ "[$reg]" %}
4268   interface(MEMORY_INTER) %{
4269     base(0x4);   // ESP
4270     index(0x4);  // No Index
4271     scale(0x0);  // No Scale
4272     disp($reg);  // Stack Offset
4273   %}
4274 %}
4275 
4276 operand stackSlotF(sRegF reg) %{
4277   constraint(ALLOC_IN_RC(stack_slots));
4278   // No match rule because this operand is only generated in matching
4279   format %{ "[$reg]" %}
4280   interface(MEMORY_INTER) %{
4281     base(0x4);   // ESP
4282     index(0x4);  // No Index
4283     scale(0x0);  // No Scale
4284     disp($reg);  // Stack Offset
4285   %}
4286 %}
4287 
4288 operand stackSlotD(sRegD reg) %{
4289   constraint(ALLOC_IN_RC(stack_slots));
4290   // No match rule because this operand is only generated in matching
4291   format %{ "[$reg]" %}
4292   interface(MEMORY_INTER) %{
4293     base(0x4);   // ESP
4294     index(0x4);  // No Index
4295     scale(0x0);  // No Scale
4296     disp($reg);  // Stack Offset
4297   %}
4298 %}
4299 
4300 operand stackSlotL(sRegL reg) %{
4301   constraint(ALLOC_IN_RC(stack_slots));
4302   // No match rule because this operand is only generated in matching
4303   format %{ "[$reg]" %}
4304   interface(MEMORY_INTER) %{
4305     base(0x4);   // ESP
4306     index(0x4);  // No Index
4307     scale(0x0);  // No Scale
4308     disp($reg);  // Stack Offset
4309   %}
4310 %}
4311 
4312 //----------Memory Operands - Win95 Implicit Null Variants----------------
4313 // Indirect Memory Operand
4314 operand indirect_win95_safe(eRegP_no_EBP reg)
4315 %{
4316   constraint(ALLOC_IN_RC(int_reg));
4317   match(reg);
4318 
4319   op_cost(100);
4320   format %{ "[$reg]" %}
4321   interface(MEMORY_INTER) %{
4322     base($reg);
4323     index(0x4);
4324     scale(0x0);
4325     disp(0x0);
4326   %}
4327 %}
4328 
4329 // Indirect Memory Plus Short Offset Operand
4330 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4331 %{
4332   match(AddP reg off);
4333 
4334   op_cost(100);
4335   format %{ "[$reg + $off]" %}
4336   interface(MEMORY_INTER) %{
4337     base($reg);
4338     index(0x4);
4339     scale(0x0);
4340     disp($off);
4341   %}
4342 %}
4343 
4344 // Indirect Memory Plus Long Offset Operand
4345 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4346 %{
4347   match(AddP reg off);
4348 
4349   op_cost(100);
4350   format %{ "[$reg + $off]" %}
4351   interface(MEMORY_INTER) %{
4352     base($reg);
4353     index(0x4);
4354     scale(0x0);
4355     disp($off);
4356   %}
4357 %}
4358 
4359 // Indirect Memory Plus Index Register Plus Offset Operand
4360 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4361 %{
4362   match(AddP (AddP reg ireg) off);
4363 
4364   op_cost(100);
4365   format %{"[$reg + $off + $ireg]" %}
4366   interface(MEMORY_INTER) %{
4367     base($reg);
4368     index($ireg);
4369     scale(0x0);
4370     disp($off);
4371   %}
4372 %}
4373 
4374 // Indirect Memory Times Scale Plus Index Register
4375 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4376 %{
4377   match(AddP reg (LShiftI ireg scale));
4378 
4379   op_cost(100);
4380   format %{"[$reg + $ireg << $scale]" %}
4381   interface(MEMORY_INTER) %{
4382     base($reg);
4383     index($ireg);
4384     scale($scale);
4385     disp(0x0);
4386   %}
4387 %}
4388 
4389 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4390 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4391 %{
4392   match(AddP (AddP reg (LShiftI ireg scale)) off);
4393 
4394   op_cost(100);
4395   format %{"[$reg + $off + $ireg << $scale]" %}
4396   interface(MEMORY_INTER) %{
4397     base($reg);
4398     index($ireg);
4399     scale($scale);
4400     disp($off);
4401   %}
4402 %}
4403 
4404 //----------Conditional Branch Operands----------------------------------------
4405 // Comparison Op  - This is the operation of the comparison, and is limited to
4406 //                  the following set of codes:
4407 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4408 //
4409 // Other attributes of the comparison, such as unsignedness, are specified
4410 // by the comparison instruction that sets a condition code flags register.
4411 // That result is represented by a flags operand whose subtype is appropriate
4412 // to the unsignedness (etc.) of the comparison.
4413 //
4414 // Later, the instruction which matches both the Comparison Op (a Bool) and
4415 // the flags (produced by the Cmp) specifies the coding of the comparison op
4416 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4417 
4418 // Comparison Code
4419 operand cmpOp() %{
4420   match(Bool);
4421 
4422   format %{ "" %}
4423   interface(COND_INTER) %{
4424     equal(0x4, "e");
4425     not_equal(0x5, "ne");
4426     less(0xC, "l");
4427     greater_equal(0xD, "ge");
4428     less_equal(0xE, "le");
4429     greater(0xF, "g");
4430     overflow(0x0, "o");
4431     no_overflow(0x1, "no");
4432   %}
4433 %}
4434 
4435 // Comparison Code, unsigned compare.  Used by FP also, with
4436 // C2 (unordered) turned into GT or LT already.  The other bits
4437 // C0 and C3 are turned into Carry & Zero flags.
4438 operand cmpOpU() %{
4439   match(Bool);
4440 
4441   format %{ "" %}
4442   interface(COND_INTER) %{
4443     equal(0x4, "e");
4444     not_equal(0x5, "ne");
4445     less(0x2, "b");
4446     greater_equal(0x3, "nb");
4447     less_equal(0x6, "be");
4448     greater(0x7, "nbe");
4449     overflow(0x0, "o");
4450     no_overflow(0x1, "no");
4451   %}
4452 %}
4453 
4454 // Floating comparisons that don't require any fixup for the unordered case
4455 operand cmpOpUCF() %{
4456   match(Bool);
4457   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4458             n->as_Bool()->_test._test == BoolTest::ge ||
4459             n->as_Bool()->_test._test == BoolTest::le ||
4460             n->as_Bool()->_test._test == BoolTest::gt);
4461   format %{ "" %}
4462   interface(COND_INTER) %{
4463     equal(0x4, "e");
4464     not_equal(0x5, "ne");
4465     less(0x2, "b");
4466     greater_equal(0x3, "nb");
4467     less_equal(0x6, "be");
4468     greater(0x7, "nbe");
4469     overflow(0x0, "o");
4470     no_overflow(0x1, "no");
4471   %}
4472 %}
4473 
4474 
4475 // Floating comparisons that can be fixed up with extra conditional jumps
4476 operand cmpOpUCF2() %{
4477   match(Bool);
4478   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4479             n->as_Bool()->_test._test == BoolTest::eq);
4480   format %{ "" %}
4481   interface(COND_INTER) %{
4482     equal(0x4, "e");
4483     not_equal(0x5, "ne");
4484     less(0x2, "b");
4485     greater_equal(0x3, "nb");
4486     less_equal(0x6, "be");
4487     greater(0x7, "nbe");
4488     overflow(0x0, "o");
4489     no_overflow(0x1, "no");
4490   %}
4491 %}
4492 
4493 // Comparison Code for FP conditional move
4494 operand cmpOp_fcmov() %{
4495   match(Bool);
4496 
4497   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4498             n->as_Bool()->_test._test != BoolTest::no_overflow);
4499   format %{ "" %}
4500   interface(COND_INTER) %{
4501     equal        (0x0C8);
4502     not_equal    (0x1C8);
4503     less         (0x0C0);
4504     greater_equal(0x1C0);
4505     less_equal   (0x0D0);
4506     greater      (0x1D0);
4507     overflow(0x0, "o"); // not really supported by the instruction
4508     no_overflow(0x1, "no"); // not really supported by the instruction
4509   %}
4510 %}
4511 
4512 // Comparison Code used in long compares
4513 operand cmpOp_commute() %{
4514   match(Bool);
4515 
4516   format %{ "" %}
4517   interface(COND_INTER) %{
4518     equal(0x4, "e");
4519     not_equal(0x5, "ne");
4520     less(0xF, "g");
4521     greater_equal(0xE, "le");
4522     less_equal(0xD, "ge");
4523     greater(0xC, "l");
4524     overflow(0x0, "o");
4525     no_overflow(0x1, "no");
4526   %}
4527 %}
4528 
4529 //----------OPERAND CLASSES----------------------------------------------------
4530 // Operand Classes are groups of operands that are used to simplify
4531 // instruction definitions by not requiring the AD writer to specify separate
4532 // instructions for every form of operand when the instruction accepts
4533 // multiple operand types with the same basic encoding and format.  The classic
4534 // case of this is memory operands.
4535 
4536 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4537                indIndex, indIndexScale, indIndexScaleOffset);
4538 
4539 // Long memory operations are encoded as two instructions, the second using a +4
4540 // offset.  This means some kind of offset is always required, so an oop cannot be
4541 // used as the offset (as is done when working on static globals).
4542 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4543                     indIndex, indIndexScale, indIndexScaleOffset);
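// Note: long_memory is identical to the 'memory' class above except that it omits
// indOffset32X, per the restriction described above.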
4544 
4545 
4546 //----------PIPELINE-----------------------------------------------------------
4547 // Rules which define the behavior of the target architecture's pipeline.
4548 pipeline %{
4549 
4550 //----------ATTRIBUTES---------------------------------------------------------
4551 attributes %{
4552   variable_size_instructions;        // Variable-size instructions
4553   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4554   instruction_unit_size = 1;         // An instruction is 1 byte long
4555   instruction_fetch_unit_size = 16;  // The processor fetches one line
4556   instruction_fetch_units = 1;       // of 16 bytes
4557 
4558   // List of nop instructions
4559   nops( MachNop );
4560 %}
4561 
4562 //----------RESOURCES----------------------------------------------------------
4563 // Resources are the functional units available to the machine
4564 
4565 // Generic P2/P3 pipeline
4566 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4567 // 3 instructions decoded per cycle.
4568 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4569 // 2 ALU ops, only ALU0 handles mul/div instructions.
4570 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4571            MS0, MS1, MEM = MS0 | MS1,
4572            BR, FPU,
4573            ALU0, ALU1, ALU = ALU0 | ALU1 );
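// DECODE, MEM and ALU are resource groups: a pipe class that names the group may use
// any member unit, while naming D0 or ALU0 directly pins the instruction to that unit.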
4574 
4575 //----------PIPELINE DESCRIPTION-----------------------------------------------
4576 // Pipeline Description specifies the stages in the machine's pipeline
4577 
4578 // Generic P2/P3 pipeline
4579 pipe_desc(S0, S1, S2, S3, S4, S5);
4580 
4581 //----------PIPELINE CLASSES---------------------------------------------------
4582 // Pipeline Classes describe the stages in which input and output are
4583 // referenced by the hardware pipeline.
4584 
4585 // Naming convention: ialu or fpu
4586 // Then: _reg
4587 // Then: _reg if there is a 2nd register
4588 // Then: _long if it's a pair of instructions implementing a long
4589 // Then: _fat if it requires the big decoder
4590 //   Or: _mem if it requires the big decoder and a memory unit.
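// For example, ialu_reg_long_fat below is an integer ALU operation on a long register
// pair that requires the big decoder.  In the classes that follow, lines such as
// 'dst : S4(write)' name the pipeline stage (from pipe_desc above) in which the
// operand is read or written, and the resource lines name the units consumed.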
4591 
4592 // Integer ALU reg operation
4593 pipe_class ialu_reg(rRegI dst) %{
4594     single_instruction;
4595     dst    : S4(write);
4596     dst    : S3(read);
4597     DECODE : S0;        // any decoder
4598     ALU    : S3;        // any alu
4599 %}
4600 
4601 // Long ALU reg operation
4602 pipe_class ialu_reg_long(eRegL dst) %{
4603     instruction_count(2);
4604     dst    : S4(write);
4605     dst    : S3(read);
4606     DECODE : S0(2);     // any 2 decoders
4607     ALU    : S3(2);     // both alus
4608 %}
4609 
4610 // Integer ALU reg operation using big decoder
4611 pipe_class ialu_reg_fat(rRegI dst) %{
4612     single_instruction;
4613     dst    : S4(write);
4614     dst    : S3(read);
4615     D0     : S0;        // big decoder only
4616     ALU    : S3;        // any alu
4617 %}
4618 
4619 // Long ALU reg operation using big decoder
4620 pipe_class ialu_reg_long_fat(eRegL dst) %{
4621     instruction_count(2);
4622     dst    : S4(write);
4623     dst    : S3(read);
4624     D0     : S0(2);     // big decoder only; twice
4625     ALU    : S3(2);     // any 2 alus
4626 %}
4627 
4628 // Integer ALU reg-reg operation
4629 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4630     single_instruction;
4631     dst    : S4(write);
4632     src    : S3(read);
4633     DECODE : S0;        // any decoder
4634     ALU    : S3;        // any alu
4635 %}
4636 
4637 // Long ALU reg-reg operation
4638 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4639     instruction_count(2);
4640     dst    : S4(write);
4641     src    : S3(read);
4642     DECODE : S0(2);     // any 2 decoders
4643     ALU    : S3(2);     // both alus
4644 %}
4645 
4646 // Integer ALU reg-reg operation using big decoder
4647 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4648     single_instruction;
4649     dst    : S4(write);
4650     src    : S3(read);
4651     D0     : S0;        // big decoder only
4652     ALU    : S3;        // any alu
4653 %}
4654 
4655 // Long ALU reg-reg operation using big decoder
4656 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4657     instruction_count(2);
4658     dst    : S4(write);
4659     src    : S3(read);
4660     D0     : S0(2);     // big decoder only; twice
4661     ALU    : S3(2);     // both alus
4662 %}
4663 
4664 // Integer ALU reg-mem operation
4665 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4666     single_instruction;
4667     dst    : S5(write);
4668     mem    : S3(read);
4669     D0     : S0;        // big decoder only
4670     ALU    : S4;        // any alu
4671     MEM    : S3;        // any mem
4672 %}
4673 
4674 // Long ALU reg-mem operation
4675 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4676     instruction_count(2);
4677     dst    : S5(write);
4678     mem    : S3(read);
4679     D0     : S0(2);     // big decoder only; twice
4680     ALU    : S4(2);     // any 2 alus
4681     MEM    : S3(2);     // both mems
4682 %}
4683 
4684 // Integer mem operation (prefetch)
4685 pipe_class ialu_mem(memory mem)
4686 %{
4687     single_instruction;
4688     mem    : S3(read);
4689     D0     : S0;        // big decoder only
4690     MEM    : S3;        // any mem
4691 %}
4692 
4693 // Integer Store to Memory
4694 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4695     single_instruction;
4696     mem    : S3(read);
4697     src    : S5(read);
4698     D0     : S0;        // big decoder only
4699     ALU    : S4;        // any alu
4700     MEM    : S3;
4701 %}
4702 
4703 // Long Store to Memory
4704 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4705     instruction_count(2);
4706     mem    : S3(read);
4707     src    : S5(read);
4708     D0     : S0(2);     // big decoder only; twice
4709     ALU    : S4(2);     // any 2 alus
4710     MEM    : S3(2);     // Both mems
4711 %}
4712 
4713 // Integer Store of Immediate to Memory
4714 pipe_class ialu_mem_imm(memory mem) %{
4715     single_instruction;
4716     mem    : S3(read);
4717     D0     : S0;        // big decoder only
4718     ALU    : S4;        // any alu
4719     MEM    : S3;
4720 %}
4721 
4722 // Integer ALU0 reg-reg operation
4723 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4724     single_instruction;
4725     dst    : S4(write);
4726     src    : S3(read);
4727     D0     : S0;        // Big decoder only
4728     ALU0   : S3;        // only alu0
4729 %}
4730 
4731 // Integer ALU0 reg-mem operation
4732 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4733     single_instruction;
4734     dst    : S5(write);
4735     mem    : S3(read);
4736     D0     : S0;        // big decoder only
4737     ALU0   : S4;        // ALU0 only
4738     MEM    : S3;        // any mem
4739 %}
4740 
4741 // Integer ALU reg-reg operation
4742 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4743     single_instruction;
4744     cr     : S4(write);
4745     src1   : S3(read);
4746     src2   : S3(read);
4747     DECODE : S0;        // any decoder
4748     ALU    : S3;        // any alu
4749 %}
4750 
4751 // Integer ALU reg-imm operation
4752 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4753     single_instruction;
4754     cr     : S4(write);
4755     src1   : S3(read);
4756     DECODE : S0;        // any decoder
4757     ALU    : S3;        // any alu
4758 %}
4759 
4760 // Integer ALU reg-mem operation
4761 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4762     single_instruction;
4763     cr     : S4(write);
4764     src1   : S3(read);
4765     src2   : S3(read);
4766     D0     : S0;        // big decoder only
4767     ALU    : S4;        // any alu
4768     MEM    : S3;
4769 %}
4770 
4771 // Conditional move reg-reg
4772 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4773     instruction_count(4);
4774     y      : S4(read);
4775     q      : S3(read);
4776     p      : S3(read);
4777     DECODE : S0(4);     // any decoder
4778 %}
4779 
4780 // Conditional move reg-reg
4781 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4782     single_instruction;
4783     dst    : S4(write);
4784     src    : S3(read);
4785     cr     : S3(read);
4786     DECODE : S0;        // any decoder
4787 %}
4788 
4789 // Conditional move reg-mem
4790 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4791     single_instruction;
4792     dst    : S4(write);
4793     src    : S3(read);
4794     cr     : S3(read);
4795     DECODE : S0;        // any decoder
4796     MEM    : S3;
4797 %}
4798 
4799 // Conditional move reg-reg long
4800 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4801     single_instruction;
4802     dst    : S4(write);
4803     src    : S3(read);
4804     cr     : S3(read);
4805     DECODE : S0(2);     // any 2 decoders
4806 %}
4807 
4808 // Conditional move double reg-reg
4809 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4810     single_instruction;
4811     dst    : S4(write);
4812     src    : S3(read);
4813     cr     : S3(read);
4814     DECODE : S0;        // any decoder
4815 %}
4816 
4817 // Float reg-reg operation
4818 pipe_class fpu_reg(regDPR dst) %{
4819     instruction_count(2);
4820     dst    : S3(read);
4821     DECODE : S0(2);     // any 2 decoders
4822     FPU    : S3;
4823 %}
4824 
4825 // Float reg-reg operation
4826 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4827     instruction_count(2);
4828     dst    : S4(write);
4829     src    : S3(read);
4830     DECODE : S0(2);     // any 2 decoders
4831     FPU    : S3;
4832 %}
4833 
4834 // Float reg-reg operation
4835 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4836     instruction_count(3);
4837     dst    : S4(write);
4838     src1   : S3(read);
4839     src2   : S3(read);
4840     DECODE : S0(3);     // any 3 decoders
4841     FPU    : S3(2);
4842 %}
4843 
4844 // Float reg-reg operation
4845 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4846     instruction_count(4);
4847     dst    : S4(write);
4848     src1   : S3(read);
4849     src2   : S3(read);
4850     src3   : S3(read);
4851     DECODE : S0(4);     // any 4 decoders
4852     FPU    : S3(2);
4853 %}
4854 
4855 // Float reg-reg operation
4856 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4857     instruction_count(4);
4858     dst    : S4(write);
4859     src1   : S3(read);
4860     src2   : S3(read);
4861     src3   : S3(read);
4862     DECODE : S1(3);     // any 3 decoders
4863     D0     : S0;        // Big decoder only
4864     FPU    : S3(2);
4865     MEM    : S3;
4866 %}
4867 
4868 // Float reg-mem operation
4869 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4870     instruction_count(2);
4871     dst    : S5(write);
4872     mem    : S3(read);
4873     D0     : S0;        // big decoder only
4874     DECODE : S1;        // any decoder for FPU POP
4875     FPU    : S4;
4876     MEM    : S3;        // any mem
4877 %}
4878 
4879 // Float reg-mem operation
4880 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4881     instruction_count(3);
4882     dst    : S5(write);
4883     src1   : S3(read);
4884     mem    : S3(read);
4885     D0     : S0;        // big decoder only
4886     DECODE : S1(2);     // any decoder for FPU POP
4887     FPU    : S4;
4888     MEM    : S3;        // any mem
4889 %}
4890 
4891 // Float mem-reg operation
4892 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4893     instruction_count(2);
4894     src    : S5(read);
4895     mem    : S3(read);
4896     DECODE : S0;        // any decoder for FPU PUSH
4897     D0     : S1;        // big decoder only
4898     FPU    : S4;
4899     MEM    : S3;        // any mem
4900 %}
4901 
4902 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4903     instruction_count(3);
4904     src1   : S3(read);
4905     src2   : S3(read);
4906     mem    : S3(read);
4907     DECODE : S0(2);     // any decoder for FPU PUSH
4908     D0     : S1;        // big decoder only
4909     FPU    : S4;
4910     MEM    : S3;        // any mem
4911 %}
4912 
4913 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4914     instruction_count(3);
4915     src1   : S3(read);
4916     src2   : S3(read);
4917     mem    : S4(read);
4918     DECODE : S0;        // any decoder for FPU PUSH
4919     D0     : S0(2);     // big decoder only
4920     FPU    : S4;
4921     MEM    : S3(2);     // any mem
4922 %}
4923 
4924 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4925     instruction_count(2);
4926     src1   : S3(read);
4927     dst    : S4(read);
4928     D0     : S0(2);     // big decoder only
4929     MEM    : S3(2);     // any mem
4930 %}
4931 
4932 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4933     instruction_count(3);
4934     src1   : S3(read);
4935     src2   : S3(read);
4936     dst    : S4(read);
4937     D0     : S0(3);     // big decoder only
4938     FPU    : S4;
4939     MEM    : S3(3);     // any mem
4940 %}
4941 
4942 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4943     instruction_count(3);
4944     src1   : S4(read);
4945     mem    : S4(read);
4946     DECODE : S0;        // any decoder for FPU PUSH
4947     D0     : S0(2);     // big decoder only
4948     FPU    : S4;
4949     MEM    : S3(2);     // any mem
4950 %}
4951 
4952 // Float load constant
4953 pipe_class fpu_reg_con(regDPR dst) %{
4954     instruction_count(2);
4955     dst    : S5(write);
4956     D0     : S0;        // big decoder only for the load
4957     DECODE : S1;        // any decoder for FPU POP
4958     FPU    : S4;
4959     MEM    : S3;        // any mem
4960 %}
4961 
4962 // Float load constant
4963 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
4964     instruction_count(3);
4965     dst    : S5(write);
4966     src    : S3(read);
4967     D0     : S0;        // big decoder only for the load
4968     DECODE : S1(2);     // any decoder for FPU POP
4969     FPU    : S4;
4970     MEM    : S3;        // any mem
4971 %}
4972 
4973 // Unconditional branch
4974 pipe_class pipe_jmp( label labl ) %{
4975     single_instruction;
4976     BR   : S3;
4977 %}
4978 
4979 // Conditional branch
4980 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
4981     single_instruction;
4982     cr    : S1(read);
4983     BR    : S3;
4984 %}
4985 
4986 // Allocation idiom
4987 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
4988     instruction_count(1); force_serialization;
4989     fixed_latency(6);
4990     heap_ptr : S3(read);
4991     DECODE   : S0(3);
4992     D0       : S2;
4993     MEM      : S3;
4994     ALU      : S3(2);
4995     dst      : S5(write);
4996     BR       : S5;
4997 %}
4998 
4999 // Generic big/slow expanded idiom
5000 pipe_class pipe_slow(  ) %{
5001     instruction_count(10); multiple_bundles; force_serialization;
5002     fixed_latency(100);
5003     D0  : S0(2);
5004     MEM : S3(2);
5005 %}
5006 
5007 // The real do-nothing guy
5008 pipe_class empty( ) %{
5009     instruction_count(0);
5010 %}
5011 
5012 // Define the class for the Nop node
5013 define %{
5014    MachNop = empty;
5015 %}
5016 
5017 %}
5018 
5019 //----------INSTRUCTIONS-------------------------------------------------------
5020 //
5021 // match      -- States which machine-independent subtree may be replaced
5022 //               by this instruction.
5023 // ins_cost   -- The estimated cost of this instruction is used by instruction
5024 //               selection to identify a minimum cost tree of machine
5025 //               instructions that matches a tree of machine-independent
5026 //               instructions.
5027 // format     -- A string providing the disassembly for this instruction.
5028 //               The value of an instruction's operand may be inserted
5029 //               by referring to it with a '$' prefix.
5030 // opcode     -- Three instruction opcodes may be provided.  These are referred
5031 //               to within an encode class as $primary, $secondary, and $tertiary
5032 //               respectively.  The primary opcode is commonly used to
5033 //               indicate the type of machine instruction, while secondary
5034 //               and tertiary are often used for prefix options or addressing
5035 //               modes.
5036 // ins_encode -- A list of encode classes with parameters. The encode class
5037 //               name must have been defined in an 'enc_class' specification
5038 //               in the encode section of the architecture description.
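// For example, bytes_reverse_int below supplies opcode(0x0F, 0xC8); its ins_encode
// refers to those bytes as $primary and $secondary through the OpcP and OpcSReg
// encode classes defined in the encode section.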
5039 
5040 //----------BSWAP-Instruction--------------------------------------------------
5041 instruct bytes_reverse_int(rRegI dst) %{
5042   match(Set dst (ReverseBytesI dst));
5043 
5044   format %{ "BSWAP  $dst" %}
5045   opcode(0x0F, 0xC8);
5046   ins_encode( OpcP, OpcSReg(dst) );
5047   ins_pipe( ialu_reg );
5048 %}
5049 
5050 instruct bytes_reverse_long(eRegL dst) %{
5051   match(Set dst (ReverseBytesL dst));
5052 
5053   format %{ "BSWAP  $dst.lo\n\t"
5054             "BSWAP  $dst.hi\n\t"
5055             "XCHG   $dst.lo $dst.hi" %}
5056 
5057   ins_cost(125);
5058   ins_encode( bswap_long_bytes(dst) );
5059   ins_pipe( ialu_reg_reg);
5060 %}
5061 
5062 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5063   match(Set dst (ReverseBytesUS dst));
5064   effect(KILL cr);
5065 
5066   format %{ "BSWAP  $dst\n\t"
5067             "SHR    $dst,16\n\t" %}
5068   ins_encode %{
5069     __ bswapl($dst$$Register);
5070     __ shrl($dst$$Register, 16);
5071   %}
5072   ins_pipe( ialu_reg );
5073 %}
5074 
5075 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5076   match(Set dst (ReverseBytesS dst));
5077   effect(KILL cr);
5078 
5079   format %{ "BSWAP  $dst\n\t"
5080             "SAR    $dst,16\n\t" %}
5081   ins_encode %{
5082     __ bswapl($dst$$Register);
5083     __ sarl($dst$$Register, 16);
5084   %}
5085   ins_pipe( ialu_reg );
5086 %}
5087 
5088 
5089 //---------- Zeros Count Instructions ------------------------------------------
5090 
5091 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5092   predicate(UseCountLeadingZerosInstruction);
5093   match(Set dst (CountLeadingZerosI src));
5094   effect(KILL cr);
5095 
5096   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5097   ins_encode %{
5098     __ lzcntl($dst$$Register, $src$$Register);
5099   %}
5100   ins_pipe(ialu_reg);
5101 %}
5102 
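// Fallback when LZCNT is not available: BSR leaves the index of the most significant
// set bit in $dst (and sets ZF if the source is zero).  NEG plus ADD 31 converts that
// index into 31-index, the leading-zero count; seeding $dst with -1 for a zero source
// makes the final result 32.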
5103 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5104   predicate(!UseCountLeadingZerosInstruction);
5105   match(Set dst (CountLeadingZerosI src));
5106   effect(KILL cr);
5107 
5108   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5109             "JNZ    skip\n\t"
5110             "MOV    $dst, -1\n"
5111       "skip:\n\t"
5112             "NEG    $dst\n\t"
5113             "ADD    $dst, 31" %}
5114   ins_encode %{
5115     Register Rdst = $dst$$Register;
5116     Register Rsrc = $src$$Register;
5117     Label skip;
5118     __ bsrl(Rdst, Rsrc);
5119     __ jccb(Assembler::notZero, skip);
5120     __ movl(Rdst, -1);
5121     __ bind(skip);
5122     __ negl(Rdst);
5123     __ addl(Rdst, BitsPerInt - 1);
5124   %}
5125   ins_pipe(ialu_reg);
5126 %}
5127 
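// LZCNT sets the carry flag when its source is zero, so the carry-clear branch below
// means the high word was non-zero and its count is the final answer; otherwise the
// low word is counted and 32 is added.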
5128 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5129   predicate(UseCountLeadingZerosInstruction);
5130   match(Set dst (CountLeadingZerosL src));
5131   effect(TEMP dst, KILL cr);
5132 
5133   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5134             "JNC    done\n\t"
5135             "LZCNT  $dst, $src.lo\n\t"
5136             "ADD    $dst, 32\n"
5137       "done:" %}
5138   ins_encode %{
5139     Register Rdst = $dst$$Register;
5140     Register Rsrc = $src$$Register;
5141     Label done;
5142     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5143     __ jccb(Assembler::carryClear, done);
5144     __ lzcntl(Rdst, Rsrc);
5145     __ addl(Rdst, BitsPerInt);
5146     __ bind(done);
5147   %}
5148   ins_pipe(ialu_reg);
5149 %}
5150 
5151 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5152   predicate(!UseCountLeadingZerosInstruction);
5153   match(Set dst (CountLeadingZerosL src));
5154   effect(TEMP dst, KILL cr);
5155 
5156   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5157             "JZ     msw_is_zero\n\t"
5158             "ADD    $dst, 32\n\t"
5159             "JMP    not_zero\n"
5160       "msw_is_zero:\n\t"
5161             "BSR    $dst, $src.lo\n\t"
5162             "JNZ    not_zero\n\t"
5163             "MOV    $dst, -1\n"
5164       "not_zero:\n\t"
5165             "NEG    $dst\n\t"
5166             "ADD    $dst, 63\n" %}
5167   ins_encode %{
5168     Register Rdst = $dst$$Register;
5169     Register Rsrc = $src$$Register;
5170     Label msw_is_zero;
5171     Label not_zero;
5172     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5173     __ jccb(Assembler::zero, msw_is_zero);
5174     __ addl(Rdst, BitsPerInt);
5175     __ jmpb(not_zero);
5176     __ bind(msw_is_zero);
5177     __ bsrl(Rdst, Rsrc);
5178     __ jccb(Assembler::notZero, not_zero);
5179     __ movl(Rdst, -1);
5180     __ bind(not_zero);
5181     __ negl(Rdst);
5182     __ addl(Rdst, BitsPerLong - 1);
5183   %}
5184   ins_pipe(ialu_reg);
5185 %}
5186 
5187 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5188   predicate(UseCountTrailingZerosInstruction);
5189   match(Set dst (CountTrailingZerosI src));
5190   effect(KILL cr);
5191 
5192   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5193   ins_encode %{
5194     __ tzcntl($dst$$Register, $src$$Register);
5195   %}
5196   ins_pipe(ialu_reg);
5197 %}
5198 
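// Fallback when TZCNT is not available: BSF leaves the index of the least significant
// set bit (which equals the trailing-zero count) in $dst and sets ZF if the source is
// zero, in which case the result is forced to 32.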
5199 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5200   predicate(!UseCountTrailingZerosInstruction);
5201   match(Set dst (CountTrailingZerosI src));
5202   effect(KILL cr);
5203 
5204   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5205             "JNZ    done\n\t"
5206             "MOV    $dst, 32\n"
5207       "done:" %}
5208   ins_encode %{
5209     Register Rdst = $dst$$Register;
5210     Label done;
5211     __ bsfl(Rdst, $src$$Register);
5212     __ jccb(Assembler::notZero, done);
5213     __ movl(Rdst, BitsPerInt);
5214     __ bind(done);
5215   %}
5216   ins_pipe(ialu_reg);
5217 %}
5218 
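// TZCNT sets the carry flag when its source is zero, so the carry-clear branch below
// means the low word was non-zero; otherwise the high word is counted and 32 is added.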
5219 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5220   predicate(UseCountTrailingZerosInstruction);
5221   match(Set dst (CountTrailingZerosL src));
5222   effect(TEMP dst, KILL cr);
5223 
5224   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5225             "JNC    done\n\t"
5226             "TZCNT  $dst, $src.hi\n\t"
5227             "ADD    $dst, 32\n"
5228             "done:" %}
5229   ins_encode %{
5230     Register Rdst = $dst$$Register;
5231     Register Rsrc = $src$$Register;
5232     Label done;
5233     __ tzcntl(Rdst, Rsrc);
5234     __ jccb(Assembler::carryClear, done);
5235     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5236     __ addl(Rdst, BitsPerInt);
5237     __ bind(done);
5238   %}
5239   ins_pipe(ialu_reg);
5240 %}
5241 
5242 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5243   predicate(!UseCountTrailingZerosInstruction);
5244   match(Set dst (CountTrailingZerosL src));
5245   effect(TEMP dst, KILL cr);
5246 
5247   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5248             "JNZ    done\n\t"
5249             "BSF    $dst, $src.hi\n\t"
5250             "JNZ    msw_not_zero\n\t"
5251             "MOV    $dst, 32\n"
5252       "msw_not_zero:\n\t"
5253             "ADD    $dst, 32\n"
5254       "done:" %}
5255   ins_encode %{
5256     Register Rdst = $dst$$Register;
5257     Register Rsrc = $src$$Register;
5258     Label msw_not_zero;
5259     Label done;
5260     __ bsfl(Rdst, Rsrc);
5261     __ jccb(Assembler::notZero, done);
5262     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5263     __ jccb(Assembler::notZero, msw_not_zero);
5264     __ movl(Rdst, BitsPerInt);
5265     __ bind(msw_not_zero);
5266     __ addl(Rdst, BitsPerInt);
5267     __ bind(done);
5268   %}
5269   ins_pipe(ialu_reg);
5270 %}
5271 
5272 
5273 //---------- Population Count Instructions -------------------------------------
5274 
5275 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5276   predicate(UsePopCountInstruction);
5277   match(Set dst (PopCountI src));
5278   effect(KILL cr);
5279 
5280   format %{ "POPCNT $dst, $src" %}
5281   ins_encode %{
5282     __ popcntl($dst$$Register, $src$$Register);
5283   %}
5284   ins_pipe(ialu_reg);
5285 %}
5286 
5287 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5288   predicate(UsePopCountInstruction);
5289   match(Set dst (PopCountI (LoadI mem)));
5290   effect(KILL cr);
5291 
5292   format %{ "POPCNT $dst, $mem" %}
5293   ins_encode %{
5294     __ popcntl($dst$$Register, $mem$$Address);
5295   %}
5296   ins_pipe(ialu_reg);
5297 %}
5298 
5299 // Note: Long.bitCount(long) returns an int.
5300 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5301   predicate(UsePopCountInstruction);
5302   match(Set dst (PopCountL src));
5303   effect(KILL cr, TEMP tmp, TEMP dst);
5304 
5305   format %{ "POPCNT $dst, $src.lo\n\t"
5306             "POPCNT $tmp, $src.hi\n\t"
5307             "ADD    $dst, $tmp" %}
5308   ins_encode %{
5309     __ popcntl($dst$$Register, $src$$Register);
5310     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5311     __ addl($dst$$Register, $tmp$$Register);
5312   %}
5313   ins_pipe(ialu_reg);
5314 %}
5315 
5316 // Note: Long.bitCount(long) returns an int.
5317 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5318   predicate(UsePopCountInstruction);
5319   match(Set dst (PopCountL (LoadL mem)));
5320   effect(KILL cr, TEMP tmp, TEMP dst);
5321 
5322   format %{ "POPCNT $dst, $mem\n\t"
5323             "POPCNT $tmp, $mem+4\n\t"
5324             "ADD    $dst, $tmp" %}
5325   ins_encode %{
5326     //__ popcntl($dst$$Register, $mem$$Address$$first);
5327     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5328     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5329     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5330     __ addl($dst$$Register, $tmp$$Register);
5331   %}
5332   ins_pipe(ialu_reg);
5333 %}
5334 
5335 
5336 //----------Load/Store/Move Instructions---------------------------------------
5337 //----------Load Instructions--------------------------------------------------
5338 // Load Byte (8bit signed)
5339 instruct loadB(xRegI dst, memory mem) %{
5340   match(Set dst (LoadB mem));
5341 
5342   ins_cost(125);
5343   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5344 
5345   ins_encode %{
5346     __ movsbl($dst$$Register, $mem$$Address);
5347   %}
5348 
5349   ins_pipe(ialu_reg_mem);
5350 %}
5351 
5352 // Load Byte (8bit signed) into Long Register
5353 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5354   match(Set dst (ConvI2L (LoadB mem)));
5355   effect(KILL cr);
5356 
5357   ins_cost(375);
5358   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5359             "MOV    $dst.hi,$dst.lo\n\t"
5360             "SAR    $dst.hi,7" %}
5361 
5362   ins_encode %{
5363     __ movsbl($dst$$Register, $mem$$Address);
5364     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5365     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
5366   %}
5367 
5368   ins_pipe(ialu_reg_mem);
5369 %}
5370 
5371 // Load Unsigned Byte (8bit UNsigned)
5372 instruct loadUB(xRegI dst, memory mem) %{
5373   match(Set dst (LoadUB mem));
5374 
5375   ins_cost(125);
5376   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5377 
5378   ins_encode %{
5379     __ movzbl($dst$$Register, $mem$$Address);
5380   %}
5381 
5382   ins_pipe(ialu_reg_mem);
5383 %}
5384 
5385 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5386 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5387   match(Set dst (ConvI2L (LoadUB mem)));
5388   effect(KILL cr);
5389 
5390   ins_cost(250);
5391   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5392             "XOR    $dst.hi,$dst.hi" %}
5393 
5394   ins_encode %{
5395     Register Rdst = $dst$$Register;
5396     __ movzbl(Rdst, $mem$$Address);
5397     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5398   %}
5399 
5400   ins_pipe(ialu_reg_mem);
5401 %}
5402 
5403 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5404 instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
5405   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5406   effect(KILL cr);
5407 
5408   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
5409             "XOR    $dst.hi,$dst.hi\n\t"
5410             "AND    $dst.lo,$mask" %}
5411   ins_encode %{
5412     Register Rdst = $dst$$Register;
5413     __ movzbl(Rdst, $mem$$Address);
5414     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5415     __ andl(Rdst, $mask$$constant);
5416   %}
5417   ins_pipe(ialu_reg_mem);
5418 %}
5419 
5420 // Load Short (16bit signed)
5421 instruct loadS(rRegI dst, memory mem) %{
5422   match(Set dst (LoadS mem));
5423 
5424   ins_cost(125);
5425   format %{ "MOVSX  $dst,$mem\t# short" %}
5426 
5427   ins_encode %{
5428     __ movswl($dst$$Register, $mem$$Address);
5429   %}
5430 
5431   ins_pipe(ialu_reg_mem);
5432 %}
5433 
5434 // Load Short (16 bit signed) to Byte (8 bit signed)
5435 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5436   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5437 
5438   ins_cost(125);
5439   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5440   ins_encode %{
5441     __ movsbl($dst$$Register, $mem$$Address);
5442   %}
5443   ins_pipe(ialu_reg_mem);
5444 %}
5445 
5446 // Load Short (16bit signed) into Long Register
5447 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5448   match(Set dst (ConvI2L (LoadS mem)));
5449   effect(KILL cr);
5450 
5451   ins_cost(375);
5452   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5453             "MOV    $dst.hi,$dst.lo\n\t"
5454             "SAR    $dst.hi,15" %}
5455 
5456   ins_encode %{
5457     __ movswl($dst$$Register, $mem$$Address);
5458     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5459     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5460   %}
5461 
5462   ins_pipe(ialu_reg_mem);
5463 %}
5464 
5465 // Load Unsigned Short/Char (16bit unsigned)
5466 instruct loadUS(rRegI dst, memory mem) %{
5467   match(Set dst (LoadUS mem));
5468 
5469   ins_cost(125);
5470   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5471 
5472   ins_encode %{
5473     __ movzwl($dst$$Register, $mem$$Address);
5474   %}
5475 
5476   ins_pipe(ialu_reg_mem);
5477 %}
5478 
5479 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5480 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5481   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5482 
5483   ins_cost(125);
5484   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5485   ins_encode %{
5486     __ movsbl($dst$$Register, $mem$$Address);
5487   %}
5488   ins_pipe(ialu_reg_mem);
5489 %}
5490 
5491 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5492 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5493   match(Set dst (ConvI2L (LoadUS mem)));
5494   effect(KILL cr);
5495 
5496   ins_cost(250);
5497   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5498             "XOR    $dst.hi,$dst.hi" %}
5499 
5500   ins_encode %{
5501     __ movzwl($dst$$Register, $mem$$Address);
5502     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5503   %}
5504 
5505   ins_pipe(ialu_reg_mem);
5506 %}
5507 
5508 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5509 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5510   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5511   effect(KILL cr);
5512 
5513   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5514             "XOR    $dst.hi,$dst.hi" %}
5515   ins_encode %{
5516     Register Rdst = $dst$$Register;
5517     __ movzbl(Rdst, $mem$$Address);
5518     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5519   %}
5520   ins_pipe(ialu_reg_mem);
5521 %}
5522 
5523 // Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register
5524 instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{
5525   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5526   effect(KILL cr);
5527 
5528   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t"
5529             "XOR    $dst.hi,$dst.hi\n\t"
5530             "AND    $dst.lo,$mask" %}
5531   ins_encode %{
5532     Register Rdst = $dst$$Register;
5533     __ movzwl(Rdst, $mem$$Address);
5534     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5535     __ andl(Rdst, $mask$$constant);
5536   %}
5537   ins_pipe(ialu_reg_mem);
5538 %}
5539 
5540 // Load Integer
5541 instruct loadI(rRegI dst, memory mem) %{
5542   match(Set dst (LoadI mem));
5543 
5544   ins_cost(125);
5545   format %{ "MOV    $dst,$mem\t# int" %}
5546 
5547   ins_encode %{
5548     __ movl($dst$$Register, $mem$$Address);
5549   %}
5550 
5551   ins_pipe(ialu_reg_mem);
5552 %}
5553 
5554 // Load Integer (32 bit signed) to Byte (8 bit signed)
5555 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5556   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5557 
5558   ins_cost(125);
5559   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5560   ins_encode %{
5561     __ movsbl($dst$$Register, $mem$$Address);
5562   %}
5563   ins_pipe(ialu_reg_mem);
5564 %}
5565 
5566 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5567 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5568   match(Set dst (AndI (LoadI mem) mask));
5569 
5570   ins_cost(125);
5571   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5572   ins_encode %{
5573     __ movzbl($dst$$Register, $mem$$Address);
5574   %}
5575   ins_pipe(ialu_reg_mem);
5576 %}
5577 
5578 // Load Integer (32 bit signed) to Short (16 bit signed)
5579 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5580   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5581 
5582   ins_cost(125);
5583   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5584   ins_encode %{
5585     __ movswl($dst$$Register, $mem$$Address);
5586   %}
5587   ins_pipe(ialu_reg_mem);
5588 %}
5589 
5590 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5591 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5592   match(Set dst (AndI (LoadI mem) mask));
5593 
5594   ins_cost(125);
5595   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5596   ins_encode %{
5597     __ movzwl($dst$$Register, $mem$$Address);
5598   %}
5599   ins_pipe(ialu_reg_mem);
5600 %}
5601 
5602 // Load Integer into Long Register
5603 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5604   match(Set dst (ConvI2L (LoadI mem)));
5605   effect(KILL cr);
5606 
5607   ins_cost(375);
5608   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5609             "MOV    $dst.hi,$dst.lo\n\t"
5610             "SAR    $dst.hi,31" %}
5611 
5612   ins_encode %{
5613     __ movl($dst$$Register, $mem$$Address);
5614     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5615     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5616   %}
5617 
5618   ins_pipe(ialu_reg_mem);
5619 %}
5620 
5621 // Load Integer with mask 0xFF into Long Register
5622 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5623   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5624   effect(KILL cr);
5625 
5626   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5627             "XOR    $dst.hi,$dst.hi" %}
5628   ins_encode %{
5629     Register Rdst = $dst$$Register;
5630     __ movzbl(Rdst, $mem$$Address);
5631     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5632   %}
5633   ins_pipe(ialu_reg_mem);
5634 %}
5635 
5636 // Load Integer with mask 0xFFFF into Long Register
5637 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5638   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5639   effect(KILL cr);
5640 
5641   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5642             "XOR    $dst.hi,$dst.hi" %}
5643   ins_encode %{
5644     Register Rdst = $dst$$Register;
5645     __ movzwl(Rdst, $mem$$Address);
5646     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5647   %}
5648   ins_pipe(ialu_reg_mem);
5649 %}
5650 
5651 // Load Integer with 31-bit mask into Long Register
5652 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5653   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5654   effect(KILL cr);
5655 
5656   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5657             "XOR    $dst.hi,$dst.hi\n\t"
5658             "AND    $dst.lo,$mask" %}
5659   ins_encode %{
5660     Register Rdst = $dst$$Register;
5661     __ movl(Rdst, $mem$$Address);
5662     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5663     __ andl(Rdst, $mask$$constant);
5664   %}
5665   ins_pipe(ialu_reg_mem);
5666 %}
5667 
5668 // Load Unsigned Integer into Long Register
5669 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5670   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5671   effect(KILL cr);
5672 
5673   ins_cost(250);
5674   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5675             "XOR    $dst.hi,$dst.hi" %}
5676 
5677   ins_encode %{
5678     __ movl($dst$$Register, $mem$$Address);
5679     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5680   %}
5681 
5682   ins_pipe(ialu_reg_mem);
5683 %}
5684 
5685 // Load Long.  Cannot clobber address while loading, so restrict address
5686 // register to ESI
5687 instruct loadL(eRegL dst, load_long_memory mem) %{
5688   predicate(!((LoadLNode*)n)->require_atomic_access());
5689   match(Set dst (LoadL mem));
5690 
5691   ins_cost(250);
5692   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5693             "MOV    $dst.hi,$mem+4" %}
5694 
5695   ins_encode %{
5696     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5697     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5698     __ movl($dst$$Register, Amemlo);
5699     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5700   %}
5701 
5702   ins_pipe(ialu_reg_long_mem);
5703 %}
5704 
5705 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5706 // then store it down to the stack and reload on the int
5707 // side.
5708 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5709   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5710   match(Set dst (LoadL mem));
5711 
5712   ins_cost(200);
5713   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5714             "FISTp  $dst" %}
5715   ins_encode(enc_loadL_volatile(mem,dst));
5716   ins_pipe( fpu_reg_mem );
5717 %}
5718 
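// With SSE2 the 64-bit value is loaded with a single MOVSD access, keeping the
// volatile load atomic; it is then staged through an XMM temporary, either to a stack
// slot (below) or split into the two halves of the register pair (loadLX_reg_volatile).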
5719 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5720   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5721   match(Set dst (LoadL mem));
5722   effect(TEMP tmp);
5723   ins_cost(180);
5724   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5725             "MOVSD  $dst,$tmp" %}
5726   ins_encode %{
5727     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5728     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5729   %}
5730   ins_pipe( pipe_slow );
5731 %}
5732 
5733 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5734   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5735   match(Set dst (LoadL mem));
5736   effect(TEMP tmp);
5737   ins_cost(160);
5738   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5739             "MOVD   $dst.lo,$tmp\n\t"
5740             "PSRLQ  $tmp,32\n\t"
5741             "MOVD   $dst.hi,$tmp" %}
5742   ins_encode %{
5743     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5744     __ movdl($dst$$Register, $tmp$$XMMRegister);
5745     __ psrlq($tmp$$XMMRegister, 32);
5746     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5747   %}
5748   ins_pipe( pipe_slow );
5749 %}
5750 
5751 // Load Range
5752 instruct loadRange(rRegI dst, memory mem) %{
5753   match(Set dst (LoadRange mem));
5754 
5755   ins_cost(125);
5756   format %{ "MOV    $dst,$mem" %}
5757   opcode(0x8B);
5758   ins_encode( OpcP, RegMem(dst,mem));
5759   ins_pipe( ialu_reg_mem );
5760 %}
5761 
5762 
5763 // Load Pointer
5764 instruct loadP(eRegP dst, memory mem) %{
5765   match(Set dst (LoadP mem));
5766 
5767   ins_cost(125);
5768   format %{ "MOV    $dst,$mem" %}
5769   opcode(0x8B);
5770   ins_encode( OpcP, RegMem(dst,mem));
5771   ins_pipe( ialu_reg_mem );
5772 %}
5773 
5774 // Load Klass Pointer
5775 instruct loadKlass(eRegP dst, memory mem) %{
5776   match(Set dst (LoadKlass mem));
5777 
5778   ins_cost(125);
5779   format %{ "MOV    $dst,$mem" %}
5780   opcode(0x8B);
5781   ins_encode( OpcP, RegMem(dst,mem));
5782   ins_pipe( ialu_reg_mem );
5783 %}
5784 
5785 // Load Double
5786 instruct loadDPR(regDPR dst, memory mem) %{
5787   predicate(UseSSE<=1);
5788   match(Set dst (LoadD mem));
5789 
5790   ins_cost(150);
5791   format %{ "FLD_D  ST,$mem\n\t"
5792             "FSTP   $dst" %}
5793   opcode(0xDD);               /* DD /0 */
5794   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5795               Pop_Reg_DPR(dst) );
5796   ins_pipe( fpu_reg_mem );
5797 %}
5798 
5799 // Load Double to XMM
5800 instruct loadD(regD dst, memory mem) %{
5801   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5802   match(Set dst (LoadD mem));
5803   ins_cost(145);
5804   format %{ "MOVSD  $dst,$mem" %}
5805   ins_encode %{
5806     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5807   %}
5808   ins_pipe( pipe_slow );
5809 %}
5810 
5811 instruct loadD_partial(regD dst, memory mem) %{
5812   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5813   match(Set dst (LoadD mem));
5814   ins_cost(145);
5815   format %{ "MOVLPD $dst,$mem" %}
5816   ins_encode %{
5817     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5818   %}
5819   ins_pipe( pipe_slow );
5820 %}
5821 
5822 // Load to XMM register (single-precision floating point)
5823 // MOVSS instruction
5824 instruct loadF(regF dst, memory mem) %{
5825   predicate(UseSSE>=1);
5826   match(Set dst (LoadF mem));
5827   ins_cost(145);
5828   format %{ "MOVSS  $dst,$mem" %}
5829   ins_encode %{
5830     __ movflt ($dst$$XMMRegister, $mem$$Address);
5831   %}
5832   ins_pipe( pipe_slow );
5833 %}
5834 
5835 // Load Float
5836 instruct loadFPR(regFPR dst, memory mem) %{
5837   predicate(UseSSE==0);
5838   match(Set dst (LoadF mem));
5839 
5840   ins_cost(150);
5841   format %{ "FLD_S  ST,$mem\n\t"
5842             "FSTP   $dst" %}
5843   opcode(0xD9);               /* D9 /0 */
5844   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5845               Pop_Reg_FPR(dst) );
5846   ins_pipe( fpu_reg_mem );
5847 %}
5848 
5849 // Load Effective Address
5850 instruct leaP8(eRegP dst, indOffset8 mem) %{
5851   match(Set dst mem);
5852 
5853   ins_cost(110);
5854   format %{ "LEA    $dst,$mem" %}
5855   opcode(0x8D);
5856   ins_encode( OpcP, RegMem(dst,mem));
5857   ins_pipe( ialu_reg_reg_fat );
5858 %}
5859 
5860 instruct leaP32(eRegP dst, indOffset32 mem) %{
5861   match(Set dst mem);
5862 
5863   ins_cost(110);
5864   format %{ "LEA    $dst,$mem" %}
5865   opcode(0x8D);
5866   ins_encode( OpcP, RegMem(dst,mem));
5867   ins_pipe( ialu_reg_reg_fat );
5868 %}
5869 
5870 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5871   match(Set dst mem);
5872 
5873   ins_cost(110);
5874   format %{ "LEA    $dst,$mem" %}
5875   opcode(0x8D);
5876   ins_encode( OpcP, RegMem(dst,mem));
5877   ins_pipe( ialu_reg_reg_fat );
5878 %}
5879 
5880 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5881   match(Set dst mem);
5882 
5883   ins_cost(110);
5884   format %{ "LEA    $dst,$mem" %}
5885   opcode(0x8D);
5886   ins_encode( OpcP, RegMem(dst,mem));
5887   ins_pipe( ialu_reg_reg_fat );
5888 %}
5889 
5890 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5891   match(Set dst mem);
5892 
5893   ins_cost(110);
5894   format %{ "LEA    $dst,$mem" %}
5895   opcode(0x8D);
5896   ins_encode( OpcP, RegMem(dst,mem));
5897   ins_pipe( ialu_reg_reg_fat );
5898 %}
5899 
5900 // Load Constant
5901 instruct loadConI(rRegI dst, immI src) %{
5902   match(Set dst src);
5903 
5904   format %{ "MOV    $dst,$src" %}
5905   ins_encode( LdImmI(dst, src) );
5906   ins_pipe( ialu_reg_fat );
5907 %}
5908 
5909 // Load Constant zero
5910 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5911   match(Set dst src);
5912   effect(KILL cr);
5913 
5914   ins_cost(50);
5915   format %{ "XOR    $dst,$dst" %}
5916   opcode(0x33);  /* + rd */
5917   ins_encode( OpcP, RegReg( dst, dst ) );
5918   ins_pipe( ialu_reg );
5919 %}
5920 
5921 instruct loadConP(eRegP dst, immP src) %{
5922   match(Set dst src);
5923 
5924   format %{ "MOV    $dst,$src" %}
5925   opcode(0xB8);  /* + rd */
5926   ins_encode( LdImmP(dst, src) );
5927   ins_pipe( ialu_reg_fat );
5928 %}
5929 
5930 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5931   match(Set dst src);
5932   effect(KILL cr);
5933   ins_cost(200);
5934   format %{ "MOV    $dst.lo,$src.lo\n\t"
5935             "MOV    $dst.hi,$src.hi" %}
5936   opcode(0xB8);
5937   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5938   ins_pipe( ialu_reg_long_fat );
5939 %}
5940 
5941 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5942   match(Set dst src);
5943   effect(KILL cr);
5944   ins_cost(150);
5945   format %{ "XOR    $dst.lo,$dst.lo\n\t"
5946             "XOR    $dst.hi,$dst.hi" %}
5947   opcode(0x33,0x33);
5948   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5949   ins_pipe( ialu_reg_long );
5950 %}
5951 
5952 // The instruction usage is guarded by predicate in operand immFPR().
5953 instruct loadConFPR(regFPR dst, immFPR con) %{
5954   match(Set dst con);
5955   ins_cost(125);
5956   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
5957             "FSTP   $dst" %}
5958   ins_encode %{
5959     __ fld_s($constantaddress($con));
5960     __ fstp_d($dst$$reg);
5961   %}
5962   ins_pipe(fpu_reg_con);
5963 %}
5964 
5965 // The instruction usage is guarded by predicate in operand immFPR0().
5966 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
5967   match(Set dst con);
5968   ins_cost(125);
5969   format %{ "FLDZ   ST\n\t"
5970             "FSTP   $dst" %}
5971   ins_encode %{
5972     __ fldz();
5973     __ fstp_d($dst$$reg);
5974   %}
5975   ins_pipe(fpu_reg_con);
5976 %}
5977 
5978 // The instruction usage is guarded by predicate in operand immFPR1().
5979 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
5980   match(Set dst con);
5981   ins_cost(125);
5982   format %{ "FLD1   ST\n\t"
5983             "FSTP   $dst" %}
5984   ins_encode %{
5985     __ fld1();
5986     __ fstp_d($dst$$reg);
5987   %}
5988   ins_pipe(fpu_reg_con);
5989 %}
5990 
5991 // The instruction usage is guarded by predicate in operand immF().
5992 instruct loadConF(regF dst, immF con) %{
5993   match(Set dst con);
5994   ins_cost(125);
5995   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
5996   ins_encode %{
5997     __ movflt($dst$$XMMRegister, $constantaddress($con));
5998   %}
5999   ins_pipe(pipe_slow);
6000 %}
6001 
6002 // The instruction usage is guarded by predicate in operand immF0().
6003 instruct loadConF0(regF dst, immF0 src) %{
6004   match(Set dst src);
6005   ins_cost(100);
6006   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6007   ins_encode %{
6008     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6009   %}
6010   ins_pipe(pipe_slow);
6011 %}
6012 
6013 // The instruction usage is guarded by predicate in operand immDPR().
6014 instruct loadConDPR(regDPR dst, immDPR con) %{
6015   match(Set dst con);
6016   ins_cost(125);
6017 
6018   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6019             "FSTP   $dst" %}
6020   ins_encode %{
6021     __ fld_d($constantaddress($con));
6022     __ fstp_d($dst$$reg);
6023   %}
6024   ins_pipe(fpu_reg_con);
6025 %}
6026 
6027 // The instruction usage is guarded by predicate in operand immDPR0().
6028 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6029   match(Set dst con);
6030   ins_cost(125);
6031 
6032   format %{ "FLDZ   ST\n\t"
6033             "FSTP   $dst" %}
6034   ins_encode %{
6035     __ fldz();
6036     __ fstp_d($dst$$reg);
6037   %}
6038   ins_pipe(fpu_reg_con);
6039 %}
6040 
6041 // The instruction usage is guarded by predicate in operand immDPR1().
6042 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6043   match(Set dst con);
6044   ins_cost(125);
6045 
6046   format %{ "FLD1   ST\n\t"
6047             "FSTP   $dst" %}
6048   ins_encode %{
6049     __ fld1();
6050     __ fstp_d($dst$$reg);
6051   %}
6052   ins_pipe(fpu_reg_con);
6053 %}
6054 
6055 // The instruction usage is guarded by predicate in operand immD().
6056 instruct loadConD(regD dst, immD con) %{
6057   match(Set dst con);
6058   ins_cost(125);
6059   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6060   ins_encode %{
6061     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6062   %}
6063   ins_pipe(pipe_slow);
6064 %}
6065 
6066 // The instruction usage is guarded by predicate in operand immD0().
6067 instruct loadConD0(regD dst, immD0 src) %{
6068   match(Set dst src);
6069   ins_cost(100);
6070   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6071   ins_encode %{
6072     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6073   %}
6074   ins_pipe( pipe_slow );
6075 %}
6076 
6077 // Load Stack Slot
6078 instruct loadSSI(rRegI dst, stackSlotI src) %{
6079   match(Set dst src);
6080   ins_cost(125);
6081 
6082   format %{ "MOV    $dst,$src" %}
6083   opcode(0x8B);
6084   ins_encode( OpcP, RegMem(dst,src));
6085   ins_pipe( ialu_reg_mem );
6086 %}
6087 
6088 instruct loadSSL(eRegL dst, stackSlotL src) %{
6089   match(Set dst src);
6090 
6091   ins_cost(200);
6092   format %{ "MOV    $dst.lo,$src\n\t"
6093             "MOV    $dst.hi,$src+4" %}
6094   opcode(0x8B, 0x8B);
6095   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6096   ins_pipe( ialu_mem_long_reg );
6097 %}
6098 
6099 // Load Stack Slot
6100 instruct loadSSP(eRegP dst, stackSlotP src) %{
6101   match(Set dst src);
6102   ins_cost(125);
6103 
6104   format %{ "MOV    $dst,$src" %}
6105   opcode(0x8B);
6106   ins_encode( OpcP, RegMem(dst,src));
6107   ins_pipe( ialu_reg_mem );
6108 %}
6109 
6110 // Load Stack Slot
6111 instruct loadSSF(regFPR dst, stackSlotF src) %{
6112   match(Set dst src);
6113   ins_cost(125);
6114 
6115   format %{ "FLD_S  $src\n\t"
6116             "FSTP   $dst" %}
6117   opcode(0xD9);               /* D9 /0, FLD m32real */
6118   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6119               Pop_Reg_FPR(dst) );
6120   ins_pipe( fpu_reg_mem );
6121 %}
6122 
6123 // Load Stack Slot
6124 instruct loadSSD(regDPR dst, stackSlotD src) %{
6125   match(Set dst src);
6126   ins_cost(125);
6127 
6128   format %{ "FLD_D  $src\n\t"
6129             "FSTP   $dst" %}
6130   opcode(0xDD);               /* DD /0, FLD m64real */
6131   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6132               Pop_Reg_DPR(dst) );
6133   ins_pipe( fpu_reg_mem );
6134 %}
6135 
6136 // Prefetch instructions for allocation.
6137 // Must be safe to execute with invalid address (cannot fault).
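// AllocatePrefetchInstr selects the flavor: 0 = PREFETCHNTA, 1 = PREFETCHT0,
// 2 = PREFETCHT2, 3 = PREFETCHW.  With UseSSE==0 and any setting other than 3 the
// prefetch is an empty encoding.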
6138 
6139 instruct prefetchAlloc0( memory mem ) %{
6140   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6141   match(PrefetchAllocation mem);
6142   ins_cost(0);
6143   size(0);
6144   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6145   ins_encode();
6146   ins_pipe(empty);
6147 %}
6148 
6149 instruct prefetchAlloc( memory mem ) %{
6150   predicate(AllocatePrefetchInstr==3);
6151   match( PrefetchAllocation mem );
6152   ins_cost(100);
6153 
6154   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6155   ins_encode %{
6156     __ prefetchw($mem$$Address);
6157   %}
6158   ins_pipe(ialu_mem);
6159 %}
6160 
6161 instruct prefetchAllocNTA( memory mem ) %{
6162   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6163   match(PrefetchAllocation mem);
6164   ins_cost(100);
6165 
6166   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6167   ins_encode %{
6168     __ prefetchnta($mem$$Address);
6169   %}
6170   ins_pipe(ialu_mem);
6171 %}
6172 
6173 instruct prefetchAllocT0( memory mem ) %{
6174   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6175   match(PrefetchAllocation mem);
6176   ins_cost(100);
6177 
6178   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6179   ins_encode %{
6180     __ prefetcht0($mem$$Address);
6181   %}
6182   ins_pipe(ialu_mem);
6183 %}
6184 
6185 instruct prefetchAllocT2( memory mem ) %{
6186   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6187   match(PrefetchAllocation mem);
6188   ins_cost(100);
6189 
6190   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6191   ins_encode %{
6192     __ prefetcht2($mem$$Address);
6193   %}
6194   ins_pipe(ialu_mem);
6195 %}
6196 
6197 //----------Store Instructions-------------------------------------------------
6198 
6199 // Store Byte
6200 instruct storeB(memory mem, xRegI src) %{
6201   match(Set mem (StoreB mem src));
6202 
6203   ins_cost(125);
6204   format %{ "MOV8   $mem,$src" %}
6205   opcode(0x88);
6206   ins_encode( OpcP, RegMem( src, mem ) );
6207   ins_pipe( ialu_mem_reg );
6208 %}
6209 
6210 // Store Char/Short
6211 instruct storeC(memory mem, rRegI src) %{
6212   match(Set mem (StoreC mem src));
6213 
6214   ins_cost(125);
6215   format %{ "MOV16  $mem,$src" %}
6216   opcode(0x89, 0x66);
6217   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6218   ins_pipe( ialu_mem_reg );
6219 %}
6220 
6221 // Store Integer
6222 instruct storeI(memory mem, rRegI src) %{
6223   match(Set mem (StoreI mem src));
6224 
6225   ins_cost(125);
6226   format %{ "MOV    $mem,$src" %}
6227   opcode(0x89);
6228   ins_encode( OpcP, RegMem( src, mem ) );
6229   ins_pipe( ialu_mem_reg );
6230 %}
6231 
6232 // Store Long
6233 instruct storeL(long_memory mem, eRegL src) %{
6234   predicate(!((StoreLNode*)n)->require_atomic_access());
6235   match(Set mem (StoreL mem src));
6236 
6237   ins_cost(200);
6238   format %{ "MOV    $mem,$src.lo\n\t"
6239             "MOV    $mem+4,$src.hi" %}
6240   opcode(0x89, 0x89);
6241   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6242   ins_pipe( ialu_mem_long_reg );
6243 %}
6244 
6245 // Store Long to Integer
6246 instruct storeL2I(memory mem, eRegL src) %{
6247   match(Set mem (StoreI mem (ConvL2I src)));
6248 
6249   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6250   ins_encode %{
6251     __ movl($mem$$Address, $src$$Register);
6252   %}
6253   ins_pipe(ialu_mem_reg);
6254 %}
6255 
6256 // Volatile Store Long.  Must be atomic, so move it into
6257 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6258 // target address before the store (for null-ptr checks)
6259 // so the memory operand is used twice in the encoding.
6260 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6261   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6262   match(Set mem (StoreL mem src));
6263   effect( KILL cr );
6264   ins_cost(400);
6265   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6266             "FILD   $src\n\t"
6267             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6268   opcode(0x3B);
6269   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6270   ins_pipe( fpu_reg_mem );
6271 %}
6272 
6273 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6274   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6275   match(Set mem (StoreL mem src));
6276   effect( TEMP tmp, KILL cr );
6277   ins_cost(380);
6278   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6279             "MOVSD  $tmp,$src\n\t"
6280             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6281   ins_encode %{
6282     __ cmpl(rax, $mem$$Address);
6283     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6284     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6285   %}
6286   ins_pipe( pipe_slow );
6287 %}
6288 
6289 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6290   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6291   match(Set mem (StoreL mem src));
6292   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6293   ins_cost(360);
6294   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6295             "MOVD   $tmp,$src.lo\n\t"
6296             "MOVD   $tmp2,$src.hi\n\t"
6297             "PUNPCKLDQ $tmp,$tmp2\n\t"
6298             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6299   ins_encode %{
6300     __ cmpl(rax, $mem$$Address);
6301     __ movdl($tmp$$XMMRegister, $src$$Register);
6302     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6303     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6304     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6305   %}
6306   ins_pipe( pipe_slow );
6307 %}
6308 
6309 // Store Pointer; for storing unknown oops and raw pointers
6310 instruct storeP(memory mem, anyRegP src) %{
6311   match(Set mem (StoreP mem src));
6312 
6313   ins_cost(125);
6314   format %{ "MOV    $mem,$src" %}
6315   opcode(0x89);
6316   ins_encode( OpcP, RegMem( src, mem ) );
6317   ins_pipe( ialu_mem_reg );
6318 %}
6319 
6320 // Store Integer Immediate
6321 instruct storeImmI(memory mem, immI src) %{
6322   match(Set mem (StoreI mem src));
6323 
6324   ins_cost(150);
6325   format %{ "MOV    $mem,$src" %}
6326   opcode(0xC7);               /* C7 /0 */
6327   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6328   ins_pipe( ialu_mem_imm );
6329 %}
6330 
6331 // Store Short/Char Immediate
6332 instruct storeImmI16(memory mem, immI16 src) %{
6333   predicate(UseStoreImmI16);
6334   match(Set mem (StoreC mem src));
6335 
6336   ins_cost(150);
6337   format %{ "MOV16  $mem,$src" %}
6338   opcode(0xC7);     /* C7 /0 Same as the 32-bit store immediate, with an operand-size (0x66) prefix */
6339   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6340   ins_pipe( ialu_mem_imm );
6341 %}
6342 
6343 // Store Pointer Immediate; null pointers or constant oops that do not
6344 // need card-mark barriers.
6345 instruct storeImmP(memory mem, immP src) %{
6346   match(Set mem (StoreP mem src));
6347 
6348   ins_cost(150);
6349   format %{ "MOV    $mem,$src" %}
6350   opcode(0xC7);               /* C7 /0 */
6351   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6352   ins_pipe( ialu_mem_imm );
6353 %}
6354 
6355 // Store Byte Immediate
6356 instruct storeImmB(memory mem, immI8 src) %{
6357   match(Set mem (StoreB mem src));
6358 
6359   ins_cost(150);
6360   format %{ "MOV8   $mem,$src" %}
6361   opcode(0xC6);               /* C6 /0 */
6362   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6363   ins_pipe( ialu_mem_imm );
6364 %}
6365 
6366 // Store CMS card-mark Immediate
6367 instruct storeImmCM(memory mem, immI8 src) %{
6368   match(Set mem (StoreCM mem src));
6369 
6370   ins_cost(150);
6371   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6372   opcode(0xC6);               /* C6 /0 */
6373   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6374   ins_pipe( ialu_mem_imm );
6375 %}
6376 
6377 // Store Double
6378 instruct storeDPR( memory mem, regDPR1 src) %{
6379   predicate(UseSSE<=1);
6380   match(Set mem (StoreD mem src));
6381 
6382   ins_cost(100);
6383   format %{ "FST_D  $mem,$src" %}
6384   opcode(0xDD);       /* DD /2 */
6385   ins_encode( enc_FPR_store(mem,src) );
6386   ins_pipe( fpu_mem_reg );
6387 %}
6388 
6389 // Store double does rounding on x86
6390 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6391   predicate(UseSSE<=1);
6392   match(Set mem (StoreD mem (RoundDouble src)));
6393 
6394   ins_cost(100);
6395   format %{ "FST_D  $mem,$src\t# round" %}
6396   opcode(0xDD);       /* DD /2 */
6397   ins_encode( enc_FPR_store(mem,src) );
6398   ins_pipe( fpu_mem_reg );
6399 %}
6400 
6401 // Store XMM register to memory (double-precision floating point)
6402 // MOVSD instruction
6403 instruct storeD(memory mem, regD src) %{
6404   predicate(UseSSE>=2);
6405   match(Set mem (StoreD mem src));
6406   ins_cost(95);
6407   format %{ "MOVSD  $mem,$src" %}
6408   ins_encode %{
6409     __ movdbl($mem$$Address, $src$$XMMRegister);
6410   %}
6411   ins_pipe( pipe_slow );
6412 %}
6413 
6414 // Store XMM register to memory (single-precision floating point)
6415 // MOVSS instruction
6416 instruct storeF(memory mem, regF src) %{
6417   predicate(UseSSE>=1);
6418   match(Set mem (StoreF mem src));
6419   ins_cost(95);
6420   format %{ "MOVSS  $mem,$src" %}
6421   ins_encode %{
6422     __ movflt($mem$$Address, $src$$XMMRegister);
6423   %}
6424   ins_pipe( pipe_slow );
6425 %}
6426 
6427 // Store Float
6428 instruct storeFPR( memory mem, regFPR1 src) %{
6429   predicate(UseSSE==0);
6430   match(Set mem (StoreF mem src));
6431 
6432   ins_cost(100);
6433   format %{ "FST_S  $mem,$src" %}
6434   opcode(0xD9);       /* D9 /2 */
6435   ins_encode( enc_FPR_store(mem,src) );
6436   ins_pipe( fpu_mem_reg );
6437 %}
6438 
6439 // Store Float does rounding on x86
6440 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6441   predicate(UseSSE==0);
6442   match(Set mem (StoreF mem (RoundFloat src)));
6443 
6444   ins_cost(100);
6445   format %{ "FST_S  $mem,$src\t# round" %}
6446   opcode(0xD9);       /* D9 /2 */
6447   ins_encode( enc_FPR_store(mem,src) );
6448   ins_pipe( fpu_mem_reg );
6449 %}
6450 
6451 // Store Float from Double: rounds the double to single precision on x86
6452 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6453   predicate(UseSSE<=1);
6454   match(Set mem (StoreF mem (ConvD2F src)));
6455 
6456   ins_cost(100);
6457   format %{ "FST_S  $mem,$src\t# D-round" %}
6458   opcode(0xD9);       /* D9 /2 */
6459   ins_encode( enc_FPR_store(mem,src) );
6460   ins_pipe( fpu_mem_reg );
6461 %}
6462 
6463 // Store immediate Float value (faster than a store from an FPU register)
6464 // Use of this instruction is guarded by the predicate in operand immFPR().
6465 instruct storeFPR_imm( memory mem, immFPR src) %{
6466   match(Set mem (StoreF mem src));
6467 
6468   ins_cost(50);
6469   format %{ "MOV    $mem,$src\t# store float" %}
6470   opcode(0xC7);               /* C7 /0 */
6471   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6472   ins_pipe( ialu_mem_imm );
6473 %}
6474 
6475 // Store immediate Float value (faster than a store from an XMM register)
6476 // Use of this instruction is guarded by the predicate in operand immF().
6477 instruct storeF_imm( memory mem, immF src) %{
6478   match(Set mem (StoreF mem src));
6479 
6480   ins_cost(50);
6481   format %{ "MOV    $mem,$src\t# store float" %}
6482   opcode(0xC7);               /* C7 /0 */
6483   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6484   ins_pipe( ialu_mem_imm );
6485 %}
6486 
6487 // Store Integer to stack slot
6488 instruct storeSSI(stackSlotI dst, rRegI src) %{
6489   match(Set dst src);
6490 
6491   ins_cost(100);
6492   format %{ "MOV    $dst,$src" %}
6493   opcode(0x89);
6494   ins_encode( OpcPRegSS( dst, src ) );
6495   ins_pipe( ialu_mem_reg );
6496 %}
6497 
6498 // Store Pointer to stack slot
6499 instruct storeSSP(stackSlotP dst, eRegP src) %{
6500   match(Set dst src);
6501 
6502   ins_cost(100);
6503   format %{ "MOV    $dst,$src" %}
6504   opcode(0x89);
6505   ins_encode( OpcPRegSS( dst, src ) );
6506   ins_pipe( ialu_mem_reg );
6507 %}
6508 
6509 // Store Long to stack slot
6510 instruct storeSSL(stackSlotL dst, eRegL src) %{
6511   match(Set dst src);
6512 
6513   ins_cost(200);
6514   format %{ "MOV    $dst,$src.lo\n\t"
6515             "MOV    $dst+4,$src.hi" %}
6516   opcode(0x89, 0x89);
6517   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6518   ins_pipe( ialu_mem_long_reg );
6519 %}
6520 
6521 //----------MemBar Instructions-----------------------------------------------
6522 // Memory barrier flavors
6523 
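     // On x86, loads and stores are already ordered strongly enough that the
     // acquire, release and storestore flavors need no code (size(0), empty
     // encodings below).  Only the StoreLoad barrier (MemBarVolatile) emits
     // anything: a locked ADD to the top of the stack, which orders prior
     // stores before subsequent loads.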
6524 instruct membar_acquire() %{
6525   match(MemBarAcquire);
6526   match(LoadFence);
6527   ins_cost(400);
6528 
6529   size(0);
6530   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6531   ins_encode();
6532   ins_pipe(empty);
6533 %}
6534 
6535 instruct membar_acquire_lock() %{
6536   match(MemBarAcquireLock);
6537   ins_cost(0);
6538 
6539   size(0);
6540   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6541   ins_encode( );
6542   ins_pipe(empty);
6543 %}
6544 
6545 instruct membar_release() %{
6546   match(MemBarRelease);
6547   match(StoreFence);
6548   ins_cost(400);
6549 
6550   size(0);
6551   format %{ "MEMBAR-release ! (empty encoding)" %}
6552   ins_encode( );
6553   ins_pipe(empty);
6554 %}
6555 
6556 instruct membar_release_lock() %{
6557   match(MemBarReleaseLock);
6558   ins_cost(0);
6559 
6560   size(0);
6561   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6562   ins_encode( );
6563   ins_pipe(empty);
6564 %}
6565 
6566 instruct membar_volatile(eFlagsReg cr) %{
6567   match(MemBarVolatile);
6568   effect(KILL cr);
6569   ins_cost(400);
6570 
6571   format %{
6572     $$template
6573     if (os::is_MP()) {
6574       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6575     } else {
6576       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6577     }
6578   %}
6579   ins_encode %{
6580     __ membar(Assembler::StoreLoad);
6581   %}
6582   ins_pipe(pipe_slow);
6583 %}
6584 
6585 instruct unnecessary_membar_volatile() %{
6586   match(MemBarVolatile);
6587   predicate(Matcher::post_store_load_barrier(n));
6588   ins_cost(0);
6589 
6590   size(0);
6591   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6592   ins_encode( );
6593   ins_pipe(empty);
6594 %}
6595 
6596 instruct membar_storestore() %{
6597   match(MemBarStoreStore);
6598   ins_cost(0);
6599 
6600   size(0);
6601   format %{ "MEMBAR-storestore (empty encoding)" %}
6602   ins_encode( );
6603   ins_pipe(empty);
6604 %}
6605 
6606 //----------Move Instructions--------------------------------------------------
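     // CastX2P merely reinterprets an integer as a pointer: src and dst are
     // both pinned to EAX below, so no code is emitted; CastP2X is a plain
     // register-to-register copy.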
6607 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6608   match(Set dst (CastX2P src));
6609   format %{ "# X2P  $dst, $src" %}
6610   ins_encode( /*empty encoding*/ );
6611   ins_cost(0);
6612   ins_pipe(empty);
6613 %}
6614 
6615 instruct castP2X(rRegI dst, eRegP src ) %{
6616   match(Set dst (CastP2X src));
6617   ins_cost(50);
6618   format %{ "MOV    $dst, $src\t# CastP2X" %}
6619   ins_encode( enc_Copy( dst, src) );
6620   ins_pipe( ialu_reg_reg );
6621 %}
6622 
6623 //----------Conditional Move---------------------------------------------------
6624 // Conditional move
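     // The jmov* forms below are used when the CPU has no CMOV instruction
     // (predicate !VM_Version::supports_cmov()): they branch around a plain
     // MOV using the inverted condition instead of a real conditional move.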
6625 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6626   predicate(!VM_Version::supports_cmov() );
6627   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6628   ins_cost(200);
6629   format %{ "J$cop,us skip\t# signed cmove\n\t"
6630             "MOV    $dst,$src\n"
6631       "skip:" %}
6632   ins_encode %{
6633     Label Lskip;
6634     // Invert sense of branch from sense of CMOV
6635     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6636     __ movl($dst$$Register, $src$$Register);
6637     __ bind(Lskip);
6638   %}
6639   ins_pipe( pipe_cmov_reg );
6640 %}
6641 
6642 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6643   predicate(!VM_Version::supports_cmov() );
6644   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6645   ins_cost(200);
6646   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6647             "MOV    $dst,$src\n"
6648       "skip:" %}
6649   ins_encode %{
6650     Label Lskip;
6651     // Invert sense of branch from sense of CMOV
6652     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6653     __ movl($dst$$Register, $src$$Register);
6654     __ bind(Lskip);
6655   %}
6656   ins_pipe( pipe_cmov_reg );
6657 %}
6658 
6659 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6660   predicate(VM_Version::supports_cmov() );
6661   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6662   ins_cost(200);
6663   format %{ "CMOV$cop $dst,$src" %}
6664   opcode(0x0F,0x40);
6665   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6666   ins_pipe( pipe_cmov_reg );
6667 %}
6668 
6669 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6670   predicate(VM_Version::supports_cmov() );
6671   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6672   ins_cost(200);
6673   format %{ "CMOV$cop $dst,$src" %}
6674   opcode(0x0F,0x40);
6675   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6676   ins_pipe( pipe_cmov_reg );
6677 %}
6678 
6679 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6680   predicate(VM_Version::supports_cmov() );
6681   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6682   ins_cost(200);
6683   expand %{
6684     cmovI_regU(cop, cr, dst, src);
6685   %}
6686 %}
6687 
6688 // Conditional move
6689 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6690   predicate(VM_Version::supports_cmov() );
6691   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6692   ins_cost(250);
6693   format %{ "CMOV$cop $dst,$src" %}
6694   opcode(0x0F,0x40);
6695   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6696   ins_pipe( pipe_cmov_mem );
6697 %}
6698 
6699 // Conditional move
6700 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6701   predicate(VM_Version::supports_cmov() );
6702   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6703   ins_cost(250);
6704   format %{ "CMOV$cop $dst,$src" %}
6705   opcode(0x0F,0x40);
6706   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6707   ins_pipe( pipe_cmov_mem );
6708 %}
6709 
6710 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6711   predicate(VM_Version::supports_cmov() );
6712   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6713   ins_cost(250);
6714   expand %{
6715     cmovI_memU(cop, cr, dst, src);
6716   %}
6717 %}
6718 
6719 // Conditional move
6720 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6721   predicate(VM_Version::supports_cmov() );
6722   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6723   ins_cost(200);
6724   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6725   opcode(0x0F,0x40);
6726   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6727   ins_pipe( pipe_cmov_reg );
6728 %}
6729 
6730 // Conditional move (non-P6 version)
6731 // Note:  a CMoveP is generated for  stubs and native wrappers
6732 //        regardless of whether we are on a P6, so we
6733 //        emulate a cmov here
6734 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6735   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6736   ins_cost(300);
6737   format %{ "Jn$cop   skip\n\t"
6738           "MOV    $dst,$src\t# pointer\n"
6739       "skip:" %}
6740   opcode(0x8b);
6741   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6742   ins_pipe( pipe_cmov_reg );
6743 %}
6744 
6745 // Conditional move
6746 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6747   predicate(VM_Version::supports_cmov() );
6748   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6749   ins_cost(200);
6750   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6751   opcode(0x0F,0x40);
6752   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6753   ins_pipe( pipe_cmov_reg );
6754 %}
6755 
6756 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6757   predicate(VM_Version::supports_cmov() );
6758   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6759   ins_cost(200);
6760   expand %{
6761     cmovP_regU(cop, cr, dst, src);
6762   %}
6763 %}
6764 
6765 // DISABLED: Requires the ADLC to emit a bottom_type call that
6766 // correctly meets the two pointer arguments; one is an incoming
6767 // register but the other is a memory operand.  ALSO appears to
6768 // be buggy with implicit null checks.
6769 //
6770 //// Conditional move
6771 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6772 //  predicate(VM_Version::supports_cmov() );
6773 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6774 //  ins_cost(250);
6775 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6776 //  opcode(0x0F,0x40);
6777 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6778 //  ins_pipe( pipe_cmov_mem );
6779 //%}
6780 //
6781 //// Conditional move
6782 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6783 //  predicate(VM_Version::supports_cmov() );
6784 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6785 //  ins_cost(250);
6786 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6787 //  opcode(0x0F,0x40);
6788 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6789 //  ins_pipe( pipe_cmov_mem );
6790 //%}
6791 
6792 // Conditional move
6793 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6794   predicate(UseSSE<=1);
6795   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6796   ins_cost(200);
6797   format %{ "FCMOV$cop $dst,$src\t# double" %}
6798   opcode(0xDA);
6799   ins_encode( enc_cmov_dpr(cop,src) );
6800   ins_pipe( pipe_cmovDPR_reg );
6801 %}
6802 
6803 // Conditional move
6804 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6805   predicate(UseSSE==0);
6806   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6807   ins_cost(200);
6808   format %{ "FCMOV$cop $dst,$src\t# float" %}
6809   opcode(0xDA);
6810   ins_encode( enc_cmov_dpr(cop,src) );
6811   ins_pipe( pipe_cmovDPR_reg );
6812 %}
6813 
6814 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6815 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6816   predicate(UseSSE<=1);
6817   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6818   ins_cost(200);
6819   format %{ "Jn$cop   skip\n\t"
6820             "MOV    $dst,$src\t# double\n"
6821       "skip:" %}
6822   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6823   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6824   ins_pipe( pipe_cmovDPR_reg );
6825 %}
6826 
6827 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6828 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6829   predicate(UseSSE==0);
6830   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6831   ins_cost(200);
6832   format %{ "Jn$cop    skip\n\t"
6833             "MOV    $dst,$src\t# float\n"
6834       "skip:" %}
6835   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6836   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6837   ins_pipe( pipe_cmovDPR_reg );
6838 %}
6839 
6840 // There is no conditional move for SSE/SSE2 registers, so branch around a plain move
6841 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6842   predicate (UseSSE>=1);
6843   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6844   ins_cost(200);
6845   format %{ "Jn$cop   skip\n\t"
6846             "MOVSS  $dst,$src\t# float\n"
6847       "skip:" %}
6848   ins_encode %{
6849     Label skip;
6850     // Invert sense of branch from sense of CMOV
6851     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6852     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6853     __ bind(skip);
6854   %}
6855   ins_pipe( pipe_slow );
6856 %}
6857 
6858 // There is no conditional move for SSE/SSE2 registers, so branch around a plain move
6859 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6860   predicate (UseSSE>=2);
6861   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6862   ins_cost(200);
6863   format %{ "Jn$cop   skip\n\t"
6864             "MOVSD  $dst,$src\t# double\n"
6865       "skip:" %}
6866   ins_encode %{
6867     Label skip;
6868     // Invert sense of branch from sense of CMOV
6869     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6870     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6871     __ bind(skip);
6872   %}
6873   ins_pipe( pipe_slow );
6874 %}
6875 
6876 // unsigned version
6877 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6878   predicate (UseSSE>=1);
6879   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6880   ins_cost(200);
6881   format %{ "Jn$cop   skip\n\t"
6882             "MOVSS  $dst,$src\t# float\n"
6883       "skip:" %}
6884   ins_encode %{
6885     Label skip;
6886     // Invert sense of branch from sense of CMOV
6887     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6888     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6889     __ bind(skip);
6890   %}
6891   ins_pipe( pipe_slow );
6892 %}
6893 
6894 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6895   predicate (UseSSE>=1);
6896   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6897   ins_cost(200);
6898   expand %{
6899     fcmovF_regU(cop, cr, dst, src);
6900   %}
6901 %}
6902 
6903 // unsigned version
6904 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6905   predicate (UseSSE>=2);
6906   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6907   ins_cost(200);
6908   format %{ "Jn$cop   skip\n\t"
6909             "MOVSD  $dst,$src\t# double\n"
6910       "skip:" %}
6911   ins_encode %{
6912     Label skip;
6913     // Invert sense of branch from sense of CMOV
6914     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6915     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6916     __ bind(skip);
6917   %}
6918   ins_pipe( pipe_slow );
6919 %}
6920 
6921 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6922   predicate (UseSSE>=2);
6923   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6924   ins_cost(200);
6925   expand %{
6926     fcmovD_regU(cop, cr, dst, src);
6927   %}
6928 %}
6929 
6930 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6931   predicate(VM_Version::supports_cmov() );
6932   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6933   ins_cost(200);
6934   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6935             "CMOV$cop $dst.hi,$src.hi" %}
6936   opcode(0x0F,0x40);
6937   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6938   ins_pipe( pipe_cmov_reg_long );
6939 %}
6940 
6941 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6942   predicate(VM_Version::supports_cmov() );
6943   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6944   ins_cost(200);
6945   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6946             "CMOV$cop $dst.hi,$src.hi" %}
6947   opcode(0x0F,0x40);
6948   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6949   ins_pipe( pipe_cmov_reg_long );
6950 %}
6951 
6952 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6953   predicate(VM_Version::supports_cmov() );
6954   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6955   ins_cost(200);
6956   expand %{
6957     cmovL_regU(cop, cr, dst, src);
6958   %}
6959 %}
6960 
6961 //----------Arithmetic Instructions--------------------------------------------
6962 //----------Addition Instructions----------------------------------------------
6963 
6964 // Integer Addition Instructions
6965 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
6966   match(Set dst (AddI dst src));
6967   effect(KILL cr);
6968 
6969   size(2);
6970   format %{ "ADD    $dst,$src" %}
6971   opcode(0x03);
6972   ins_encode( OpcP, RegReg( dst, src) );
6973   ins_pipe( ialu_reg_reg );
6974 %}
6975 
6976 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
6977   match(Set dst (AddI dst src));
6978   effect(KILL cr);
6979 
6980   format %{ "ADD    $dst,$src" %}
6981   opcode(0x81, 0x00); /* /0 id */
6982   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
6983   ins_pipe( ialu_reg );
6984 %}
6985 
6986 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
6987   predicate(UseIncDec);
6988   match(Set dst (AddI dst src));
6989   effect(KILL cr);
6990 
6991   size(1);
6992   format %{ "INC    $dst" %}
6993   opcode(0x40); /* INC r32 is encoded as 0x40 + register */
6994   ins_encode( Opc_plus( primary, dst ) );
6995   ins_pipe( ialu_reg );
6996 %}
6997 
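     // LEA computes the sum through address arithmetic and leaves EFLAGS
     // untouched, which is why the two LEA forms below need no KILL cr effect.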
6998 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
6999   match(Set dst (AddI src0 src1));
7000   ins_cost(110);
7001 
7002   format %{ "LEA    $dst,[$src0 + $src1]" %}
7003   opcode(0x8D); /* 0x8D /r */
7004   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7005   ins_pipe( ialu_reg_reg );
7006 %}
7007 
7008 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7009   match(Set dst (AddP src0 src1));
7010   ins_cost(110);
7011 
7012   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7013   opcode(0x8D); /* 0x8D /r */
7014   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7015   ins_pipe( ialu_reg_reg );
7016 %}
7017 
7018 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7019   predicate(UseIncDec);
7020   match(Set dst (AddI dst src));
7021   effect(KILL cr);
7022 
7023   size(1);
7024   format %{ "DEC    $dst" %}
7025   opcode(0x48); /* DEC r32 is encoded as 0x48 + register */
7026   ins_encode( Opc_plus( primary, dst ) );
7027   ins_pipe( ialu_reg );
7028 %}
7029 
7030 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7031   match(Set dst (AddP dst src));
7032   effect(KILL cr);
7033 
7034   size(2);
7035   format %{ "ADD    $dst,$src" %}
7036   opcode(0x03);
7037   ins_encode( OpcP, RegReg( dst, src) );
7038   ins_pipe( ialu_reg_reg );
7039 %}
7040 
7041 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7042   match(Set dst (AddP dst src));
7043   effect(KILL cr);
7044 
7045   format %{ "ADD    $dst,$src" %}
7046   opcode(0x81,0x00); /* Opcode 81 /0 id */
7047   // ins_encode( RegImm( dst, src) );
7048   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7049   ins_pipe( ialu_reg );
7050 %}
7051 
7052 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7053   match(Set dst (AddI dst (LoadI src)));
7054   effect(KILL cr);
7055 
7056   ins_cost(125);
7057   format %{ "ADD    $dst,$src" %}
7058   opcode(0x03);
7059   ins_encode( OpcP, RegMem( dst, src) );
7060   ins_pipe( ialu_reg_mem );
7061 %}
7062 
7063 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7064   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7065   effect(KILL cr);
7066 
7067   ins_cost(150);
7068   format %{ "ADD    $dst,$src" %}
7069   opcode(0x01);  /* Opcode 01 /r */
7070   ins_encode( OpcP, RegMem( src, dst ) );
7071   ins_pipe( ialu_mem_reg );
7072 %}
7073 
7074 // Add Memory with Immediate
7075 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7076   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7077   effect(KILL cr);
7078 
7079   ins_cost(125);
7080   format %{ "ADD    $dst,$src" %}
7081   opcode(0x81);               /* Opcode 81 /0 id */
7082   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7083   ins_pipe( ialu_mem_imm );
7084 %}
7085 
7086 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7087   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7088   effect(KILL cr);
7089 
7090   ins_cost(125);
7091   format %{ "INC    $dst" %}
7092   opcode(0xFF);               /* Opcode FF /0 */
7093   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7094   ins_pipe( ialu_mem_imm );
7095 %}
7096 
7097 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7098   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7099   effect(KILL cr);
7100 
7101   ins_cost(125);
7102   format %{ "DEC    $dst" %}
7103   opcode(0xFF);               /* Opcode FF /1 */
7104   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7105   ins_pipe( ialu_mem_imm );
7106 %}
7107 
7108 
7109 instruct checkCastPP( eRegP dst ) %{
7110   match(Set dst (CheckCastPP dst));
7111 
7112   size(0);
7113   format %{ "#checkcastPP of $dst" %}
7114   ins_encode( /*empty encoding*/ );
7115   ins_pipe( empty );
7116 %}
7117 
7118 instruct castPP( eRegP dst ) %{
7119   match(Set dst (CastPP dst));
7120   format %{ "#castPP of $dst" %}
7121   ins_encode( /*empty encoding*/ );
7122   ins_pipe( empty );
7123 %}
7124 
7125 instruct castII( rRegI dst ) %{
7126   match(Set dst (CastII dst));
7127   format %{ "#castII of $dst" %}
7128   ins_encode( /*empty encoding*/ );
7129   ins_cost(0);
7130   ins_pipe( empty );
7131 %}
7132 
7133 
7134 // Load-locked - same as a regular pointer load when used with compare-swap
7135 instruct loadPLocked(eRegP dst, memory mem) %{
7136   match(Set dst (LoadPLocked mem));
7137 
7138   ins_cost(125);
7139   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7140   opcode(0x8B);
7141   ins_encode( OpcP, RegMem(dst,mem));
7142   ins_pipe( ialu_reg_mem );
7143 %}
7144 
7145 // Conditional-store of the updated heap-top.
7146 // Used during allocation of the shared heap.
7147 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
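     // LOCK CMPXCHG compares EAX ($oldval, the expected old top) with
     // $heap_top_ptr; if they match it stores $newval there and sets ZF,
     // otherwise it loads the current value into EAX and clears ZF.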
7148 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7149   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7150   // EAX is killed if there is contention, but then it's also unused.
7151   // In the common case of no contention, EAX holds the new oop address.
7152   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7153   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7154   ins_pipe( pipe_cmpxchg );
7155 %}
7156 
7157 // Conditional-store of an int value.
7158 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7159 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7160   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7161   effect(KILL oldval);
7162   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7163   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7164   ins_pipe( pipe_cmpxchg );
7165 %}
7166 
7167 // Conditional-store of a long value.
7168 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
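     // CMPXCHG8B compares EDX:EAX with the 64-bit memory operand; on a match
     // it stores ECX:EBX there and sets ZF, otherwise it loads the memory
     // value into EDX:EAX and clears ZF, which is why the operand register
     // classes below are fixed.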
7169 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7170   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7171   effect(KILL oldval);
7172   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7173             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7174             "XCHG   EBX,ECX"
7175   %}
7176   ins_encode %{
7177     // Note: we need to swap rbx and rcx before and after the
7178     //       cmpxchg8 instruction because the instruction uses
7179     //       rcx as the high order word of the new value to store, but
7180     //       our register encoding uses rbx.
7181     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7182     if( os::is_MP() )
7183       __ lock();
7184     __ cmpxchg8($mem$$Address);
7185     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7186   %}
7187   ins_pipe( pipe_cmpxchg );
7188 %}
7189 
7190 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7191 
7192 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7193   predicate(VM_Version::supports_cx8());
7194   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7195   effect(KILL cr, KILL oldval);
7196   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7197             "MOV    $res,0\n\t"
7198             "JNE,s  fail\n\t"
7199             "MOV    $res,1\n"
7200           "fail:" %}
7201   ins_encode( enc_cmpxchg8(mem_ptr),
7202               enc_flags_ne_to_boolean(res) );
7203   ins_pipe( pipe_cmpxchg );
7204 %}
7205 
7206 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7207   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7208   effect(KILL cr, KILL oldval);
7209   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7210             "MOV    $res,0\n\t"
7211             "JNE,s  fail\n\t"
7212             "MOV    $res,1\n"
7213           "fail:" %}
7214   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7215   ins_pipe( pipe_cmpxchg );
7216 %}
7217 
7218 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7219   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7220   effect(KILL cr, KILL oldval);
7221   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7222             "MOV    $res,0\n\t"
7223             "JNE,s  fail\n\t"
7224             "MOV    $res,1\n"
7225           "fail:" %}
7226   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7227   ins_pipe( pipe_cmpxchg );
7228 %}
7229 
7230 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7231   predicate(n->as_LoadStore()->result_not_used());
7232   match(Set dummy (GetAndAddI mem add));
7233   effect(KILL cr);
7234   format %{ "ADDL  [$mem],$add" %}
7235   ins_encode %{
7236     if (os::is_MP()) { __ lock(); }
7237     __ addl($mem$$Address, $add$$constant);
7238   %}
7239   ins_pipe( pipe_cmpxchg );
7240 %}
7241 
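     // XADD atomically swaps the register with the memory operand and writes
     // their sum back to memory, so afterwards $newval holds the old memory
     // value, which is exactly the result GetAndAddI must produce.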
7242 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7243   match(Set newval (GetAndAddI mem newval));
7244   effect(KILL cr);
7245   format %{ "XADDL  [$mem],$newval" %}
7246   ins_encode %{
7247     if (os::is_MP()) { __ lock(); }
7248     __ xaddl($mem$$Address, $newval$$Register);
7249   %}
7250   ins_pipe( pipe_cmpxchg );
7251 %}
7252 
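     // XCHG with a memory operand asserts LOCK implicitly, so the two
     // exchange forms below need no explicit lock prefix.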
7253 instruct xchgI( memory mem, rRegI newval) %{
7254   match(Set newval (GetAndSetI mem newval));
7255   format %{ "XCHGL  $newval,[$mem]" %}
7256   ins_encode %{
7257     __ xchgl($newval$$Register, $mem$$Address);
7258   %}
7259   ins_pipe( pipe_cmpxchg );
7260 %}
7261 
7262 instruct xchgP( memory mem, pRegP newval) %{
7263   match(Set newval (GetAndSetP mem newval));
7264   format %{ "XCHGL  $newval,[$mem]" %}
7265   ins_encode %{
7266     __ xchgl($newval$$Register, $mem$$Address);
7267   %}
7268   ins_pipe( pipe_cmpxchg );
7269 %}
7270 
7271 //----------Subtraction Instructions-------------------------------------------
7272 
7273 // Integer Subtraction Instructions
7274 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7275   match(Set dst (SubI dst src));
7276   effect(KILL cr);
7277 
7278   size(2);
7279   format %{ "SUB    $dst,$src" %}
7280   opcode(0x2B);
7281   ins_encode( OpcP, RegReg( dst, src) );
7282   ins_pipe( ialu_reg_reg );
7283 %}
7284 
7285 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7286   match(Set dst (SubI dst src));
7287   effect(KILL cr);
7288 
7289   format %{ "SUB    $dst,$src" %}
7290   opcode(0x81,0x05);  /* Opcode 81 /5 */
7291   // ins_encode( RegImm( dst, src) );
7292   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7293   ins_pipe( ialu_reg );
7294 %}
7295 
7296 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7297   match(Set dst (SubI dst (LoadI src)));
7298   effect(KILL cr);
7299 
7300   ins_cost(125);
7301   format %{ "SUB    $dst,$src" %}
7302   opcode(0x2B);
7303   ins_encode( OpcP, RegMem( dst, src) );
7304   ins_pipe( ialu_reg_mem );
7305 %}
7306 
7307 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7308   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7309   effect(KILL cr);
7310 
7311   ins_cost(150);
7312   format %{ "SUB    $dst,$src" %}
7313   opcode(0x29);  /* Opcode 29 /r */
7314   ins_encode( OpcP, RegMem( src, dst ) );
7315   ins_pipe( ialu_mem_reg );
7316 %}
7317 
7318 // Subtract from a pointer
7319 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7320   match(Set dst (AddP dst (SubI zero src)));
7321   effect(KILL cr);
7322 
7323   size(2);
7324   format %{ "SUB    $dst,$src" %}
7325   opcode(0x2B);
7326   ins_encode( OpcP, RegReg( dst, src) );
7327   ins_pipe( ialu_reg_reg );
7328 %}
7329 
7330 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7331   match(Set dst (SubI zero dst));
7332   effect(KILL cr);
7333 
7334   size(2);
7335   format %{ "NEG    $dst" %}
7336   opcode(0xF7,0x03);  // Opcode F7 /3
7337   ins_encode( OpcP, RegOpc( dst ) );
7338   ins_pipe( ialu_reg );
7339 %}
7340 
7341 //----------Multiplication/Division Instructions-------------------------------
7342 // Integer Multiplication Instructions
7343 // Multiply Register
7344 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7345   match(Set dst (MulI dst src));
7346   effect(KILL cr);
7347 
7348   size(3);
7349   ins_cost(300);
7350   format %{ "IMUL   $dst,$src" %}
7351   opcode(0xAF, 0x0F);
7352   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7353   ins_pipe( ialu_reg_reg_alu0 );
7354 %}
7355 
7356 // Multiply 32-bit Immediate
7357 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7358   match(Set dst (MulI src imm));
7359   effect(KILL cr);
7360 
7361   ins_cost(300);
7362   format %{ "IMUL   $dst,$src,$imm" %}
7363   opcode(0x69);  /* 69 /r id */
7364   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7365   ins_pipe( ialu_reg_reg_alu0 );
7366 %}
7367 
7368 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7369   match(Set dst src);
7370   effect(KILL cr);
7371 
7372   // Note that this is artificially increased to make it more expensive than loadConL
7373   ins_cost(250);
7374   format %{ "MOV    EAX,$src\t// low word only" %}
7375   opcode(0xB8);
7376   ins_encode( LdImmL_Lo(dst, src) );
7377   ins_pipe( ialu_reg_fat );
7378 %}
7379 
7380 // Multiply by 32-bit Immediate, taking the shifted high order results
7381 //  (special case for shift by 32)
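     // This matches (int)(((long)src1 * con) >> 32): the one-operand IMUL
     // leaves the full 64-bit product in EDX:EAX, and with a shift count of
     // exactly 32 the result is simply EDX, so no shift is emitted.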
7382 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7383   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7384   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7385              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7386              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7387   effect(USE src1, KILL cr);
7388 
7389   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7390   ins_cost(0*100 + 1*400 - 150);
7391   format %{ "IMUL   EDX:EAX,$src1" %}
7392   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7393   ins_pipe( pipe_slow );
7394 %}
7395 
7396 // Multiply by 32-bit Immediate, taking the shifted high order results
7397 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7398   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7399   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7400              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7401              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7402   effect(USE src1, KILL cr);
7403 
7404   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7405   ins_cost(1*100 + 1*400 - 150);
7406   format %{ "IMUL   EDX:EAX,$src1\n\t"
7407             "SAR    EDX,$cnt-32" %}
7408   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7409   ins_pipe( pipe_slow );
7410 %}
7411 
7412 // Multiply Memory 32-bit Immediate
7413 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7414   match(Set dst (MulI (LoadI src) imm));
7415   effect(KILL cr);
7416 
7417   ins_cost(300);
7418   format %{ "IMUL   $dst,$src,$imm" %}
7419   opcode(0x69);  /* 69 /r id */
7420   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7421   ins_pipe( ialu_reg_mem_alu0 );
7422 %}
7423 
7424 // Multiply Memory
7425 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7426   match(Set dst (MulI dst (LoadI src)));
7427   effect(KILL cr);
7428 
7429   ins_cost(350);
7430   format %{ "IMUL   $dst,$src" %}
7431   opcode(0xAF, 0x0F);
7432   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7433   ins_pipe( ialu_reg_mem_alu0 );
7434 %}
7435 
7436 // Multiply Register Int to Long
7437 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7438   // Basic Idea: long = (long)int * (long)int
7439   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7440   effect(DEF dst, USE src, USE src1, KILL flags);
7441 
7442   ins_cost(300);
7443   format %{ "IMUL   $dst,$src1" %}
7444 
7445   ins_encode( long_int_multiply( dst, src1 ) );
7446   ins_pipe( ialu_reg_reg_alu0 );
7447 %}
7448 
7449 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7450   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7451   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7452   effect(KILL flags);
7453 
7454   ins_cost(300);
7455   format %{ "MUL    $dst,$src1" %}
7456 
7457   ins_encode( long_uint_multiply(dst, src1) );
7458   ins_pipe( ialu_reg_reg_alu0 );
7459 %}
7460 
7461 // Multiply Register Long
7462 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7463   match(Set dst (MulL dst src));
7464   effect(KILL cr, TEMP tmp);
7465   ins_cost(4*100+3*400);
7466 // Basic idea: lo(result) = lo(x_lo * y_lo)
7467 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
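     // Worked out: with x = x_hi*2^32 + x_lo and y = y_hi*2^32 + y_lo,
     //   x*y mod 2^64 = x_lo*y_lo + ((x_hi*y_lo + x_lo*y_hi) << 32)
     // since the x_hi*y_hi term only affects bits 64 and up; the one-operand
     // MUL below supplies both halves of x_lo*y_lo in EDX:EAX.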
7468   format %{ "MOV    $tmp,$src.lo\n\t"
7469             "IMUL   $tmp,EDX\n\t"
7470             "MOV    EDX,$src.hi\n\t"
7471             "IMUL   EDX,EAX\n\t"
7472             "ADD    $tmp,EDX\n\t"
7473             "MUL    EDX:EAX,$src.lo\n\t"
7474             "ADD    EDX,$tmp" %}
7475   ins_encode( long_multiply( dst, src, tmp ) );
7476   ins_pipe( pipe_slow );
7477 %}
7478 
7479 // Multiply Register Long where the left operand's high 32 bits are zero
7480 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7481   predicate(is_operand_hi32_zero(n->in(1)));
7482   match(Set dst (MulL dst src));
7483   effect(KILL cr, TEMP tmp);
7484   ins_cost(2*100+2*400);
7485 // Basic idea: lo(result) = lo(x_lo * y_lo)
7486 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7487   format %{ "MOV    $tmp,$src.hi\n\t"
7488             "IMUL   $tmp,EAX\n\t"
7489             "MUL    EDX:EAX,$src.lo\n\t"
7490             "ADD    EDX,$tmp" %}
7491   ins_encode %{
7492     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7493     __ imull($tmp$$Register, rax);
7494     __ mull($src$$Register);
7495     __ addl(rdx, $tmp$$Register);
7496   %}
7497   ins_pipe( pipe_slow );
7498 %}
7499 
7500 // Multiply Register Long where the right operand's high 32 bits are zero
7501 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7502   predicate(is_operand_hi32_zero(n->in(2)));
7503   match(Set dst (MulL dst src));
7504   effect(KILL cr, TEMP tmp);
7505   ins_cost(2*100+2*400);
7506 // Basic idea: lo(result) = lo(x_lo * y_lo)
7507 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7508   format %{ "MOV    $tmp,$src.lo\n\t"
7509             "IMUL   $tmp,EDX\n\t"
7510             "MUL    EDX:EAX,$src.lo\n\t"
7511             "ADD    EDX,$tmp" %}
7512   ins_encode %{
7513     __ movl($tmp$$Register, $src$$Register);
7514     __ imull($tmp$$Register, rdx);
7515     __ mull($src$$Register);
7516     __ addl(rdx, $tmp$$Register);
7517   %}
7518   ins_pipe( pipe_slow );
7519 %}
7520 
7521 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7522 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7523   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7524   match(Set dst (MulL dst src));
7525   effect(KILL cr);
7526   ins_cost(1*400);
7527 // Basic idea: lo(result) = lo(x_lo * y_lo)
7528 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7529   format %{ "MUL    EDX:EAX,$src.lo" %}
7530   ins_encode %{
7531     __ mull($src$$Register);
7532   %}
7533   ins_pipe( pipe_slow );
7534 %}
7535 
7536 // Multiply Register Long by small constant
7537 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7538   match(Set dst (MulL dst src));
7539   effect(KILL cr, TEMP tmp);
7540   ins_cost(2*100+2*400);
7541   size(12);
7542 // Basic idea: lo(result) = lo(src * EAX)
7543 //             hi(result) = hi(src * EAX) + lo(src * EDX)
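     // Worked out: dst = $src * (EDX:EAX) = $src*EAX + (($src*EDX) << 32) mod 2^64,
     // so only the two partial products shown below are needed.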
7544   format %{ "IMUL   $tmp,EDX,$src\n\t"
7545             "MOV    EDX,$src\n\t"
7546             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7547             "ADD    EDX,$tmp" %}
7548   ins_encode( long_multiply_con( dst, src, tmp ) );
7549   ins_pipe( pipe_slow );
7550 %}
7551 
7552 // Integer DIV with Register
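     // IDIV raises a hardware divide error for min_jint / -1 (the true
     // quotient, +2^31, does not fit in 32 bits), so that one combination is
     // checked first and, per Java semantics, returns EAX = min_jint with
     // remainder EDX = 0 before falling into the normal CDQ/IDIV path.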
7553 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7554   match(Set rax (DivI rax div));
7555   effect(KILL rdx, KILL cr);
7556   size(26);
7557   ins_cost(30*100+10*100);
7558   format %{ "CMP    EAX,0x80000000\n\t"
7559             "JNE,s  normal\n\t"
7560             "XOR    EDX,EDX\n\t"
7561             "CMP    ECX,-1\n\t"
7562             "JE,s   done\n"
7563     "normal: CDQ\n\t"
7564             "IDIV   $div\n\t"
7565     "done:"        %}
7566   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7567   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7568   ins_pipe( ialu_reg_reg_alu0 );
7569 %}
7570 
7571 // Divide Register Long
7572 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7573   match(Set dst (DivL src1 src2));
7574   effect( KILL cr, KILL cx, KILL bx );
7575   ins_cost(10000);
7576   format %{ "PUSH   $src1.hi\n\t"
7577             "PUSH   $src1.lo\n\t"
7578             "PUSH   $src2.hi\n\t"
7579             "PUSH   $src2.lo\n\t"
7580             "CALL   SharedRuntime::ldiv\n\t"
7581             "ADD    ESP,16" %}
7582   ins_encode( long_div(src1,src2) );
7583   ins_pipe( pipe_slow );
7584 %}
7585 
7586 // Integer DIVMOD with Register, both quotient and mod results
7587 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7588   match(DivModI rax div);
7589   effect(KILL cr);
7590   size(26);
7591   ins_cost(30*100+10*100);
7592   format %{ "CMP    EAX,0x80000000\n\t"
7593             "JNE,s  normal\n\t"
7594             "XOR    EDX,EDX\n\t"
7595             "CMP    ECX,-1\n\t"
7596             "JE,s   done\n"
7597     "normal: CDQ\n\t"
7598             "IDIV   $div\n\t"
7599     "done:"        %}
7600   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7601   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7602   ins_pipe( pipe_slow );
7603 %}
7604 
7605 // Integer MOD with Register
7606 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7607   match(Set rdx (ModI rax div));
7608   effect(KILL rax, KILL cr);
7609 
7610   size(26);
7611   ins_cost(300);
7612   format %{ "CDQ\n\t"
7613             "IDIV   $div" %}
7614   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7615   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7616   ins_pipe( ialu_reg_reg_alu0 );
7617 %}
7618 
7619 // Remainder Register Long
7620 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7621   match(Set dst (ModL src1 src2));
7622   effect( KILL cr, KILL cx, KILL bx );
7623   ins_cost(10000);
7624   format %{ "PUSH   $src1.hi\n\t"
7625             "PUSH   $src1.lo\n\t"
7626             "PUSH   $src2.hi\n\t"
7627             "PUSH   $src2.lo\n\t"
7628             "CALL   SharedRuntime::lrem\n\t"
7629             "ADD    ESP,16" %}
7630   ins_encode( long_mod(src1,src2) );
7631   ins_pipe( pipe_slow );
7632 %}
7633 
7634 // Divide Register Long (no special case since divisor != -1)
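     // The division is done as schoolbook long division using the unsigned
     // DIV instruction (EDX:EAX / r32).  If the positive dividend's high word
     // is already below |$imm|, a single DIV suffices (the 'fast' path);
     // otherwise the high word is divided first and its remainder is carried
     // into the divide of the low word.  Negative dividends are negated before
     // and after, and the final NEG runs only when $imm itself is negative.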
7635 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7636   match(Set dst (DivL dst imm));
7637   effect( TEMP tmp, TEMP tmp2, KILL cr );
7638   ins_cost(1000);
7639   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7640             "XOR    $tmp2,$tmp2\n\t"
7641             "CMP    $tmp,EDX\n\t"
7642             "JA,s   fast\n\t"
7643             "MOV    $tmp2,EAX\n\t"
7644             "MOV    EAX,EDX\n\t"
7645             "MOV    EDX,0\n\t"
7646             "JLE,s  pos\n\t"
7647             "LNEG   EAX : $tmp2\n\t"
7648             "DIV    $tmp # unsigned division\n\t"
7649             "XCHG   EAX,$tmp2\n\t"
7650             "DIV    $tmp\n\t"
7651             "LNEG   $tmp2 : EAX\n\t"
7652             "JMP,s  done\n"
7653     "pos:\n\t"
7654             "DIV    $tmp\n\t"
7655             "XCHG   EAX,$tmp2\n"
7656     "fast:\n\t"
7657             "DIV    $tmp\n"
7658     "done:\n\t"
7659             "MOV    EDX,$tmp2\n\t"
7660             "NEG    EDX:EAX # if $imm < 0" %}
7661   ins_encode %{
7662     int con = (int)$imm$$constant;
7663     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7664     int pcon = (con > 0) ? con : -con;
7665     Label Lfast, Lpos, Ldone;
7666 
7667     __ movl($tmp$$Register, pcon);
7668     __ xorl($tmp2$$Register,$tmp2$$Register);
7669     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7670     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7671 
7672     __ movl($tmp2$$Register, $dst$$Register); // save
7673     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7674     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7675     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7676 
7677     // Negative dividend.
7678     // convert value to positive to use unsigned division
7679     __ lneg($dst$$Register, $tmp2$$Register);
7680     __ divl($tmp$$Register);
7681     __ xchgl($dst$$Register, $tmp2$$Register);
7682     __ divl($tmp$$Register);
7683     // revert result back to negative
7684     __ lneg($tmp2$$Register, $dst$$Register);
7685     __ jmpb(Ldone);
7686 
7687     __ bind(Lpos);
7688     __ divl($tmp$$Register); // Use unsigned division
7689     __ xchgl($dst$$Register, $tmp2$$Register);
7690     // Fall through to the final divide; tmp2 has the 32-bit hi result
7691 
7692     __ bind(Lfast);
7693     // fast path: src is positive
7694     __ divl($tmp$$Register); // Use unsigned division
7695 
7696     __ bind(Ldone);
7697     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7698     if (con < 0) {
7699       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7700     }
7701   %}
7702   ins_pipe( pipe_slow );
7703 %}
7704 
7705 // Remainder Register Long (remainder fits into 32 bits)
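     // Same unsigned-DIV scheme as the division above, but only the remainder
     // of the final DIV (left in EDX) is kept: it is copied into EAX and EDX
     // is filled with its sign via SAR 31, since a remainder by a 32-bit
     // divisor always fits in 32 bits.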
7706 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7707   match(Set dst (ModL dst imm));
7708   effect( TEMP tmp, TEMP tmp2, KILL cr );
7709   ins_cost(1000);
7710   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7711             "CMP    $tmp,EDX\n\t"
7712             "JA,s   fast\n\t"
7713             "MOV    $tmp2,EAX\n\t"
7714             "MOV    EAX,EDX\n\t"
7715             "MOV    EDX,0\n\t"
7716             "JLE,s  pos\n\t"
7717             "LNEG   EAX : $tmp2\n\t"
7718             "DIV    $tmp # unsigned division\n\t"
7719             "MOV    EAX,$tmp2\n\t"
7720             "DIV    $tmp\n\t"
7721             "NEG    EDX\n\t"
7722             "JMP,s  done\n"
7723     "pos:\n\t"
7724             "DIV    $tmp\n\t"
7725             "MOV    EAX,$tmp2\n"
7726     "fast:\n\t"
7727             "DIV    $tmp\n"
7728     "done:\n\t"
7729             "MOV    EAX,EDX\n\t"
7730             "SAR    EDX,31" %}
7731   ins_encode %{
7732     int con = (int)$imm$$constant;
7733     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7734     int pcon = (con > 0) ? con : -con;
7735     Label  Lfast, Lpos, Ldone;
7736 
7737     __ movl($tmp$$Register, pcon);
7738     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7739     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7740 
7741     __ movl($tmp2$$Register, $dst$$Register); // save
7742     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7743     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7744     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7745 
7746     // Negative dividend.
7747     // convert value to positive to use unsigned division
7748     __ lneg($dst$$Register, $tmp2$$Register);
7749     __ divl($tmp$$Register);
7750     __ movl($dst$$Register, $tmp2$$Register);
7751     __ divl($tmp$$Register);
7752     // revert remainder back to negative
7753     __ negl(HIGH_FROM_LOW($dst$$Register));
7754     __ jmpb(Ldone);
7755 
7756     __ bind(Lpos);
7757     __ divl($tmp$$Register);
7758     __ movl($dst$$Register, $tmp2$$Register);
7759 
7760     __ bind(Lfast);
7761     // fast path: src is positive
7762     __ divl($tmp$$Register);
7763 
7764     __ bind(Ldone);
7765     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7766     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7767 
7768   %}
7769   ins_pipe( pipe_slow );
7770 %}
7771 
7772 // Integer Shift Instructions
7773 // Shift Left by one
7774 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7775   match(Set dst (LShiftI dst shift));
7776   effect(KILL cr);
7777 
7778   size(2);
7779   format %{ "SHL    $dst,$shift" %}
7780   opcode(0xD1, 0x4);  /* D1 /4 */
7781   ins_encode( OpcP, RegOpc( dst ) );
7782   ins_pipe( ialu_reg );
7783 %}
7784 
7785 // Shift Left by 8-bit immediate
7786 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7787   match(Set dst (LShiftI dst shift));
7788   effect(KILL cr);
7789 
7790   size(3);
7791   format %{ "SHL    $dst,$shift" %}
7792   opcode(0xC1, 0x4);  /* C1 /4 ib */
7793   ins_encode( RegOpcImm( dst, shift) );
7794   ins_pipe( ialu_reg );
7795 %}
7796 
7797 // Shift Left by variable
7798 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7799   match(Set dst (LShiftI dst shift));
7800   effect(KILL cr);
7801 
7802   size(2);
7803   format %{ "SHL    $dst,$shift" %}
7804   opcode(0xD3, 0x4);  /* D3 /4 */
7805   ins_encode( OpcP, RegOpc( dst ) );
7806   ins_pipe( ialu_reg_reg );
7807 %}
7808 
7809 // Arithmetic shift right by one
7810 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7811   match(Set dst (RShiftI dst shift));
7812   effect(KILL cr);
7813 
7814   size(2);
7815   format %{ "SAR    $dst,$shift" %}
7816   opcode(0xD1, 0x7);  /* D1 /7 */
7817   ins_encode( OpcP, RegOpc( dst ) );
7818   ins_pipe( ialu_reg );
7819 %}
7820 
7821 // Arithmetic shift right by one
7822 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7823   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7824   effect(KILL cr);
7825   format %{ "SAR    $dst,$shift" %}
7826   opcode(0xD1, 0x7);  /* D1 /7 */
7827   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7828   ins_pipe( ialu_mem_imm );
7829 %}
7830 
7831 // Arithmetic Shift Right by 8-bit immediate
7832 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7833   match(Set dst (RShiftI dst shift));
7834   effect(KILL cr);
7835 
7836   size(3);
7837   format %{ "SAR    $dst,$shift" %}
7838   opcode(0xC1, 0x7);  /* C1 /7 ib */
7839   ins_encode( RegOpcImm( dst, shift ) );
7840   ins_pipe( ialu_mem_imm );
7841 %}
7842 
7843 // Arithmetic Shift Right by 8-bit immediate
7844 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7845   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7846   effect(KILL cr);
7847 
7848   format %{ "SAR    $dst,$shift" %}
7849   opcode(0xC1, 0x7);  /* C1 /7 ib */
7850   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7851   ins_pipe( ialu_mem_imm );
7852 %}
7853 
7854 // Arithmetic Shift Right by variable
7855 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7856   match(Set dst (RShiftI dst shift));
7857   effect(KILL cr);
7858 
7859   size(2);
7860   format %{ "SAR    $dst,$shift" %}
7861   opcode(0xD3, 0x7);  /* D3 /7 */
7862   ins_encode( OpcP, RegOpc( dst ) );
7863   ins_pipe( ialu_reg_reg );
7864 %}
7865 
7866 // Logical shift right by one
7867 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7868   match(Set dst (URShiftI dst shift));
7869   effect(KILL cr);
7870 
7871   size(2);
7872   format %{ "SHR    $dst,$shift" %}
7873   opcode(0xD1, 0x5);  /* D1 /5 */
7874   ins_encode( OpcP, RegOpc( dst ) );
7875   ins_pipe( ialu_reg );
7876 %}
7877 
7878 // Logical Shift Right by 8-bit immediate
7879 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7880   match(Set dst (URShiftI dst shift));
7881   effect(KILL cr);
7882 
7883   size(3);
7884   format %{ "SHR    $dst,$shift" %}
7885   opcode(0xC1, 0x5);  /* C1 /5 ib */
7886   ins_encode( RegOpcImm( dst, shift) );
7887   ins_pipe( ialu_reg );
7888 %}
7889 
7890 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This sign-extension idiom is used by the compiler for the i2b bytecode.
7893 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7894   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7895 
7896   size(3);
7897   format %{ "MOVSX  $dst,$src :8" %}
7898   ins_encode %{
7899     __ movsbl($dst$$Register, $src$$Register);
7900   %}
7901   ins_pipe(ialu_reg_reg);
7902 %}
7903 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This sign-extension idiom is used by the compiler for the i2s bytecode.
7906 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7907   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7908 
7909   size(3);
7910   format %{ "MOVSX  $dst,$src :16" %}
7911   ins_encode %{
7912     __ movswl($dst$$Register, $src$$Register);
7913   %}
7914   ins_pipe(ialu_reg_reg);
7915 %}
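
// Both match rules above recognize the shift pair as plain sign extension,
// which is why a single MOVSX is emitted.  A standalone C++ sketch of the
// equivalence (illustration only; assumes the usual two's-complement
// behaviour mainstream compilers provide):
/*
  #include <cstdint>
  #include <cassert>
  #include <initializer_list>

  int32_t i2b_shifts(int32_t x) { return (int32_t)((uint32_t)x << 24) >> 24; }
  int32_t i2b_movsx (int32_t x) { return (int8_t)x;  }   // MOVSX from a byte
  int32_t i2s_shifts(int32_t x) { return (int32_t)((uint32_t)x << 16) >> 16; }
  int32_t i2s_movsx (int32_t x) { return (int16_t)x; }   // MOVSX from a word

  int main() {
    for (int32_t x : { 0, 1, 127, 128, 255, 65535, -1, 0x1234567 }) {
      assert(i2b_shifts(x) == i2b_movsx(x));
      assert(i2s_shifts(x) == i2s_movsx(x));
    }
  }
*/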
7916 
7917 
7918 // Logical Shift Right by variable
7919 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7920   match(Set dst (URShiftI dst shift));
7921   effect(KILL cr);
7922 
7923   size(2);
7924   format %{ "SHR    $dst,$shift" %}
7925   opcode(0xD3, 0x5);  /* D3 /5 */
7926   ins_encode( OpcP, RegOpc( dst ) );
7927   ins_pipe( ialu_reg_reg );
7928 %}
7929 
7930 
7931 //----------Logical Instructions-----------------------------------------------
7932 //----------Integer Logical Instructions---------------------------------------
7933 // And Instructions
7934 // And Register with Register
7935 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7936   match(Set dst (AndI dst src));
7937   effect(KILL cr);
7938 
7939   size(2);
7940   format %{ "AND    $dst,$src" %}
7941   opcode(0x23);
7942   ins_encode( OpcP, RegReg( dst, src) );
7943   ins_pipe( ialu_reg_reg );
7944 %}
7945 
7946 // And Register with Immediate
7947 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7948   match(Set dst (AndI dst src));
7949   effect(KILL cr);
7950 
7951   format %{ "AND    $dst,$src" %}
7952   opcode(0x81,0x04);  /* Opcode 81 /4 */
7953   // ins_encode( RegImm( dst, src) );
7954   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7955   ins_pipe( ialu_reg );
7956 %}
7957 
7958 // And Register with Memory
7959 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7960   match(Set dst (AndI dst (LoadI src)));
7961   effect(KILL cr);
7962 
7963   ins_cost(125);
7964   format %{ "AND    $dst,$src" %}
7965   opcode(0x23);
7966   ins_encode( OpcP, RegMem( dst, src) );
7967   ins_pipe( ialu_reg_mem );
7968 %}
7969 
7970 // And Memory with Register
7971 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7972   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
7973   effect(KILL cr);
7974 
7975   ins_cost(150);
7976   format %{ "AND    $dst,$src" %}
7977   opcode(0x21);  /* Opcode 21 /r */
7978   ins_encode( OpcP, RegMem( src, dst ) );
7979   ins_pipe( ialu_mem_reg );
7980 %}
7981 
7982 // And Memory with Immediate
7983 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7984   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
7985   effect(KILL cr);
7986 
7987   ins_cost(125);
7988   format %{ "AND    $dst,$src" %}
7989   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
7990   // ins_encode( MemImm( dst, src) );
7991   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
7992   ins_pipe( ialu_mem_imm );
7993 %}
7994 
7995 // BMI1 instructions
7996 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
7997   match(Set dst (AndI (XorI src1 minus_1) src2));
7998   predicate(UseBMI1Instructions);
7999   effect(KILL cr);
8000 
8001   format %{ "ANDNL  $dst, $src1, $src2" %}
8002 
8003   ins_encode %{
8004     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8005   %}
8006   ins_pipe(ialu_reg);
8007 %}
8008 
8009 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8010   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8011   predicate(UseBMI1Instructions);
8012   effect(KILL cr);
8013 
8014   ins_cost(125);
8015   format %{ "ANDNL  $dst, $src1, $src2" %}
8016 
8017   ins_encode %{
8018     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8019   %}
8020   ins_pipe(ialu_reg_mem);
8021 %}
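
// The two ANDN rules above match the shape (src1 ^ -1) & src2, i.e.
// ~src1 & src2, which is exactly what the BMI1 ANDN instruction computes.
// A standalone C++ sketch of that identity (illustration only):
/*
  #include <cstdint>
  #include <cassert>

  uint32_t andn_via_xor(uint32_t a, uint32_t b) { return (a ^ 0xFFFFFFFFu) & b; }
  uint32_t andn_direct (uint32_t a, uint32_t b) { return ~a & b; }

  int main() {
    assert(andn_via_xor(0x0F0Fu, 0x3355u) == andn_direct(0x0F0Fu, 0x3355u));
    assert(andn_direct(0x0F0Fu, 0x3355u) == 0x3050u);
  }
*/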
8022 
8023 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8024   match(Set dst (AndI (SubI imm_zero src) src));
8025   predicate(UseBMI1Instructions);
8026   effect(KILL cr);
8027 
8028   format %{ "BLSIL  $dst, $src" %}
8029 
8030   ins_encode %{
8031     __ blsil($dst$$Register, $src$$Register);
8032   %}
8033   ins_pipe(ialu_reg);
8034 %}
8035 
8036 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8037   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8038   predicate(UseBMI1Instructions);
8039   effect(KILL cr);
8040 
8041   ins_cost(125);
8042   format %{ "BLSIL  $dst, $src" %}
8043 
8044   ins_encode %{
8045     __ blsil($dst$$Register, $src$$Address);
8046   %}
8047   ins_pipe(ialu_reg_mem);
8048 %}
8049 
8050 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8051 %{
8052   match(Set dst (XorI (AddI src minus_1) src));
8053   predicate(UseBMI1Instructions);
8054   effect(KILL cr);
8055 
8056   format %{ "BLSMSKL $dst, $src" %}
8057 
8058   ins_encode %{
8059     __ blsmskl($dst$$Register, $src$$Register);
8060   %}
8061 
8062   ins_pipe(ialu_reg);
8063 %}
8064 
8065 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8066 %{
8067   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8068   predicate(UseBMI1Instructions);
8069   effect(KILL cr);
8070 
8071   ins_cost(125);
8072   format %{ "BLSMSKL $dst, $src" %}
8073 
8074   ins_encode %{
8075     __ blsmskl($dst$$Register, $src$$Address);
8076   %}
8077 
8078   ins_pipe(ialu_reg_mem);
8079 %}
8080 
8081 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8082 %{
8083   match(Set dst (AndI (AddI src minus_1) src) );
8084   predicate(UseBMI1Instructions);
8085   effect(KILL cr);
8086 
8087   format %{ "BLSRL  $dst, $src" %}
8088 
8089   ins_encode %{
8090     __ blsrl($dst$$Register, $src$$Register);
8091   %}
8092 
8093   ins_pipe(ialu_reg);
8094 %}
8095 
8096 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8097 %{
8098   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8099   predicate(UseBMI1Instructions);
8100   effect(KILL cr);
8101 
8102   ins_cost(125);
8103   format %{ "BLSRL  $dst, $src" %}
8104 
8105   ins_encode %{
8106     __ blsrl($dst$$Register, $src$$Address);
8107   %}
8108 
8109   ins_pipe(ialu_reg_mem);
8110 %}
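
// The BLSI/BLSMSK/BLSR rules above match the classic lowest-set-bit tricks
// that those BMI1 instructions implement.  A standalone C++ sketch of the
// identities being matched (illustration only):
/*
  #include <cstdint>
  #include <cassert>

  uint32_t blsi  (uint32_t x) { return x & (0u - x); }  // isolate lowest set bit
  uint32_t blsmsk(uint32_t x) { return x ^ (x - 1u); }  // mask up to lowest set bit
  uint32_t blsr  (uint32_t x) { return x & (x - 1u); }  // clear lowest set bit

  int main() {
    assert(blsi  (0xB8u) == 0x08u);
    assert(blsmsk(0xB8u) == 0x0Fu);
    assert(blsr  (0xB8u) == 0xB0u);
  }
*/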
8111 
8112 // Or Instructions
8113 // Or Register with Register
8114 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8115   match(Set dst (OrI dst src));
8116   effect(KILL cr);
8117 
8118   size(2);
8119   format %{ "OR     $dst,$src" %}
8120   opcode(0x0B);
8121   ins_encode( OpcP, RegReg( dst, src) );
8122   ins_pipe( ialu_reg_reg );
8123 %}
8124 
8125 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8126   match(Set dst (OrI dst (CastP2X src)));
8127   effect(KILL cr);
8128 
8129   size(2);
8130   format %{ "OR     $dst,$src" %}
8131   opcode(0x0B);
8132   ins_encode( OpcP, RegReg( dst, src) );
8133   ins_pipe( ialu_reg_reg );
8134 %}
8135 
8136 
8137 // Or Register with Immediate
8138 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8139   match(Set dst (OrI dst src));
8140   effect(KILL cr);
8141 
8142   format %{ "OR     $dst,$src" %}
8143   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8144   // ins_encode( RegImm( dst, src) );
8145   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8146   ins_pipe( ialu_reg );
8147 %}
8148 
8149 // Or Register with Memory
8150 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8151   match(Set dst (OrI dst (LoadI src)));
8152   effect(KILL cr);
8153 
8154   ins_cost(125);
8155   format %{ "OR     $dst,$src" %}
8156   opcode(0x0B);
8157   ins_encode( OpcP, RegMem( dst, src) );
8158   ins_pipe( ialu_reg_mem );
8159 %}
8160 
8161 // Or Memory with Register
8162 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8163   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8164   effect(KILL cr);
8165 
8166   ins_cost(150);
8167   format %{ "OR     $dst,$src" %}
8168   opcode(0x09);  /* Opcode 09 /r */
8169   ins_encode( OpcP, RegMem( src, dst ) );
8170   ins_pipe( ialu_mem_reg );
8171 %}
8172 
8173 // Or Memory with Immediate
8174 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8175   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8176   effect(KILL cr);
8177 
8178   ins_cost(125);
8179   format %{ "OR     $dst,$src" %}
8180   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8181   // ins_encode( MemImm( dst, src) );
8182   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8183   ins_pipe( ialu_mem_imm );
8184 %}
8185 
8186 // ROL/ROR
8187 // ROL expand
8188 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8189   effect(USE_DEF dst, USE shift, KILL cr);
8190 
8191   format %{ "ROL    $dst, $shift" %}
8192   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8193   ins_encode( OpcP, RegOpc( dst ));
8194   ins_pipe( ialu_reg );
8195 %}
8196 
8197 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8198   effect(USE_DEF dst, USE shift, KILL cr);
8199 
8200   format %{ "ROL    $dst, $shift" %}
8201   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8202   ins_encode( RegOpcImm(dst, shift) );
8203   ins_pipe(ialu_reg);
8204 %}
8205 
8206 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8207   effect(USE_DEF dst, USE shift, KILL cr);
8208 
8209   format %{ "ROL    $dst, $shift" %}
8210   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8211   ins_encode(OpcP, RegOpc(dst));
8212   ins_pipe( ialu_reg_reg );
8213 %}
8214 // end of ROL expand
8215 
8216 // ROL 32bit by one once
8217 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8218   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8219 
8220   expand %{
8221     rolI_eReg_imm1(dst, lshift, cr);
8222   %}
8223 %}
8224 
8225 // ROL 32bit var by imm8 once
8226 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8227   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8228   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8229 
8230   expand %{
8231     rolI_eReg_imm8(dst, lshift, cr);
8232   %}
8233 %}
8234 
8235 // ROL 32bit var by var once
8236 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8237   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8238 
8239   expand %{
8240     rolI_eReg_CL(dst, shift, cr);
8241   %}
8242 %}
8243 
8244 // ROL 32bit var by var once
8245 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8246   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8247 
8248   expand %{
8249     rolI_eReg_CL(dst, shift, cr);
8250   %}
8251 %}
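
// The ROL match rules above rely on the fact that both Java and x86 take
// 32-bit shift counts mod 32, so (x << s) | (x >>> (32 - s)) and
// (x << s) | (x >>> (0 - s)) are both a rotate-left by s.  A standalone C++
// sketch of that identity (illustration only):
/*
  #include <cstdint>
  #include <cassert>

  uint32_t rol_via_shifts(uint32_t x, int s) {
    return (x << (s & 31)) | (x >> ((32 - s) & 31));   // the matched shape
  }
  uint32_t rol_reference(uint32_t x, int s) {
    s &= 31;
    return s == 0 ? x : (x << s) | (x >> (32 - s));
  }

  int main() {
    for (int s = 0; s < 64; s++)
      assert(rol_via_shifts(0x80000001u, s) == rol_reference(0x80000001u, s));
  }
*/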
8252 
8253 // ROR expand
8254 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8255   effect(USE_DEF dst, USE shift, KILL cr);
8256 
8257   format %{ "ROR    $dst, $shift" %}
8258   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8259   ins_encode( OpcP, RegOpc( dst ) );
8260   ins_pipe( ialu_reg );
8261 %}
8262 
8263 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8264   effect (USE_DEF dst, USE shift, KILL cr);
8265 
8266   format %{ "ROR    $dst, $shift" %}
8267   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8268   ins_encode( RegOpcImm(dst, shift) );
8269   ins_pipe( ialu_reg );
8270 %}
8271 
8272 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8273   effect(USE_DEF dst, USE shift, KILL cr);
8274 
8275   format %{ "ROR    $dst, $shift" %}
8276   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8277   ins_encode(OpcP, RegOpc(dst));
8278   ins_pipe( ialu_reg_reg );
8279 %}
8280 // end of ROR expand
8281 
8282 // ROR right once
8283 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8284   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8285 
8286   expand %{
8287     rorI_eReg_imm1(dst, rshift, cr);
8288   %}
8289 %}
8290 
8291 // ROR 32bit by immI8 once
8292 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8293   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8294   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8295 
8296   expand %{
8297     rorI_eReg_imm8(dst, rshift, cr);
8298   %}
8299 %}
8300 
8301 // ROR 32bit var by var once
8302 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8303   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8304 
8305   expand %{
8306     rorI_eReg_CL(dst, shift, cr);
8307   %}
8308 %}
8309 
8310 // ROR 32bit var by var once
8311 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8312   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8313 
8314   expand %{
8315     rorI_eReg_CL(dst, shift, cr);
8316   %}
8317 %}
8318 
8319 // Xor Instructions
8320 // Xor Register with Register
8321 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8322   match(Set dst (XorI dst src));
8323   effect(KILL cr);
8324 
8325   size(2);
8326   format %{ "XOR    $dst,$src" %}
8327   opcode(0x33);
8328   ins_encode( OpcP, RegReg( dst, src) );
8329   ins_pipe( ialu_reg_reg );
8330 %}
8331 
8332 // Xor Register with Immediate -1
8333 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8334   match(Set dst (XorI dst imm));
8335 
8336   size(2);
8337   format %{ "NOT    $dst" %}
8338   ins_encode %{
8339      __ notl($dst$$Register);
8340   %}
8341   ins_pipe( ialu_reg );
8342 %}
8343 
8344 // Xor Register with Immediate
8345 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8346   match(Set dst (XorI dst src));
8347   effect(KILL cr);
8348 
8349   format %{ "XOR    $dst,$src" %}
8350   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8351   // ins_encode( RegImm( dst, src) );
8352   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8353   ins_pipe( ialu_reg );
8354 %}
8355 
8356 // Xor Register with Memory
8357 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8358   match(Set dst (XorI dst (LoadI src)));
8359   effect(KILL cr);
8360 
8361   ins_cost(125);
8362   format %{ "XOR    $dst,$src" %}
8363   opcode(0x33);
8364   ins_encode( OpcP, RegMem(dst, src) );
8365   ins_pipe( ialu_reg_mem );
8366 %}
8367 
8368 // Xor Memory with Register
8369 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8370   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8371   effect(KILL cr);
8372 
8373   ins_cost(150);
8374   format %{ "XOR    $dst,$src" %}
8375   opcode(0x31);  /* Opcode 31 /r */
8376   ins_encode( OpcP, RegMem( src, dst ) );
8377   ins_pipe( ialu_mem_reg );
8378 %}
8379 
8380 // Xor Memory with Immediate
8381 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8382   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8383   effect(KILL cr);
8384 
8385   ins_cost(125);
8386   format %{ "XOR    $dst,$src" %}
8387   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8388   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8389   ins_pipe( ialu_mem_imm );
8390 %}
8391 
8392 //----------Convert Int to Boolean---------------------------------------------
8393 
8394 instruct movI_nocopy(rRegI dst, rRegI src) %{
8395   effect( DEF dst, USE src );
8396   format %{ "MOV    $dst,$src" %}
8397   ins_encode( enc_Copy( dst, src) );
8398   ins_pipe( ialu_reg_reg );
8399 %}
8400 
8401 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8402   effect( USE_DEF dst, USE src, KILL cr );
8403 
8404   size(4);
8405   format %{ "NEG    $dst\n\t"
8406             "ADC    $dst,$src" %}
8407   ins_encode( neg_reg(dst),
8408               OpcRegReg(0x13,dst,src) );
8409   ins_pipe( ialu_reg_reg_long );
8410 %}
8411 
8412 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8413   match(Set dst (Conv2B src));
8414 
8415   expand %{
8416     movI_nocopy(dst,src);
8417     ci2b(dst,src,cr);
8418   %}
8419 %}
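
// Conv2B maps 0 to 0 and every non-zero value to 1.  The NEG/ADC pair in
// the expand above does this without a branch: after the copy, NEG sets CF
// exactly when the value was non-zero, and ADC dst,src then yields
// -src + src + CF = CF.  A standalone C++ sketch (illustration only):
/*
  #include <cstdint>
  #include <cassert>

  int32_t conv2b_neg_adc(int32_t src) {
    uint32_t dst   = (uint32_t)src;     // MOV dst,src
    uint32_t carry = (dst != 0);        // CF produced by NEG dst
    dst = 0u - dst;                     // NEG dst
    dst = dst + (uint32_t)src + carry;  // ADC dst,src
    return (int32_t)dst;                // 0 or 1
  }

  int main() {
    assert(conv2b_neg_adc(0)  == 0);
    assert(conv2b_neg_adc(1)  == 1);
    assert(conv2b_neg_adc(-5) == 1);
    assert(conv2b_neg_adc(INT32_MIN) == 1);
  }
*/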
8420 
8421 instruct movP_nocopy(rRegI dst, eRegP src) %{
8422   effect( DEF dst, USE src );
8423   format %{ "MOV    $dst,$src" %}
8424   ins_encode( enc_Copy( dst, src) );
8425   ins_pipe( ialu_reg_reg );
8426 %}
8427 
8428 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8429   effect( USE_DEF dst, USE src, KILL cr );
8430   format %{ "NEG    $dst\n\t"
8431             "ADC    $dst,$src" %}
8432   ins_encode( neg_reg(dst),
8433               OpcRegReg(0x13,dst,src) );
8434   ins_pipe( ialu_reg_reg_long );
8435 %}
8436 
8437 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8438   match(Set dst (Conv2B src));
8439 
8440   expand %{
8441     movP_nocopy(dst,src);
8442     cp2b(dst,src,cr);
8443   %}
8444 %}
8445 
8446 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8447   match(Set dst (CmpLTMask p q));
8448   effect(KILL cr);
8449   ins_cost(400);
8450 
  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as its destination
8452   format %{ "XOR    $dst,$dst\n\t"
8453             "CMP    $p,$q\n\t"
8454             "SETlt  $dst\n\t"
8455             "NEG    $dst" %}
8456   ins_encode %{
8457     Register Rp = $p$$Register;
8458     Register Rq = $q$$Register;
8459     Register Rd = $dst$$Register;
8460     Label done;
8461     __ xorl(Rd, Rd);
8462     __ cmpl(Rp, Rq);
8463     __ setb(Assembler::less, Rd);
8464     __ negl(Rd);
8465   %}
8466 
8467   ins_pipe(pipe_slow);
8468 %}
8469 
8470 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8471   match(Set dst (CmpLTMask dst zero));
8472   effect(DEF dst, KILL cr);
8473   ins_cost(100);
8474 
8475   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8476   ins_encode %{
8477   __ sarl($dst$$Register, 31);
8478   %}
8479   ins_pipe(ialu_reg);
8480 %}
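
// CmpLTMask produces an all-ones mask when p < q (signed) and zero
// otherwise; cmpLTMask0 is the special case against zero, where a single
// arithmetic shift of the sign bit suffices.  A standalone C++ sketch of
// both (illustration only):
/*
  #include <cstdint>
  #include <cassert>

  int32_t cmpLTMask (int32_t p, int32_t q) { return -(int32_t)(p < q); } // SETlt + NEG
  int32_t cmpLTMask0(int32_t x)            { return x < 0 ? -1 : 0;    } // SAR x,31

  int main() {
    assert(cmpLTMask(1, 2)  == -1);
    assert(cmpLTMask(2, 1)  ==  0);
    assert(cmpLTMask0(-17)  == -1);
    assert(cmpLTMask0(42)   ==  0);
  }
*/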
8481 
8482 /* better to save a register than avoid a branch */
8483 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8484   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8485   effect(KILL cr);
8486   ins_cost(400);
8487   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8488             "JGE    done\n\t"
8489             "ADD    $p,$y\n"
8490             "done:  " %}
8491   ins_encode %{
8492     Register Rp = $p$$Register;
8493     Register Rq = $q$$Register;
8494     Register Ry = $y$$Register;
8495     Label done;
8496     __ subl(Rp, Rq);
8497     __ jccb(Assembler::greaterEqual, done);
8498     __ addl(Rp, Ry);
8499     __ bind(done);
8500   %}
8501 
8502   ins_pipe(pipe_cmplt);
8503 %}
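
// The rule above folds ((p < q ? -1 : 0) & y) + (p - q) into a subtract
// plus a conditional add-back of y, which is why "save a register" beats
// "avoid a branch" here.  A standalone C++ sketch of the equivalence
// (illustration only; wrapping 32-bit arithmetic spelled out explicitly):
/*
  #include <cstdint>
  #include <cassert>

  int32_t cadd_masked(int32_t p, int32_t q, int32_t y) {
    uint32_t mask = p < q ? 0xFFFFFFFFu : 0u;               // CmpLTMask p q
    return (int32_t)((mask & (uint32_t)y) + ((uint32_t)p - (uint32_t)q));
  }
  int32_t cadd_branchy(int32_t p, int32_t q, int32_t y) {
    bool lt = p < q;                         // what JGE tests after SUB p,q
    uint32_t r = (uint32_t)p - (uint32_t)q;  // SUB p,q
    if (lt) r += (uint32_t)y;                // ADD p,y only when p < q
    return (int32_t)r;
  }

  int main() {
    assert(cadd_masked(3, 7, 10) == cadd_branchy(3, 7, 10));   // == 6
    assert(cadd_masked(7, 3, 10) == cadd_branchy(7, 3, 10));   // == 4
  }
*/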
8504 
8505 /* better to save a register than avoid a branch */
8506 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8507   match(Set y (AndI (CmpLTMask p q) y));
8508   effect(KILL cr);
8509 
8510   ins_cost(300);
8511 
8512   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8513             "JLT      done\n\t"
8514             "XORL     $y, $y\n"
8515             "done:  " %}
8516   ins_encode %{
8517     Register Rp = $p$$Register;
8518     Register Rq = $q$$Register;
8519     Register Ry = $y$$Register;
8520     Label done;
8521     __ cmpl(Rp, Rq);
8522     __ jccb(Assembler::less, done);
8523     __ xorl(Ry, Ry);
8524     __ bind(done);
8525   %}
8526 
8527   ins_pipe(pipe_cmplt);
8528 %}
8529 
8530 /* If I enable this, I encourage spilling in the inner loop of compress.
8531 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8532   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8533 */
8534 //----------Overflow Math Instructions-----------------------------------------
8535 
8536 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8537 %{
8538   match(Set cr (OverflowAddI op1 op2));
8539   effect(DEF cr, USE_KILL op1, USE op2);
8540 
8541   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8542 
8543   ins_encode %{
8544     __ addl($op1$$Register, $op2$$Register);
8545   %}
8546   ins_pipe(ialu_reg_reg);
8547 %}
8548 
8549 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8550 %{
8551   match(Set cr (OverflowAddI op1 op2));
8552   effect(DEF cr, USE_KILL op1, USE op2);
8553 
8554   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8555 
8556   ins_encode %{
8557     __ addl($op1$$Register, $op2$$constant);
8558   %}
8559   ins_pipe(ialu_reg_reg);
8560 %}
8561 
8562 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8563 %{
8564   match(Set cr (OverflowSubI op1 op2));
8565 
8566   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8567   ins_encode %{
8568     __ cmpl($op1$$Register, $op2$$Register);
8569   %}
8570   ins_pipe(ialu_reg_reg);
8571 %}
8572 
8573 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8574 %{
8575   match(Set cr (OverflowSubI op1 op2));
8576 
8577   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8578   ins_encode %{
8579     __ cmpl($op1$$Register, $op2$$constant);
8580   %}
8581   ins_pipe(ialu_reg_reg);
8582 %}
8583 
8584 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8585 %{
8586   match(Set cr (OverflowSubI zero op2));
8587   effect(DEF cr, USE_KILL op2);
8588 
8589   format %{ "NEG    $op2\t# overflow check int" %}
8590   ins_encode %{
8591     __ negl($op2$$Register);
8592   %}
8593   ins_pipe(ialu_reg_reg);
8594 %}
8595 
8596 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8597 %{
8598   match(Set cr (OverflowMulI op1 op2));
8599   effect(DEF cr, USE_KILL op1, USE op2);
8600 
8601   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8602   ins_encode %{
8603     __ imull($op1$$Register, $op2$$Register);
8604   %}
8605   ins_pipe(ialu_reg_reg_alu0);
8606 %}
8607 
8608 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8609 %{
8610   match(Set cr (OverflowMulI op1 op2));
8611   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8612 
8613   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8614   ins_encode %{
8615     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8616   %}
8617   ins_pipe(ialu_reg_reg_alu0);
8618 %}
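
// Only the condition codes matter for the overflow rules above: they emit
// ordinary ADD/CMP/NEG/IMUL instructions and the consumer of $cr tests the
// overflow condition.  A standalone C++ sketch of what "signed 32-bit
// overflow" means for the add case (illustration only):
/*
  #include <cstdint>
  #include <cassert>

  bool add_overflows(int32_t a, int32_t b) {
    int64_t wide = (int64_t)a + (int64_t)b;             // exact sum
    return wide < INT32_MIN || wide > INT32_MAX;        // OF after a 32-bit ADD
  }

  int main() {
    assert(!add_overflows(1, 2));
    assert( add_overflows(INT32_MAX, 1));
    assert( add_overflows(INT32_MIN, -1));
  }
*/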
8619 
8620 //----------Long Instructions------------------------------------------------
8621 // Add Long Register with Register
8622 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8623   match(Set dst (AddL dst src));
8624   effect(KILL cr);
8625   ins_cost(200);
8626   format %{ "ADD    $dst.lo,$src.lo\n\t"
8627             "ADC    $dst.hi,$src.hi" %}
8628   opcode(0x03, 0x13);
8629   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8630   ins_pipe( ialu_reg_reg_long );
8631 %}
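
// On this 32-bit port a long add is the ADD/ADC pair shown in the format
// string: add the low halves, then add the high halves plus the carry out
// of the low add.  A standalone C++ sketch (illustration only):
/*
  #include <cstdint>
  #include <cassert>

  uint64_t add64_via_halves(uint64_t a, uint64_t b) {
    uint32_t lo    = (uint32_t)a + (uint32_t)b;                          // ADD dst.lo,src.lo
    uint32_t carry = lo < (uint32_t)a;                                   // CF of that add
    uint32_t hi    = (uint32_t)(a >> 32) + (uint32_t)(b >> 32) + carry;  // ADC dst.hi,src.hi
    return ((uint64_t)hi << 32) | lo;
  }

  int main() {
    assert(add64_via_halves(0xFFFFFFFFull, 1) == 0x100000000ull);
    assert(add64_via_halves(0x123456789ull, 0x987654321ull) ==
           0x123456789ull + 0x987654321ull);
  }
*/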
8632 
8633 // Add Long Register with Immediate
8634 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8635   match(Set dst (AddL dst src));
8636   effect(KILL cr);
8637   format %{ "ADD    $dst.lo,$src.lo\n\t"
8638             "ADC    $dst.hi,$src.hi" %}
8639   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8640   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8641   ins_pipe( ialu_reg_long );
8642 %}
8643 
8644 // Add Long Register with Memory
8645 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8646   match(Set dst (AddL dst (LoadL mem)));
8647   effect(KILL cr);
8648   ins_cost(125);
8649   format %{ "ADD    $dst.lo,$mem\n\t"
8650             "ADC    $dst.hi,$mem+4" %}
8651   opcode(0x03, 0x13);
8652   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8653   ins_pipe( ialu_reg_long_mem );
8654 %}
8655 
8656 // Subtract Long Register with Register.
8657 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8658   match(Set dst (SubL dst src));
8659   effect(KILL cr);
8660   ins_cost(200);
8661   format %{ "SUB    $dst.lo,$src.lo\n\t"
8662             "SBB    $dst.hi,$src.hi" %}
8663   opcode(0x2B, 0x1B);
8664   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8665   ins_pipe( ialu_reg_reg_long );
8666 %}
8667 
8668 // Subtract Long Register with Immediate
8669 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8670   match(Set dst (SubL dst src));
8671   effect(KILL cr);
8672   format %{ "SUB    $dst.lo,$src.lo\n\t"
8673             "SBB    $dst.hi,$src.hi" %}
8674   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8675   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8676   ins_pipe( ialu_reg_long );
8677 %}
8678 
8679 // Subtract Long Register with Memory
8680 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8681   match(Set dst (SubL dst (LoadL mem)));
8682   effect(KILL cr);
8683   ins_cost(125);
8684   format %{ "SUB    $dst.lo,$mem\n\t"
8685             "SBB    $dst.hi,$mem+4" %}
8686   opcode(0x2B, 0x1B);
8687   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8688   ins_pipe( ialu_reg_long_mem );
8689 %}
8690 
8691 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8692   match(Set dst (SubL zero dst));
8693   effect(KILL cr);
8694   ins_cost(300);
8695   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8696   ins_encode( neg_long(dst) );
8697   ins_pipe( ialu_reg_reg_long );
8698 %}
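
// Long negation is done in halves: NEG the high word, NEG the low word
// (which sets CF exactly when the low word was non-zero), then SBB the
// resulting borrow out of the high word.  A standalone C++ sketch of that
// sequence (illustration only):
/*
  #include <cstdint>
  #include <cassert>

  uint64_t neg64_via_halves(uint64_t x) {
    uint32_t lo = (uint32_t)x;
    uint32_t hi = (uint32_t)(x >> 32);
    hi = 0u - hi;                 // NEG $dst.hi
    uint32_t cf = (lo != 0);      // CF produced by NEG $dst.lo
    lo = 0u - lo;                 // NEG $dst.lo
    hi = hi - cf;                 // SBB $dst.hi,0
    return ((uint64_t)hi << 32) | lo;
  }

  int main() {
    assert(neg64_via_halves(0) == 0);
    assert(neg64_via_halves(1) == 0xFFFFFFFFFFFFFFFFull);
    assert(neg64_via_halves(0x100000000ull) == 0xFFFFFFFF00000000ull);
  }
*/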
8699 
8700 // And Long Register with Register
8701 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8702   match(Set dst (AndL dst src));
8703   effect(KILL cr);
8704   format %{ "AND    $dst.lo,$src.lo\n\t"
8705             "AND    $dst.hi,$src.hi" %}
8706   opcode(0x23,0x23);
8707   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8708   ins_pipe( ialu_reg_reg_long );
8709 %}
8710 
8711 // And Long Register with Immediate
8712 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8713   match(Set dst (AndL dst src));
8714   effect(KILL cr);
8715   format %{ "AND    $dst.lo,$src.lo\n\t"
8716             "AND    $dst.hi,$src.hi" %}
8717   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8718   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8719   ins_pipe( ialu_reg_long );
8720 %}
8721 
8722 // And Long Register with Memory
8723 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8724   match(Set dst (AndL dst (LoadL mem)));
8725   effect(KILL cr);
8726   ins_cost(125);
8727   format %{ "AND    $dst.lo,$mem\n\t"
8728             "AND    $dst.hi,$mem+4" %}
8729   opcode(0x23, 0x23);
8730   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8731   ins_pipe( ialu_reg_long_mem );
8732 %}
8733 
8734 // BMI1 instructions
8735 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8736   match(Set dst (AndL (XorL src1 minus_1) src2));
8737   predicate(UseBMI1Instructions);
8738   effect(KILL cr, TEMP dst);
8739 
8740   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8741             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8742          %}
8743 
8744   ins_encode %{
8745     Register Rdst = $dst$$Register;
8746     Register Rsrc1 = $src1$$Register;
8747     Register Rsrc2 = $src2$$Register;
8748     __ andnl(Rdst, Rsrc1, Rsrc2);
8749     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8750   %}
8751   ins_pipe(ialu_reg_reg_long);
8752 %}
8753 
8754 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8755   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8756   predicate(UseBMI1Instructions);
8757   effect(KILL cr, TEMP dst);
8758 
8759   ins_cost(125);
8760   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8761             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8762          %}
8763 
8764   ins_encode %{
8765     Register Rdst = $dst$$Register;
8766     Register Rsrc1 = $src1$$Register;
8767     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8768 
8769     __ andnl(Rdst, Rsrc1, $src2$$Address);
8770     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8771   %}
8772   ins_pipe(ialu_reg_mem);
8773 %}
8774 
8775 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8776   match(Set dst (AndL (SubL imm_zero src) src));
8777   predicate(UseBMI1Instructions);
8778   effect(KILL cr, TEMP dst);
8779 
8780   format %{ "MOVL   $dst.hi, 0\n\t"
8781             "BLSIL  $dst.lo, $src.lo\n\t"
8782             "JNZ    done\n\t"
8783             "BLSIL  $dst.hi, $src.hi\n"
8784             "done:"
8785          %}
8786 
8787   ins_encode %{
8788     Label done;
8789     Register Rdst = $dst$$Register;
8790     Register Rsrc = $src$$Register;
8791     __ movl(HIGH_FROM_LOW(Rdst), 0);
8792     __ blsil(Rdst, Rsrc);
8793     __ jccb(Assembler::notZero, done);
8794     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8795     __ bind(done);
8796   %}
8797   ins_pipe(ialu_reg);
8798 %}
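
// 64-bit BLSI (isolate the lowest set bit) built from two 32-bit BLSILs:
// if the low word has any set bit the answer lives entirely in the low
// word and the high result stays zero (the JNZ above skips the second
// BLSIL); otherwise only the high word is examined.  A standalone C++
// sketch (illustration only):
/*
  #include <cstdint>
  #include <cassert>

  uint32_t blsi32(uint32_t x) { return x & (0u - x); }

  uint64_t blsi64_via_halves(uint64_t x) {
    uint32_t lo = blsi32((uint32_t)x);          // BLSIL dst.lo, src.lo
    uint32_t hi = 0;                            // MOVL  dst.hi, 0
    if (lo == 0)                                // fall through only when ZF set
      hi = blsi32((uint32_t)(x >> 32));         // BLSIL dst.hi, src.hi
    return ((uint64_t)hi << 32) | lo;
  }

  int main() {
    assert(blsi64_via_halves(0) == 0);
    assert(blsi64_via_halves(0xF0ull) == 0x10ull);
    assert(blsi64_via_halves(0x300000000ull) == 0x100000000ull);
  }
*/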
8799 
8800 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8801   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8802   predicate(UseBMI1Instructions);
8803   effect(KILL cr, TEMP dst);
8804 
8805   ins_cost(125);
8806   format %{ "MOVL   $dst.hi, 0\n\t"
8807             "BLSIL  $dst.lo, $src\n\t"
8808             "JNZ    done\n\t"
8809             "BLSIL  $dst.hi, $src+4\n"
8810             "done:"
8811          %}
8812 
8813   ins_encode %{
8814     Label done;
8815     Register Rdst = $dst$$Register;
8816     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8817 
8818     __ movl(HIGH_FROM_LOW(Rdst), 0);
8819     __ blsil(Rdst, $src$$Address);
8820     __ jccb(Assembler::notZero, done);
8821     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8822     __ bind(done);
8823   %}
8824   ins_pipe(ialu_reg_mem);
8825 %}
8826 
8827 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8828 %{
8829   match(Set dst (XorL (AddL src minus_1) src));
8830   predicate(UseBMI1Instructions);
8831   effect(KILL cr, TEMP dst);
8832 
8833   format %{ "MOVL    $dst.hi, 0\n\t"
8834             "BLSMSKL $dst.lo, $src.lo\n\t"
8835             "JNC     done\n\t"
8836             "BLSMSKL $dst.hi, $src.hi\n"
8837             "done:"
8838          %}
8839 
8840   ins_encode %{
8841     Label done;
8842     Register Rdst = $dst$$Register;
8843     Register Rsrc = $src$$Register;
8844     __ movl(HIGH_FROM_LOW(Rdst), 0);
8845     __ blsmskl(Rdst, Rsrc);
8846     __ jccb(Assembler::carryClear, done);
8847     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8848     __ bind(done);
8849   %}
8850 
8851   ins_pipe(ialu_reg);
8852 %}
8853 
8854 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8855 %{
8856   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8857   predicate(UseBMI1Instructions);
8858   effect(KILL cr, TEMP dst);
8859 
8860   ins_cost(125);
8861   format %{ "MOVL    $dst.hi, 0\n\t"
8862             "BLSMSKL $dst.lo, $src\n\t"
8863             "JNC     done\n\t"
8864             "BLSMSKL $dst.hi, $src+4\n"
8865             "done:"
8866          %}
8867 
8868   ins_encode %{
8869     Label done;
8870     Register Rdst = $dst$$Register;
8871     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8872 
8873     __ movl(HIGH_FROM_LOW(Rdst), 0);
8874     __ blsmskl(Rdst, $src$$Address);
8875     __ jccb(Assembler::carryClear, done);
8876     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8877     __ bind(done);
8878   %}
8879 
8880   ins_pipe(ialu_reg_mem);
8881 %}
8882 
8883 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8884 %{
8885   match(Set dst (AndL (AddL src minus_1) src) );
8886   predicate(UseBMI1Instructions);
8887   effect(KILL cr, TEMP dst);
8888 
8889   format %{ "MOVL   $dst.hi, $src.hi\n\t"
8890             "BLSRL  $dst.lo, $src.lo\n\t"
8891             "JNC    done\n\t"
8892             "BLSRL  $dst.hi, $src.hi\n"
8893             "done:"
8894   %}
8895 
8896   ins_encode %{
8897     Label done;
8898     Register Rdst = $dst$$Register;
8899     Register Rsrc = $src$$Register;
8900     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8901     __ blsrl(Rdst, Rsrc);
8902     __ jccb(Assembler::carryClear, done);
8903     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8904     __ bind(done);
8905   %}
8906 
8907   ins_pipe(ialu_reg);
8908 %}
8909 
8910 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8911 %{
8912   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8913   predicate(UseBMI1Instructions);
8914   effect(KILL cr, TEMP dst);
8915 
8916   ins_cost(125);
8917   format %{ "MOVL   $dst.hi, $src+4\n\t"
8918             "BLSRL  $dst.lo, $src\n\t"
8919             "JNC    done\n\t"
8920             "BLSRL  $dst.hi, $src+4\n"
8921             "done:"
8922   %}
8923 
8924   ins_encode %{
8925     Label done;
8926     Register Rdst = $dst$$Register;
8927     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8928     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
8929     __ blsrl(Rdst, $src$$Address);
8930     __ jccb(Assembler::carryClear, done);
8931     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
8932     __ bind(done);
8933   %}
8934 
8935   ins_pipe(ialu_reg_mem);
8936 %}
8937 
8938 // Or Long Register with Register
8939 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8940   match(Set dst (OrL dst src));
8941   effect(KILL cr);
8942   format %{ "OR     $dst.lo,$src.lo\n\t"
8943             "OR     $dst.hi,$src.hi" %}
8944   opcode(0x0B,0x0B);
8945   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8946   ins_pipe( ialu_reg_reg_long );
8947 %}
8948 
8949 // Or Long Register with Immediate
8950 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8951   match(Set dst (OrL dst src));
8952   effect(KILL cr);
8953   format %{ "OR     $dst.lo,$src.lo\n\t"
8954             "OR     $dst.hi,$src.hi" %}
8955   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
8956   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8957   ins_pipe( ialu_reg_long );
8958 %}
8959 
8960 // Or Long Register with Memory
8961 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8962   match(Set dst (OrL dst (LoadL mem)));
8963   effect(KILL cr);
8964   ins_cost(125);
8965   format %{ "OR     $dst.lo,$mem\n\t"
8966             "OR     $dst.hi,$mem+4" %}
8967   opcode(0x0B,0x0B);
8968   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8969   ins_pipe( ialu_reg_long_mem );
8970 %}
8971 
8972 // Xor Long Register with Register
8973 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8974   match(Set dst (XorL dst src));
8975   effect(KILL cr);
8976   format %{ "XOR    $dst.lo,$src.lo\n\t"
8977             "XOR    $dst.hi,$src.hi" %}
8978   opcode(0x33,0x33);
8979   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8980   ins_pipe( ialu_reg_reg_long );
8981 %}
8982 
8983 // Xor Long Register with Immediate -1
8984 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
8985   match(Set dst (XorL dst imm));
8986   format %{ "NOT    $dst.lo\n\t"
8987             "NOT    $dst.hi" %}
8988   ins_encode %{
8989      __ notl($dst$$Register);
8990      __ notl(HIGH_FROM_LOW($dst$$Register));
8991   %}
8992   ins_pipe( ialu_reg_long );
8993 %}
8994 
8995 // Xor Long Register with Immediate
8996 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8997   match(Set dst (XorL dst src));
8998   effect(KILL cr);
8999   format %{ "XOR    $dst.lo,$src.lo\n\t"
9000             "XOR    $dst.hi,$src.hi" %}
9001   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9002   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9003   ins_pipe( ialu_reg_long );
9004 %}
9005 
9006 // Xor Long Register with Memory
9007 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9008   match(Set dst (XorL dst (LoadL mem)));
9009   effect(KILL cr);
9010   ins_cost(125);
9011   format %{ "XOR    $dst.lo,$mem\n\t"
9012             "XOR    $dst.hi,$mem+4" %}
9013   opcode(0x33,0x33);
9014   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9015   ins_pipe( ialu_reg_long_mem );
9016 %}
9017 
9018 // Shift Left Long by 1
9019 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9020   predicate(UseNewLongLShift);
9021   match(Set dst (LShiftL dst cnt));
9022   effect(KILL cr);
9023   ins_cost(100);
9024   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9025             "ADC    $dst.hi,$dst.hi" %}
9026   ins_encode %{
9027     __ addl($dst$$Register,$dst$$Register);
9028     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9029   %}
9030   ins_pipe( ialu_reg_long );
9031 %}
9032 
9033 // Shift Left Long by 2
9034 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9035   predicate(UseNewLongLShift);
9036   match(Set dst (LShiftL dst cnt));
9037   effect(KILL cr);
9038   ins_cost(100);
9039   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9040             "ADC    $dst.hi,$dst.hi\n\t"
9041             "ADD    $dst.lo,$dst.lo\n\t"
9042             "ADC    $dst.hi,$dst.hi" %}
9043   ins_encode %{
9044     __ addl($dst$$Register,$dst$$Register);
9045     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9046     __ addl($dst$$Register,$dst$$Register);
9047     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9048   %}
9049   ins_pipe( ialu_reg_long );
9050 %}
9051 
9052 // Shift Left Long by 3
9053 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9054   predicate(UseNewLongLShift);
9055   match(Set dst (LShiftL dst cnt));
9056   effect(KILL cr);
9057   ins_cost(100);
9058   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9059             "ADC    $dst.hi,$dst.hi\n\t"
9060             "ADD    $dst.lo,$dst.lo\n\t"
9061             "ADC    $dst.hi,$dst.hi\n\t"
9062             "ADD    $dst.lo,$dst.lo\n\t"
9063             "ADC    $dst.hi,$dst.hi" %}
9064   ins_encode %{
9065     __ addl($dst$$Register,$dst$$Register);
9066     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9067     __ addl($dst$$Register,$dst$$Register);
9068     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9069     __ addl($dst$$Register,$dst$$Register);
9070     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9071   %}
9072   ins_pipe( ialu_reg_long );
9073 %}
9074 
9075 // Shift Left Long by 1-31
9076 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9077   match(Set dst (LShiftL dst cnt));
9078   effect(KILL cr);
9079   ins_cost(200);
9080   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9081             "SHL    $dst.lo,$cnt" %}
9082   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9083   ins_encode( move_long_small_shift(dst,cnt) );
9084   ins_pipe( ialu_reg_long );
9085 %}
9086 
9087 // Shift Left Long by 32-63
9088 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9089   match(Set dst (LShiftL dst cnt));
9090   effect(KILL cr);
9091   ins_cost(300);
9092   format %{ "MOV    $dst.hi,$dst.lo\n"
9093           "\tSHL    $dst.hi,$cnt-32\n"
9094           "\tXOR    $dst.lo,$dst.lo" %}
9095   opcode(0xC1, 0x4);  /* C1 /4 ib */
9096   ins_encode( move_long_big_shift_clr(dst,cnt) );
9097   ins_pipe( ialu_reg_long );
9098 %}
9099 
9100 // Shift Left Long by variable
9101 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9102   match(Set dst (LShiftL dst shift));
9103   effect(KILL cr);
9104   ins_cost(500+200);
9105   size(17);
9106   format %{ "TEST   $shift,32\n\t"
9107             "JEQ,s  small\n\t"
9108             "MOV    $dst.hi,$dst.lo\n\t"
9109             "XOR    $dst.lo,$dst.lo\n"
9110     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9111             "SHL    $dst.lo,$shift" %}
9112   ins_encode( shift_left_long( dst, shift ) );
9113   ins_pipe( pipe_slow );
9114 %}
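
// The long left-shift rules above split the shift across the two 32-bit
// halves: for counts 1-31, SHLD feeds the bits shifted out of the low word
// into the high word; for counts 32-63, the low word becomes the high word
// (shifted by count-32) and the low word is cleared; the variable-count
// rule tests bit 5 of the count to choose between the two cases.  A
// standalone C++ sketch covering both cases (illustration only):
/*
  #include <cstdint>
  #include <cassert>

  uint64_t shl64_via_halves(uint64_t x, unsigned count) {
    uint32_t lo = (uint32_t)x;
    uint32_t hi = (uint32_t)(x >> 32);
    count &= 63;                                   // LShiftL counts are mod 64
    if (count == 0) return x;
    if (count < 32) {
      hi = (hi << count) | (lo >> (32 - count));   // SHLD dst.hi,dst.lo,count
      lo = lo << count;                            // SHL  dst.lo,count
    } else {
      hi = lo << (count - 32);                     // MOV dst.hi,dst.lo; SHL dst.hi,count-32
      lo = 0;                                      // XOR dst.lo,dst.lo
    }
    return ((uint64_t)hi << 32) | lo;
  }

  int main() {
    for (unsigned c = 0; c < 64; c++)
      assert(shl64_via_halves(0x89ABCDEF01234567ull, c) ==
             (0x89ABCDEF01234567ull << c));
  }
*/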
9115 
9116 // Shift Right Long by 1-31
9117 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9118   match(Set dst (URShiftL dst cnt));
9119   effect(KILL cr);
9120   ins_cost(200);
9121   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9122             "SHR    $dst.hi,$cnt" %}
9123   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9124   ins_encode( move_long_small_shift(dst,cnt) );
9125   ins_pipe( ialu_reg_long );
9126 %}
9127 
9128 // Shift Right Long by 32-63
9129 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9130   match(Set dst (URShiftL dst cnt));
9131   effect(KILL cr);
9132   ins_cost(300);
9133   format %{ "MOV    $dst.lo,$dst.hi\n"
9134           "\tSHR    $dst.lo,$cnt-32\n"
9135           "\tXOR    $dst.hi,$dst.hi" %}
9136   opcode(0xC1, 0x5);  /* C1 /5 ib */
9137   ins_encode( move_long_big_shift_clr(dst,cnt) );
9138   ins_pipe( ialu_reg_long );
9139 %}
9140 
9141 // Shift Right Long by variable
9142 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9143   match(Set dst (URShiftL dst shift));
9144   effect(KILL cr);
9145   ins_cost(600);
9146   size(17);
9147   format %{ "TEST   $shift,32\n\t"
9148             "JEQ,s  small\n\t"
9149             "MOV    $dst.lo,$dst.hi\n\t"
9150             "XOR    $dst.hi,$dst.hi\n"
9151     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9152             "SHR    $dst.hi,$shift" %}
9153   ins_encode( shift_right_long( dst, shift ) );
9154   ins_pipe( pipe_slow );
9155 %}
9156 
// Shift Right arithmetic Long by 1-31
9158 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9159   match(Set dst (RShiftL dst cnt));
9160   effect(KILL cr);
9161   ins_cost(200);
9162   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9163             "SAR    $dst.hi,$cnt" %}
9164   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9165   ins_encode( move_long_small_shift(dst,cnt) );
9166   ins_pipe( ialu_reg_long );
9167 %}
9168 
// Shift Right arithmetic Long by 32-63
9170 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9171   match(Set dst (RShiftL dst cnt));
9172   effect(KILL cr);
9173   ins_cost(300);
9174   format %{ "MOV    $dst.lo,$dst.hi\n"
9175           "\tSAR    $dst.lo,$cnt-32\n"
9176           "\tSAR    $dst.hi,31" %}
9177   opcode(0xC1, 0x7);  /* C1 /7 ib */
9178   ins_encode( move_long_big_shift_sign(dst,cnt) );
9179   ins_pipe( ialu_reg_long );
9180 %}
9181 
9182 // Shift Right arithmetic Long by variable
9183 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9184   match(Set dst (RShiftL dst shift));
9185   effect(KILL cr);
9186   ins_cost(600);
9187   size(18);
9188   format %{ "TEST   $shift,32\n\t"
9189             "JEQ,s  small\n\t"
9190             "MOV    $dst.lo,$dst.hi\n\t"
9191             "SAR    $dst.hi,31\n"
9192     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9193             "SAR    $dst.hi,$shift" %}
9194   ins_encode( shift_right_arith_long( dst, shift ) );
9195   ins_pipe( pipe_slow );
9196 %}
9197 
9198 
9199 //----------Double Instructions------------------------------------------------
9200 // Double Math
9201 
9202 // Compare & branch
9203 
// P6 version of double compare, sets condition codes in EFLAGS
9205 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9206   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9207   match(Set cr (CmpD src1 src2));
9208   effect(KILL rax);
9209   ins_cost(150);
9210   format %{ "FLD    $src1\n\t"
9211             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9212             "JNP    exit\n\t"
9213             "MOV    ah,1       // saw a NaN, set CF\n\t"
9214             "SAHF\n"
9215      "exit:\tNOP               // avoid branch to branch" %}
9216   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9217   ins_encode( Push_Reg_DPR(src1),
9218               OpcP, RegOpc(src2),
9219               cmpF_P6_fixup );
9220   ins_pipe( pipe_slow );
9221 %}
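
// The JNP/SAHF fixup above only runs when FUCOMIP reports an unordered
// result (a NaN operand): it forces CF so that the unordered case looks
// like "less than" to the flag consumers, matching the "unordered treat
// as LT" comment in the non-P6 rule below.  A standalone C++ sketch of
// that convention (illustration only):
/*
  #include <cmath>
  #include <cassert>

  int cmp_nan_as_less(double a, double b) {
    if (std::isnan(a) || std::isnan(b)) return -1;  // unordered -> "less"
    if (a < b)  return -1;
    if (a == b) return 0;
    return 1;
  }

  int main() {
    assert(cmp_nan_as_less(1.0, 2.0) == -1);
    assert(cmp_nan_as_less(2.0, 1.0) ==  1);
    assert(cmp_nan_as_less(NAN, 1.0) == -1);
  }
*/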
9222 
9223 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9224   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9225   match(Set cr (CmpD src1 src2));
9226   ins_cost(150);
9227   format %{ "FLD    $src1\n\t"
9228             "FUCOMIP ST,$src2  // P6 instruction" %}
9229   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9230   ins_encode( Push_Reg_DPR(src1),
9231               OpcP, RegOpc(src2));
9232   ins_pipe( pipe_slow );
9233 %}
9234 
9235 // Compare & branch
9236 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9237   predicate(UseSSE<=1);
9238   match(Set cr (CmpD src1 src2));
9239   effect(KILL rax);
9240   ins_cost(200);
9241   format %{ "FLD    $src1\n\t"
9242             "FCOMp  $src2\n\t"
9243             "FNSTSW AX\n\t"
9244             "TEST   AX,0x400\n\t"
9245             "JZ,s   flags\n\t"
9246             "MOV    AH,1\t# unordered treat as LT\n"
9247     "flags:\tSAHF" %}
9248   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9249   ins_encode( Push_Reg_DPR(src1),
9250               OpcP, RegOpc(src2),
9251               fpu_flags);
9252   ins_pipe( pipe_slow );
9253 %}
9254 
9255 // Compare vs zero into -1,0,1
9256 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9257   predicate(UseSSE<=1);
9258   match(Set dst (CmpD3 src1 zero));
9259   effect(KILL cr, KILL rax);
9260   ins_cost(280);
9261   format %{ "FTSTD  $dst,$src1" %}
9262   opcode(0xE4, 0xD9);
9263   ins_encode( Push_Reg_DPR(src1),
9264               OpcS, OpcP, PopFPU,
9265               CmpF_Result(dst));
9266   ins_pipe( pipe_slow );
9267 %}
9268 
9269 // Compare into -1,0,1
9270 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9271   predicate(UseSSE<=1);
9272   match(Set dst (CmpD3 src1 src2));
9273   effect(KILL cr, KILL rax);
9274   ins_cost(300);
9275   format %{ "FCMPD  $dst,$src1,$src2" %}
9276   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9277   ins_encode( Push_Reg_DPR(src1),
9278               OpcP, RegOpc(src2),
9279               CmpF_Result(dst));
9280   ins_pipe( pipe_slow );
9281 %}
9282 
// Compare doubles and set condition codes in EFLAGS by XMM regs
9284 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9285   predicate(UseSSE>=2);
9286   match(Set cr (CmpD src1 src2));
9287   ins_cost(145);
9288   format %{ "UCOMISD $src1,$src2\n\t"
9289             "JNP,s   exit\n\t"
9290             "PUSHF\t# saw NaN, set CF\n\t"
9291             "AND     [rsp], #0xffffff2b\n\t"
9292             "POPF\n"
9293     "exit:" %}
9294   ins_encode %{
9295     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9296     emit_cmpfp_fixup(_masm);
9297   %}
9298   ins_pipe( pipe_slow );
9299 %}
9300 
9301 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9302   predicate(UseSSE>=2);
9303   match(Set cr (CmpD src1 src2));
9304   ins_cost(100);
9305   format %{ "UCOMISD $src1,$src2" %}
9306   ins_encode %{
9307     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9308   %}
9309   ins_pipe( pipe_slow );
9310 %}
9311 
// Compare doubles and set condition codes in EFLAGS by XMM regs
9313 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9314   predicate(UseSSE>=2);
9315   match(Set cr (CmpD src1 (LoadD src2)));
9316   ins_cost(145);
9317   format %{ "UCOMISD $src1,$src2\n\t"
9318             "JNP,s   exit\n\t"
9319             "PUSHF\t# saw NaN, set CF\n\t"
9320             "AND     [rsp], #0xffffff2b\n\t"
9321             "POPF\n"
9322     "exit:" %}
9323   ins_encode %{
9324     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9325     emit_cmpfp_fixup(_masm);
9326   %}
9327   ins_pipe( pipe_slow );
9328 %}
9329 
9330 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9331   predicate(UseSSE>=2);
9332   match(Set cr (CmpD src1 (LoadD src2)));
9333   ins_cost(100);
9334   format %{ "UCOMISD $src1,$src2" %}
9335   ins_encode %{
9336     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9337   %}
9338   ins_pipe( pipe_slow );
9339 %}
9340 
9341 // Compare into -1,0,1 in XMM
9342 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9343   predicate(UseSSE>=2);
9344   match(Set dst (CmpD3 src1 src2));
9345   effect(KILL cr);
9346   ins_cost(255);
9347   format %{ "UCOMISD $src1, $src2\n\t"
9348             "MOV     $dst, #-1\n\t"
9349             "JP,s    done\n\t"
9350             "JB,s    done\n\t"
9351             "SETNE   $dst\n\t"
9352             "MOVZB   $dst, $dst\n"
9353     "done:" %}
9354   ins_encode %{
9355     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9356     emit_cmpfp3(_masm, $dst$$Register);
9357   %}
9358   ins_pipe( pipe_slow );
9359 %}
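
// The sequence above materializes CmpD3 as: -1 for "less than or
// unordered" (JP and JB both exit with the preloaded -1), 0 for equal,
// and +1 otherwise.  A standalone C++ sketch of that result table
// (illustration only):
/*
  #include <cmath>
  #include <cassert>

  int cmpD3(double a, double b) {
    if (std::isnan(a) || std::isnan(b)) return -1;  // JP,s done (dst still -1)
    if (a < b)  return -1;                          // JB,s done (dst still -1)
    if (a == b) return 0;                           // SETNE leaves 0
    return 1;                                       // SETNE/MOVZB yield 1
  }

  int main() {
    assert(cmpD3(1.0, 2.0) == -1);
    assert(cmpD3(2.0, 2.0) ==  0);
    assert(cmpD3(3.0, 2.0) ==  1);
    assert(cmpD3(NAN, 0.0) == -1);
  }
*/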
9360 
9361 // Compare into -1,0,1 in XMM and memory
9362 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9363   predicate(UseSSE>=2);
9364   match(Set dst (CmpD3 src1 (LoadD src2)));
9365   effect(KILL cr);
9366   ins_cost(275);
9367   format %{ "UCOMISD $src1, $src2\n\t"
9368             "MOV     $dst, #-1\n\t"
9369             "JP,s    done\n\t"
9370             "JB,s    done\n\t"
9371             "SETNE   $dst\n\t"
9372             "MOVZB   $dst, $dst\n"
9373     "done:" %}
9374   ins_encode %{
9375     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9376     emit_cmpfp3(_masm, $dst$$Register);
9377   %}
9378   ins_pipe( pipe_slow );
9379 %}
9380 
9381 
9382 instruct subDPR_reg(regDPR dst, regDPR src) %{
9383   predicate (UseSSE <=1);
9384   match(Set dst (SubD dst src));
9385 
9386   format %{ "FLD    $src\n\t"
9387             "DSUBp  $dst,ST" %}
9388   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9389   ins_cost(150);
9390   ins_encode( Push_Reg_DPR(src),
9391               OpcP, RegOpc(dst) );
9392   ins_pipe( fpu_reg_reg );
9393 %}
9394 
9395 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9396   predicate (UseSSE <=1);
9397   match(Set dst (RoundDouble (SubD src1 src2)));
9398   ins_cost(250);
9399 
9400   format %{ "FLD    $src2\n\t"
9401             "DSUB   ST,$src1\n\t"
9402             "FSTP_D $dst\t# D-round" %}
9403   opcode(0xD8, 0x5);
9404   ins_encode( Push_Reg_DPR(src2),
9405               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9406   ins_pipe( fpu_mem_reg_reg );
9407 %}
9408 
9409 
9410 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9411   predicate (UseSSE <=1);
9412   match(Set dst (SubD dst (LoadD src)));
9413   ins_cost(150);
9414 
9415   format %{ "FLD    $src\n\t"
9416             "DSUBp  $dst,ST" %}
9417   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9418   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9419               OpcP, RegOpc(dst) );
9420   ins_pipe( fpu_reg_mem );
9421 %}
9422 
9423 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9424   predicate (UseSSE<=1);
9425   match(Set dst (AbsD src));
9426   ins_cost(100);
9427   format %{ "FABS" %}
9428   opcode(0xE1, 0xD9);
9429   ins_encode( OpcS, OpcP );
9430   ins_pipe( fpu_reg_reg );
9431 %}
9432 
9433 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9434   predicate(UseSSE<=1);
9435   match(Set dst (NegD src));
9436   ins_cost(100);
9437   format %{ "FCHS" %}
9438   opcode(0xE0, 0xD9);
9439   ins_encode( OpcS, OpcP );
9440   ins_pipe( fpu_reg_reg );
9441 %}
9442 
9443 instruct addDPR_reg(regDPR dst, regDPR src) %{
9444   predicate(UseSSE<=1);
9445   match(Set dst (AddD dst src));
9446   format %{ "FLD    $src\n\t"
9447             "DADD   $dst,ST" %}
9448   size(4);
9449   ins_cost(150);
9450   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9451   ins_encode( Push_Reg_DPR(src),
9452               OpcP, RegOpc(dst) );
9453   ins_pipe( fpu_reg_reg );
9454 %}
9455 
9456 
9457 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9458   predicate(UseSSE<=1);
9459   match(Set dst (RoundDouble (AddD src1 src2)));
9460   ins_cost(250);
9461 
9462   format %{ "FLD    $src2\n\t"
9463             "DADD   ST,$src1\n\t"
9464             "FSTP_D $dst\t# D-round" %}
9465   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9466   ins_encode( Push_Reg_DPR(src2),
9467               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9468   ins_pipe( fpu_mem_reg_reg );
9469 %}
9470 
9471 
9472 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9473   predicate(UseSSE<=1);
9474   match(Set dst (AddD dst (LoadD src)));
9475   ins_cost(150);
9476 
9477   format %{ "FLD    $src\n\t"
9478             "DADDp  $dst,ST" %}
9479   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9480   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9481               OpcP, RegOpc(dst) );
9482   ins_pipe( fpu_reg_mem );
9483 %}
9484 
9485 // add-to-memory
9486 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9487   predicate(UseSSE<=1);
9488   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9489   ins_cost(150);
9490 
9491   format %{ "FLD_D  $dst\n\t"
9492             "DADD   ST,$src\n\t"
9493             "FST_D  $dst" %}
9494   opcode(0xDD, 0x0);
9495   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9496               Opcode(0xD8), RegOpc(src),
9497               set_instruction_start,
9498               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9499   ins_pipe( fpu_reg_mem );
9500 %}
9501 
9502 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9503   predicate(UseSSE<=1);
9504   match(Set dst (AddD dst con));
9505   ins_cost(125);
9506   format %{ "FLD1\n\t"
9507             "DADDp  $dst,ST" %}
9508   ins_encode %{
9509     __ fld1();
9510     __ faddp($dst$$reg);
9511   %}
9512   ins_pipe(fpu_reg);
9513 %}
9514 
9515 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9516   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9517   match(Set dst (AddD dst con));
9518   ins_cost(200);
9519   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9520             "DADDp  $dst,ST" %}
9521   ins_encode %{
9522     __ fld_d($constantaddress($con));
9523     __ faddp($dst$$reg);
9524   %}
9525   ins_pipe(fpu_reg_mem);
9526 %}
9527 
9528 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9529   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9530   match(Set dst (RoundDouble (AddD src con)));
9531   ins_cost(200);
9532   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9533             "DADD   ST,$src\n\t"
9534             "FSTP_D $dst\t# D-round" %}
9535   ins_encode %{
9536     __ fld_d($constantaddress($con));
9537     __ fadd($src$$reg);
9538     __ fstp_d(Address(rsp, $dst$$disp));
9539   %}
9540   ins_pipe(fpu_mem_reg_con);
9541 %}
9542 
9543 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9544   predicate(UseSSE<=1);
9545   match(Set dst (MulD dst src));
9546   format %{ "FLD    $src\n\t"
9547             "DMULp  $dst,ST" %}
9548   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9549   ins_cost(150);
9550   ins_encode( Push_Reg_DPR(src),
9551               OpcP, RegOpc(dst) );
9552   ins_pipe( fpu_reg_reg );
9553 %}
9554 
9555 // Strict FP instruction biases argument before multiply then
9556 // biases result to avoid double rounding of subnormals.
9557 //
9558 // scale arg1 by multiplying arg1 by 2^(-15360)
9559 // load arg2
9560 // multiply scaled arg1 by arg2
9561 // rescale product by 2^(15360)
9562 //
9563 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9564   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9565   match(Set dst (MulD dst src));
9566   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9567 
9568   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9569             "DMULp  $dst,ST\n\t"
9570             "FLD    $src\n\t"
9571             "DMULp  $dst,ST\n\t"
9572             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9573             "DMULp  $dst,ST\n\t" %}
9574   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9575   ins_encode( strictfp_bias1(dst),
9576               Push_Reg_DPR(src),
9577               OpcP, RegOpc(dst),
9578               strictfp_bias2(dst) );
9579   ins_pipe( fpu_reg_reg );
9580 %}
9581 
9582 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9583   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9584   match(Set dst (MulD dst con));
9585   ins_cost(200);
9586   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9587             "DMULp  $dst,ST" %}
9588   ins_encode %{
9589     __ fld_d($constantaddress($con));
9590     __ fmulp($dst$$reg);
9591   %}
9592   ins_pipe(fpu_reg_mem);
9593 %}
9594 
9595 
9596 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9597   predicate( UseSSE<=1 );
9598   match(Set dst (MulD dst (LoadD src)));
9599   ins_cost(200);
9600   format %{ "FLD_D  $src\n\t"
9601             "DMULp  $dst,ST" %}
9602   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9603   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9604               OpcP, RegOpc(dst) );
9605   ins_pipe( fpu_reg_mem );
9606 %}
9607 
9608 //
9609 // Cisc-alternate to reg-reg multiply
9610 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9611   predicate( UseSSE<=1 );
9612   match(Set dst (MulD src (LoadD mem)));
9613   ins_cost(250);
9614   format %{ "FLD_D  $mem\n\t"
9615             "DMUL   ST,$src\n\t"
9616             "FSTP_D $dst" %}
9617   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9618   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9619               OpcReg_FPR(src),
9620               Pop_Reg_DPR(dst) );
9621   ins_pipe( fpu_reg_reg_mem );
9622 %}
9623 
9624 
9625 // MACRO3 -- addDPR a mulDPR
9626 // This instruction is a '2-address' instruction in that the result goes
9627 // back to src2.  This eliminates a move from the macro; possibly the
9628 // register allocator will have to add it back (and maybe not).
9629 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9630   predicate( UseSSE<=1 );
9631   match(Set src2 (AddD (MulD src0 src1) src2));
9632   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9633             "DMUL   ST,$src1\n\t"
9634             "DADDp  $src2,ST" %}
9635   ins_cost(250);
9636   opcode(0xDD); /* LoadD DD /0 */
9637   ins_encode( Push_Reg_FPR(src0),
9638               FMul_ST_reg(src1),
9639               FAddP_reg_ST(src2) );
9640   ins_pipe( fpu_reg_reg_reg );
9641 %}
9642 
9643 
9644 // MACRO3 -- subDPR a mulDPR
9645 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9646   predicate( UseSSE<=1 );
9647   match(Set src2 (SubD (MulD src0 src1) src2));
9648   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9649             "DMUL   ST,$src1\n\t"
9650             "DSUBRp $src2,ST" %}
9651   ins_cost(250);
9652   ins_encode( Push_Reg_FPR(src0),
9653               FMul_ST_reg(src1),
9654               Opcode(0xDE), Opc_plus(0xE0,src2));
9655   ins_pipe( fpu_reg_reg_reg );
9656 %}
9657 
9658 
9659 instruct divDPR_reg(regDPR dst, regDPR src) %{
9660   predicate( UseSSE<=1 );
9661   match(Set dst (DivD dst src));
9662 
9663   format %{ "FLD    $src\n\t"
9664             "FDIVp  $dst,ST" %}
9665   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9666   ins_cost(150);
9667   ins_encode( Push_Reg_DPR(src),
9668               OpcP, RegOpc(dst) );
9669   ins_pipe( fpu_reg_reg );
9670 %}
9671 
9672 // Strict FP instruction biases argument before division then
9673 // biases result, to avoid double rounding of subnormals.
9674 //
9675 // scale dividend by multiplying dividend by 2^(-15360)
9676 // load divisor
9677 // divide scaled dividend by divisor
9678 // rescale quotient by 2^(15360)
9679 //
9680 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9685 
9686   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9687             "DMULp  $dst,ST\n\t"
9688             "FLD    $src\n\t"
9689             "FDIVp  $dst,ST\n\t"
9690             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9691             "DMULp  $dst,ST\n\t" %}
9692   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9693   ins_encode( strictfp_bias1(dst),
9694               Push_Reg_DPR(src),
9695               OpcP, RegOpc(dst),
9696               strictfp_bias2(dst) );
9697   ins_pipe( fpu_reg_reg );
9698 %}
9699 
9700 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9701   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9702   match(Set dst (RoundDouble (DivD src1 src2)));
9703 
9704   format %{ "FLD    $src1\n\t"
9705             "FDIV   ST,$src2\n\t"
9706             "FSTP_D $dst\t# D-round" %}
9707   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9708   ins_encode( Push_Reg_DPR(src1),
9709               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9710   ins_pipe( fpu_mem_reg_reg );
9711 %}
9712 
9713 
9714 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9715   predicate(UseSSE<=1);
9716   match(Set dst (ModD dst src));
9717   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9718 
9719   format %{ "DMOD   $dst,$src" %}
9720   ins_cost(250);
9721   ins_encode(Push_Reg_Mod_DPR(dst, src),
9722               emitModDPR(),
9723               Push_Result_Mod_DPR(src),
9724               Pop_Reg_DPR(dst));
9725   ins_pipe( pipe_slow );
9726 %}
9727 
9728 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9729   predicate(UseSSE>=2);
9730   match(Set dst (ModD src0 src1));
9731   effect(KILL rax, KILL cr);
9732 
9733   format %{ "SUB    ESP,8\t # DMOD\n"
9734           "\tMOVSD  [ESP+0],$src1\n"
9735           "\tFLD_D  [ESP+0]\n"
9736           "\tMOVSD  [ESP+0],$src0\n"
9737           "\tFLD_D  [ESP+0]\n"
9738      "loop:\tFPREM\n"
9739           "\tFWAIT\n"
9740           "\tFNSTSW AX\n"
9741           "\tSAHF\n"
9742           "\tJP     loop\n"
9743           "\tFSTP_D [ESP+0]\n"
9744           "\tMOVSD  $dst,[ESP+0]\n"
9745           "\tADD    ESP,8\n"
9746           "\tFSTP   ST0\t # Restore FPU Stack"
9747     %}
9748   ins_cost(250);
9749   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9750   ins_pipe( pipe_slow );
9751 %}
9752 
9753 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
9754   predicate (UseSSE<=1);
9755   match(Set dst (SinD src));
9756   ins_cost(1800);
9757   format %{ "DSIN   $dst" %}
9758   opcode(0xD9, 0xFE);
9759   ins_encode( OpcP, OpcS );
9760   ins_pipe( pipe_slow );
9761 %}
9762 
9763 instruct sinD_reg(regD dst, eFlagsReg cr) %{
9764   predicate (UseSSE>=2);
9765   match(Set dst (SinD dst));
9766   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9767   ins_cost(1800);
9768   format %{ "DSIN   $dst" %}
9769   opcode(0xD9, 0xFE);
9770   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9771   ins_pipe( pipe_slow );
9772 %}
9773 
9774 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
9775   predicate (UseSSE<=1);
9776   match(Set dst (CosD src));
9777   ins_cost(1800);
9778   format %{ "DCOS   $dst" %}
9779   opcode(0xD9, 0xFF);
9780   ins_encode( OpcP, OpcS );
9781   ins_pipe( pipe_slow );
9782 %}
9783 
9784 instruct cosD_reg(regD dst, eFlagsReg cr) %{
9785   predicate (UseSSE>=2);
9786   match(Set dst (CosD dst));
9787   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9788   ins_cost(1800);
9789   format %{ "DCOS   $dst" %}
9790   opcode(0xD9, 0xFF);
9791   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9792   ins_pipe( pipe_slow );
9793 %}
9794 
9795 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
9796   predicate (UseSSE<=1);
9797   match(Set dst(TanD src));
9798   format %{ "DTAN   $dst" %}
9799   ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
9800               Opcode(0xDD), Opcode(0xD8));   // fstp st
9801   ins_pipe( pipe_slow );
9802 %}
9803 
9804 instruct tanD_reg(regD dst, eFlagsReg cr) %{
9805   predicate (UseSSE>=2);
9806   match(Set dst(TanD dst));
9807   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9808   format %{ "DTAN   $dst" %}
9809   ins_encode( Push_SrcD(dst),
9810               Opcode(0xD9), Opcode(0xF2),    // fptan
9811               Opcode(0xDD), Opcode(0xD8),   // fstp st
9812               Push_ResultD(dst) );
9813   ins_pipe( pipe_slow );
9814 %}
9815 
9816 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9817   predicate (UseSSE<=1);
9818   match(Set dst(AtanD dst src));
9819   format %{ "DATA   $dst,$src" %}
9820   opcode(0xD9, 0xF3);
9821   ins_encode( Push_Reg_DPR(src),
9822               OpcP, OpcS, RegOpc(dst) );
9823   ins_pipe( pipe_slow );
9824 %}
9825 
9826 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9827   predicate (UseSSE>=2);
9828   match(Set dst(AtanD dst src));
9829   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9830   format %{ "DATA   $dst,$src" %}
9831   opcode(0xD9, 0xF3);
9832   ins_encode( Push_SrcD(src),
9833               OpcP, OpcS, Push_ResultD(dst) );
9834   ins_pipe( pipe_slow );
9835 %}
9836 
9837 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9838   predicate (UseSSE<=1);
9839   match(Set dst (SqrtD src));
9840   format %{ "DSQRT  $dst,$src" %}
9841   opcode(0xFA, 0xD9);
9842   ins_encode( Push_Reg_DPR(src),
9843               OpcS, OpcP, Pop_Reg_DPR(dst) );
9844   ins_pipe( pipe_slow );
9845 %}
9846 
9847 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9848   predicate (UseSSE<=1);
9849   match(Set Y (PowD X Y));  // Raise X to the Yth power
9850   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9851   format %{ "fast_pow $X $Y -> $Y  // KILL $rax, $rcx, $rdx" %}
9852   ins_encode %{
9853     __ subptr(rsp, 8);
9854     __ fld_s($X$$reg - 1);
9855     __ fast_pow();
9856     __ addptr(rsp, 8);
9857   %}
9858   ins_pipe( pipe_slow );
9859 %}
9860 
9861 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9862   predicate (UseSSE>=2);
9863   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
9864   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9865   format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
9866   ins_encode %{
9867     __ subptr(rsp, 8);
9868     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
9869     __ fld_d(Address(rsp, 0));
9870     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
9871     __ fld_d(Address(rsp, 0));
9872     __ fast_pow();
9873     __ fstp_d(Address(rsp, 0));
9874     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9875     __ addptr(rsp, 8);
9876   %}
9877   ins_pipe( pipe_slow );
9878 %}
9879 
9880 
9881 instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9882   predicate (UseSSE<=1);
9883   match(Set dpr1 (ExpD dpr1));
9884   effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
9885   format %{ "fast_exp $dpr1 -> $dpr1  // KILL $rax, $rcx, $rdx" %}
9886   ins_encode %{
9887     __ fast_exp();
9888   %}
9889   ins_pipe( pipe_slow );
9890 %}
9891 
9892 instruct expD_reg(regD dst, regD src, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9893   predicate (UseSSE>=2);
9894   match(Set dst (ExpD src));
9895   effect(KILL rax, KILL rcx, KILL rdx, KILL cr);
9896   format %{ "fast_exp $dst -> $src  // KILL $rax, $rcx, $rdx" %}
9897   ins_encode %{
9898     __ subptr(rsp, 8);
9899     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
9900     __ fld_d(Address(rsp, 0));
9901     __ fast_exp();
9902     __ fstp_d(Address(rsp, 0));
9903     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9904     __ addptr(rsp, 8);
9905   %}
9906   ins_pipe( pipe_slow );
9907 %}
9908 
9909 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
9910   predicate (UseSSE<=1);
  // The source Double operand is on the FPU stack
9912   match(Set dst (Log10D src));
9913   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9914   // fxch         ; swap ST(0) with ST(1)
9915   // fyl2x        ; compute log_10(2) * log_2(x)
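  // In outline, this relies on the identity log_10(x) = log_10(2) * log_2(x):
  // FLDLG2 pushes log_10(2), FXCH brings x back to ST(0), and FYL2X then
  // computes ST(1) * log_2(ST(0)) and pops, leaving log_10(x) on top.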
9916   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9917             "FXCH   \n\t"
9918             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9919          %}
9920   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9921               Opcode(0xD9), Opcode(0xC9),   // fxch
9922               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9923 
9924   ins_pipe( pipe_slow );
9925 %}
9926 
9927 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
9928   predicate (UseSSE>=2);
9929   effect(KILL cr);
9930   match(Set dst (Log10D src));
9931   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9932   // fyl2x        ; compute log_10(2) * log_2(x)
9933   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9934             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9935          %}
9936   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9937               Push_SrcD(src),
9938               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9939               Push_ResultD(dst));
9940 
9941   ins_pipe( pipe_slow );
9942 %}
9943 
9944 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
9945   predicate (UseSSE<=1);
  // The source Double operand is on the FPU stack
9947   match(Set dst (LogD src));
9948   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9949   // fxch         ; swap ST(0) with ST(1)
9950   // fyl2x        ; compute log_e(2) * log_2(x)
9951   format %{ "FLDLN2 \t\t\t#Log_e\n\t"
9952             "FXCH   \n\t"
9953             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
9954          %}
9955   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
9956               Opcode(0xD9), Opcode(0xC9),   // fxch
9957               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9958 
9959   ins_pipe( pipe_slow );
9960 %}
9961 
9962 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
9963   predicate (UseSSE>=2);
9964   effect(KILL cr);
  // The source and result Double operands are in XMM registers
9966   match(Set dst (LogD src));
9967   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9968   // fyl2x        ; compute log_e(2) * log_2(x)
9969   format %{ "FLDLN2 \t\t\t#Log_e\n\t"
9970             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
9971          %}
9972   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
9973               Push_SrcD(src),
9974               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9975               Push_ResultD(dst));
9976   ins_pipe( pipe_slow );
9977 %}
9978 
9979 //-------------Float Instructions-------------------------------
9980 // Float Math
9981 
9982 // Code for float compare:
9983 //     fcompp();
9984 //     fwait(); fnstsw_ax();
9985 //     sahf();
9986 //     movl(dst, unordered_result);
9987 //     jcc(Assembler::parity, exit);
9988 //     movl(dst, less_result);
9989 //     jcc(Assembler::below, exit);
9990 //     movl(dst, equal_result);
9991 //     jcc(Assembler::equal, exit);
9992 //     movl(dst, greater_result);
9993 //   exit:
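// (After FNSTSW AX / SAHF the x87 condition bits land in EFLAGS as
// C0->CF, C2->PF and C3->ZF, which is why the sequences below treat
// PF=1 as "unordered" and CF=1 as "below".)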
9994 
9995 // P6 version of float compare, sets condition codes in EFLAGS
9996 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9997   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9998   match(Set cr (CmpF src1 src2));
9999   effect(KILL rax);
10000   ins_cost(150);
10001   format %{ "FLD    $src1\n\t"
10002             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10003             "JNP    exit\n\t"
10004             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10005             "SAHF\n"
10006      "exit:\tNOP               // avoid branch to branch" %}
10007   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10008   ins_encode( Push_Reg_DPR(src1),
10009               OpcP, RegOpc(src2),
10010               cmpF_P6_fixup );
10011   ins_pipe( pipe_slow );
10012 %}
10013 
10014 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10015   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10016   match(Set cr (CmpF src1 src2));
10017   ins_cost(100);
10018   format %{ "FLD    $src1\n\t"
10019             "FUCOMIP ST,$src2  // P6 instruction" %}
10020   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10021   ins_encode( Push_Reg_DPR(src1),
10022               OpcP, RegOpc(src2));
10023   ins_pipe( pipe_slow );
10024 %}
10025 
10026 
10027 // Compare & branch
10028 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10029   predicate(UseSSE == 0);
10030   match(Set cr (CmpF src1 src2));
10031   effect(KILL rax);
10032   ins_cost(200);
10033   format %{ "FLD    $src1\n\t"
10034             "FCOMp  $src2\n\t"
10035             "FNSTSW AX\n\t"
10036             "TEST   AX,0x400\n\t"
10037             "JZ,s   flags\n\t"
10038             "MOV    AH,1\t# unordered treat as LT\n"
10039     "flags:\tSAHF" %}
10040   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10041   ins_encode( Push_Reg_DPR(src1),
10042               OpcP, RegOpc(src2),
10043               fpu_flags);
10044   ins_pipe( pipe_slow );
10045 %}
10046 
10047 // Compare vs zero into -1,0,1
10048 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10049   predicate(UseSSE == 0);
10050   match(Set dst (CmpF3 src1 zero));
10051   effect(KILL cr, KILL rax);
10052   ins_cost(280);
10053   format %{ "FTSTF  $dst,$src1" %}
10054   opcode(0xE4, 0xD9);
10055   ins_encode( Push_Reg_DPR(src1),
10056               OpcS, OpcP, PopFPU,
10057               CmpF_Result(dst));
10058   ins_pipe( pipe_slow );
10059 %}
10060 
10061 // Compare into -1,0,1
10062 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10063   predicate(UseSSE == 0);
10064   match(Set dst (CmpF3 src1 src2));
10065   effect(KILL cr, KILL rax);
10066   ins_cost(300);
10067   format %{ "FCMPF  $dst,$src1,$src2" %}
10068   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10069   ins_encode( Push_Reg_DPR(src1),
10070               OpcP, RegOpc(src2),
10071               CmpF_Result(dst));
10072   ins_pipe( pipe_slow );
10073 %}
10074 
10075 // float compare and set condition codes in EFLAGS by XMM regs
10076 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10077   predicate(UseSSE>=1);
10078   match(Set cr (CmpF src1 src2));
10079   ins_cost(145);
10080   format %{ "UCOMISS $src1,$src2\n\t"
10081             "JNP,s   exit\n\t"
10082             "PUSHF\t# saw NaN, set CF\n\t"
10083             "AND     [rsp], #0xffffff2b\n\t"
10084             "POPF\n"
10085     "exit:" %}
10086   ins_encode %{
10087     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10088     emit_cmpfp_fixup(_masm);
10089   %}
10090   ins_pipe( pipe_slow );
10091 %}
10092 
10093 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10094   predicate(UseSSE>=1);
10095   match(Set cr (CmpF src1 src2));
10096   ins_cost(100);
10097   format %{ "UCOMISS $src1,$src2" %}
10098   ins_encode %{
10099     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10100   %}
10101   ins_pipe( pipe_slow );
10102 %}
10103 
10104 // float compare and set condition codes in EFLAGS by XMM regs
10105 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10106   predicate(UseSSE>=1);
10107   match(Set cr (CmpF src1 (LoadF src2)));
10108   ins_cost(165);
10109   format %{ "UCOMISS $src1,$src2\n\t"
10110             "JNP,s   exit\n\t"
10111             "PUSHF\t# saw NaN, set CF\n\t"
10112             "AND     [rsp], #0xffffff2b\n\t"
10113             "POPF\n"
10114     "exit:" %}
10115   ins_encode %{
10116     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10117     emit_cmpfp_fixup(_masm);
10118   %}
10119   ins_pipe( pipe_slow );
10120 %}
10121 
10122 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10123   predicate(UseSSE>=1);
10124   match(Set cr (CmpF src1 (LoadF src2)));
10125   ins_cost(100);
10126   format %{ "UCOMISS $src1,$src2" %}
10127   ins_encode %{
10128     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10129   %}
10130   ins_pipe( pipe_slow );
10131 %}
10132 
10133 // Compare into -1,0,1 in XMM
10134 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10135   predicate(UseSSE>=1);
10136   match(Set dst (CmpF3 src1 src2));
10137   effect(KILL cr);
10138   ins_cost(255);
10139   format %{ "UCOMISS $src1, $src2\n\t"
10140             "MOV     $dst, #-1\n\t"
10141             "JP,s    done\n\t"
10142             "JB,s    done\n\t"
10143             "SETNE   $dst\n\t"
10144             "MOVZB   $dst, $dst\n"
10145     "done:" %}
10146   ins_encode %{
10147     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10148     emit_cmpfp3(_masm, $dst$$Register);
10149   %}
10150   ins_pipe( pipe_slow );
10151 %}
10152 
10153 // Compare into -1,0,1 in XMM and memory
10154 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10155   predicate(UseSSE>=1);
10156   match(Set dst (CmpF3 src1 (LoadF src2)));
10157   effect(KILL cr);
10158   ins_cost(275);
10159   format %{ "UCOMISS $src1, $src2\n\t"
10160             "MOV     $dst, #-1\n\t"
10161             "JP,s    done\n\t"
10162             "JB,s    done\n\t"
10163             "SETNE   $dst\n\t"
10164             "MOVZB   $dst, $dst\n"
10165     "done:" %}
10166   ins_encode %{
10167     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10168     emit_cmpfp3(_masm, $dst$$Register);
10169   %}
10170   ins_pipe( pipe_slow );
10171 %}
10172 
10173 // Spill to obtain 24-bit precision
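// (Roughly: in 24-bit mode the FPU control word already rounds the
// significand to 24 bits, but an x87 register still carries the wide
// extended-precision exponent; storing through FSTP_S to a 32-bit stack
// slot is what also clamps the exponent to float range, which is why
// these variants target a stackSlotF rather than a register.)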
10174 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10175   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10176   match(Set dst (SubF src1 src2));
10177 
10178   format %{ "FSUB   $dst,$src1 - $src2" %}
10179   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10180   ins_encode( Push_Reg_FPR(src1),
10181               OpcReg_FPR(src2),
10182               Pop_Mem_FPR(dst) );
10183   ins_pipe( fpu_mem_reg_reg );
10184 %}
10185 //
10186 // This instruction does not round to 24-bits
10187 instruct subFPR_reg(regFPR dst, regFPR src) %{
10188   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10189   match(Set dst (SubF dst src));
10190 
10191   format %{ "FSUB   $dst,$src" %}
10192   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10193   ins_encode( Push_Reg_FPR(src),
10194               OpcP, RegOpc(dst) );
10195   ins_pipe( fpu_reg_reg );
10196 %}
10197 
10198 // Spill to obtain 24-bit precision
10199 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10200   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10201   match(Set dst (AddF src1 src2));
10202 
10203   format %{ "FADD   $dst,$src1,$src2" %}
10204   opcode(0xD8, 0x0); /* D8 C0+i */
10205   ins_encode( Push_Reg_FPR(src2),
10206               OpcReg_FPR(src1),
10207               Pop_Mem_FPR(dst) );
10208   ins_pipe( fpu_mem_reg_reg );
10209 %}
10210 //
10211 // This instruction does not round to 24-bits
10212 instruct addFPR_reg(regFPR dst, regFPR src) %{
10213   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10214   match(Set dst (AddF dst src));
10215 
10216   format %{ "FLD    $src\n\t"
10217             "FADDp  $dst,ST" %}
10218   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10219   ins_encode( Push_Reg_FPR(src),
10220               OpcP, RegOpc(dst) );
10221   ins_pipe( fpu_reg_reg );
10222 %}
10223 
10224 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10225   predicate(UseSSE==0);
10226   match(Set dst (AbsF src));
10227   ins_cost(100);
10228   format %{ "FABS" %}
10229   opcode(0xE1, 0xD9);
10230   ins_encode( OpcS, OpcP );
10231   ins_pipe( fpu_reg_reg );
10232 %}
10233 
10234 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10235   predicate(UseSSE==0);
10236   match(Set dst (NegF src));
10237   ins_cost(100);
10238   format %{ "FCHS" %}
10239   opcode(0xE0, 0xD9);
10240   ins_encode( OpcS, OpcP );
10241   ins_pipe( fpu_reg_reg );
10242 %}
10243 
10244 // Cisc-alternate to addFPR_reg
10245 // Spill to obtain 24-bit precision
10246 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10247   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10248   match(Set dst (AddF src1 (LoadF src2)));
10249 
10250   format %{ "FLD    $src2\n\t"
10251             "FADD   ST,$src1\n\t"
10252             "FSTP_S $dst" %}
10253   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10254   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10255               OpcReg_FPR(src1),
10256               Pop_Mem_FPR(dst) );
10257   ins_pipe( fpu_mem_reg_mem );
10258 %}
10259 //
10260 // Cisc-alternate to addFPR_reg
10261 // This instruction does not round to 24-bits
10262 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10263   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10264   match(Set dst (AddF dst (LoadF src)));
10265 
10266   format %{ "FADD   $dst,$src" %}
10267   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10268   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10269               OpcP, RegOpc(dst) );
10270   ins_pipe( fpu_reg_mem );
10271 %}
10272 
10273 // // Following two instructions for _222_mpegaudio
10274 // Spill to obtain 24-bit precision
10275 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10276   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10277   match(Set dst (AddF src1 src2));
10278 
10279   format %{ "FADD   $dst,$src1,$src2" %}
10280   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10281   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10282               OpcReg_FPR(src2),
10283               Pop_Mem_FPR(dst) );
10284   ins_pipe( fpu_mem_reg_mem );
10285 %}
10286 
10287 // Cisc-spill variant
10288 // Spill to obtain 24-bit precision
10289 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10290   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10291   match(Set dst (AddF src1 (LoadF src2)));
10292 
10293   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10294   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10295   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10296               set_instruction_start,
10297               OpcP, RMopc_Mem(secondary,src1),
10298               Pop_Mem_FPR(dst) );
10299   ins_pipe( fpu_mem_mem_mem );
10300 %}
10301 
10302 // Spill to obtain 24-bit precision
10303 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10304   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10305   match(Set dst (AddF src1 src2));
10306 
10307   format %{ "FADD   $dst,$src1,$src2" %}
10308   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10309   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10310               set_instruction_start,
10311               OpcP, RMopc_Mem(secondary,src1),
10312               Pop_Mem_FPR(dst) );
10313   ins_pipe( fpu_mem_mem_mem );
10314 %}
10315 
10316 
10317 // Spill to obtain 24-bit precision
10318 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10319   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10320   match(Set dst (AddF src con));
10321   format %{ "FLD    $src\n\t"
10322             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10323             "FSTP_S $dst"  %}
10324   ins_encode %{
10325     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10326     __ fadd_s($constantaddress($con));
10327     __ fstp_s(Address(rsp, $dst$$disp));
10328   %}
10329   ins_pipe(fpu_mem_reg_con);
10330 %}
10331 //
10332 // This instruction does not round to 24-bits
10333 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10334   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10335   match(Set dst (AddF src con));
10336   format %{ "FLD    $src\n\t"
10337             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10338             "FSTP   $dst"  %}
10339   ins_encode %{
10340     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10341     __ fadd_s($constantaddress($con));
10342     __ fstp_d($dst$$reg);
10343   %}
10344   ins_pipe(fpu_reg_reg_con);
10345 %}
10346 
10347 // Spill to obtain 24-bit precision
10348 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10349   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10350   match(Set dst (MulF src1 src2));
10351 
10352   format %{ "FLD    $src1\n\t"
10353             "FMUL   $src2\n\t"
10354             "FSTP_S $dst"  %}
10355   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10356   ins_encode( Push_Reg_FPR(src1),
10357               OpcReg_FPR(src2),
10358               Pop_Mem_FPR(dst) );
10359   ins_pipe( fpu_mem_reg_reg );
10360 %}
10361 //
10362 // This instruction does not round to 24-bits
10363 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10364   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10365   match(Set dst (MulF src1 src2));
10366 
10367   format %{ "FLD    $src1\n\t"
10368             "FMUL   $src2\n\t"
10369             "FSTP_S $dst"  %}
10370   opcode(0xD8, 0x1); /* D8 C8+i */
10371   ins_encode( Push_Reg_FPR(src2),
10372               OpcReg_FPR(src1),
10373               Pop_Reg_FPR(dst) );
10374   ins_pipe( fpu_reg_reg_reg );
10375 %}
10376 
10377 
10378 // Spill to obtain 24-bit precision
10379 // Cisc-alternate to reg-reg multiply
10380 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10381   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10382   match(Set dst (MulF src1 (LoadF src2)));
10383 
10384   format %{ "FLD_S  $src2\n\t"
10385             "FMUL   $src1\n\t"
10386             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10388   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10389               OpcReg_FPR(src1),
10390               Pop_Mem_FPR(dst) );
10391   ins_pipe( fpu_mem_reg_mem );
10392 %}
10393 //
10394 // This instruction does not round to 24-bits
10395 // Cisc-alternate to reg-reg multiply
10396 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10397   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10398   match(Set dst (MulF src1 (LoadF src2)));
10399 
10400   format %{ "FMUL   $dst,$src1,$src2" %}
10401   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10402   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10403               OpcReg_FPR(src1),
10404               Pop_Reg_FPR(dst) );
10405   ins_pipe( fpu_reg_reg_mem );
10406 %}
10407 
10408 // Spill to obtain 24-bit precision
10409 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10410   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10411   match(Set dst (MulF src1 src2));
10412 
10413   format %{ "FMUL   $dst,$src1,$src2" %}
10414   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10415   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10416               set_instruction_start,
10417               OpcP, RMopc_Mem(secondary,src1),
10418               Pop_Mem_FPR(dst) );
10419   ins_pipe( fpu_mem_mem_mem );
10420 %}
10421 
10422 // Spill to obtain 24-bit precision
10423 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10424   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10425   match(Set dst (MulF src con));
10426 
10427   format %{ "FLD    $src\n\t"
10428             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10429             "FSTP_S $dst"  %}
10430   ins_encode %{
10431     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10432     __ fmul_s($constantaddress($con));
10433     __ fstp_s(Address(rsp, $dst$$disp));
10434   %}
10435   ins_pipe(fpu_mem_reg_con);
10436 %}
10437 //
10438 // This instruction does not round to 24-bits
10439 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10440   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10441   match(Set dst (MulF src con));
10442 
10443   format %{ "FLD    $src\n\t"
10444             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10445             "FSTP   $dst"  %}
10446   ins_encode %{
10447     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10448     __ fmul_s($constantaddress($con));
10449     __ fstp_d($dst$$reg);
10450   %}
10451   ins_pipe(fpu_reg_reg_con);
10452 %}
10453 
10454 
10455 //
10456 // MACRO1 -- subsume unshared load into mulFPR
10457 // This instruction does not round to 24-bits
10458 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10459   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10460   match(Set dst (MulF (LoadF mem1) src));
10461 
10462   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10463             "FMUL   ST,$src\n\t"
10464             "FSTP   $dst" %}
10465   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10466   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10467               OpcReg_FPR(src),
10468               Pop_Reg_FPR(dst) );
10469   ins_pipe( fpu_reg_reg_mem );
10470 %}
10471 //
10472 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10473 // This instruction does not round to 24-bits
10474 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10475   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10476   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10477   ins_cost(95);
10478 
10479   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10480             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10481             "FADD   ST,$src2\n\t"
10482             "FSTP   $dst" %}
10483   opcode(0xD9); /* LoadF D9 /0 */
10484   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10485               FMul_ST_reg(src1),
10486               FAdd_ST_reg(src2),
10487               Pop_Reg_FPR(dst) );
10488   ins_pipe( fpu_reg_mem_reg_reg );
10489 %}
10490 
10491 // MACRO3 -- addFPR a mulFPR
10492 // This instruction does not round to 24-bits.  It is a '2-address'
10493 // instruction in that the result goes back to src2.  This eliminates
10494 // a move from the macro; possibly the register allocator will have
10495 // to add it back (and maybe not).
10496 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10497   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10498   match(Set src2 (AddF (MulF src0 src1) src2));
10499 
10500   format %{ "FLD    $src0     ===MACRO3===\n\t"
10501             "FMUL   ST,$src1\n\t"
10502             "FADDP  $src2,ST" %}
10503   opcode(0xD9); /* LoadF D9 /0 */
10504   ins_encode( Push_Reg_FPR(src0),
10505               FMul_ST_reg(src1),
10506               FAddP_reg_ST(src2) );
10507   ins_pipe( fpu_reg_reg_reg );
10508 %}
10509 
10510 // MACRO4 -- divFPR subFPR
10511 // This instruction does not round to 24-bits
10512 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10513   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10514   match(Set dst (DivF (SubF src2 src1) src3));
10515 
10516   format %{ "FLD    $src2   ===MACRO4===\n\t"
10517             "FSUB   ST,$src1\n\t"
10518             "FDIV   ST,$src3\n\t"
10519             "FSTP  $dst" %}
10520   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10521   ins_encode( Push_Reg_FPR(src2),
10522               subFPR_divFPR_encode(src1,src3),
10523               Pop_Reg_FPR(dst) );
10524   ins_pipe( fpu_reg_reg_reg_reg );
10525 %}
10526 
10527 // Spill to obtain 24-bit precision
10528 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10529   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10530   match(Set dst (DivF src1 src2));
10531 
10532   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10534   ins_encode( Push_Reg_FPR(src1),
10535               OpcReg_FPR(src2),
10536               Pop_Mem_FPR(dst) );
10537   ins_pipe( fpu_mem_reg_reg );
10538 %}
10539 //
10540 // This instruction does not round to 24-bits
10541 instruct divFPR_reg(regFPR dst, regFPR src) %{
10542   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10543   match(Set dst (DivF dst src));
10544 
10545   format %{ "FDIV   $dst,$src" %}
10546   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10547   ins_encode( Push_Reg_FPR(src),
10548               OpcP, RegOpc(dst) );
10549   ins_pipe( fpu_reg_reg );
10550 %}
10551 
10552 
10553 // Spill to obtain 24-bit precision
10554 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10555   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10556   match(Set dst (ModF src1 src2));
10557   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10558 
10559   format %{ "FMOD   $dst,$src1,$src2" %}
10560   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10561               emitModDPR(),
10562               Push_Result_Mod_DPR(src2),
10563               Pop_Mem_FPR(dst));
10564   ins_pipe( pipe_slow );
10565 %}
10566 //
10567 // This instruction does not round to 24-bits
10568 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10569   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10570   match(Set dst (ModF dst src));
10571   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10572 
10573   format %{ "FMOD   $dst,$src" %}
10574   ins_encode(Push_Reg_Mod_DPR(dst, src),
10575               emitModDPR(),
10576               Push_Result_Mod_DPR(src),
10577               Pop_Reg_FPR(dst));
10578   ins_pipe( pipe_slow );
10579 %}
10580 
10581 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10582   predicate(UseSSE>=1);
10583   match(Set dst (ModF src0 src1));
10584   effect(KILL rax, KILL cr);
10585   format %{ "SUB    ESP,4\t # FMOD\n"
10586           "\tMOVSS  [ESP+0],$src1\n"
10587           "\tFLD_S  [ESP+0]\n"
10588           "\tMOVSS  [ESP+0],$src0\n"
10589           "\tFLD_S  [ESP+0]\n"
10590      "loop:\tFPREM\n"
10591           "\tFWAIT\n"
10592           "\tFNSTSW AX\n"
10593           "\tSAHF\n"
10594           "\tJP     loop\n"
10595           "\tFSTP_S [ESP+0]\n"
10596           "\tMOVSS  $dst,[ESP+0]\n"
10597           "\tADD    ESP,4\n"
10598           "\tFSTP   ST0\t # Restore FPU Stack"
10599     %}
10600   ins_cost(250);
10601   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10602   ins_pipe( pipe_slow );
10603 %}
10604 
10605 
10606 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
10608 
10609 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10610   predicate(UseSSE==0);
10611   match(Set dst (RoundFloat src));
10612   ins_cost(125);
10613   format %{ "FST_S  $dst,$src\t# F-round" %}
10614   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10615   ins_pipe( fpu_mem_reg );
10616 %}
10617 
10618 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10619   predicate(UseSSE<=1);
10620   match(Set dst (RoundDouble src));
10621   ins_cost(125);
10622   format %{ "FST_D  $dst,$src\t# D-round" %}
10623   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10624   ins_pipe( fpu_mem_reg );
10625 %}
10626 
// Force rounding to 24-bit precision and 8-bit exponent
10628 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10629   predicate(UseSSE==0);
10630   match(Set dst (ConvD2F src));
10631   format %{ "FST_S  $dst,$src\t# F-round" %}
10632   expand %{
10633     roundFloat_mem_reg(dst,src);
10634   %}
10635 %}
10636 
// Force rounding to 24-bit precision and 8-bit exponent
10638 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10639   predicate(UseSSE==1);
10640   match(Set dst (ConvD2F src));
10641   effect( KILL cr );
10642   format %{ "SUB    ESP,4\n\t"
10643             "FST_S  [ESP],$src\t# F-round\n\t"
10644             "MOVSS  $dst,[ESP]\n\t"
10645             "ADD ESP,4" %}
10646   ins_encode %{
10647     __ subptr(rsp, 4);
10648     if ($src$$reg != FPR1L_enc) {
10649       __ fld_s($src$$reg-1);
10650       __ fstp_s(Address(rsp, 0));
10651     } else {
10652       __ fst_s(Address(rsp, 0));
10653     }
10654     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10655     __ addptr(rsp, 4);
10656   %}
10657   ins_pipe( pipe_slow );
10658 %}
10659 
10660 // Force rounding double precision to single precision
10661 instruct convD2F_reg(regF dst, regD src) %{
10662   predicate(UseSSE>=2);
10663   match(Set dst (ConvD2F src));
10664   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10665   ins_encode %{
10666     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10667   %}
10668   ins_pipe( pipe_slow );
10669 %}
10670 
10671 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10672   predicate(UseSSE==0);
10673   match(Set dst (ConvF2D src));
10674   format %{ "FST_S  $dst,$src\t# D-round" %}
10675   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10676   ins_pipe( fpu_reg_reg );
10677 %}
10678 
10679 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10680   predicate(UseSSE==1);
10681   match(Set dst (ConvF2D src));
10682   format %{ "FST_D  $dst,$src\t# D-round" %}
10683   expand %{
10684     roundDouble_mem_reg(dst,src);
10685   %}
10686 %}
10687 
10688 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10689   predicate(UseSSE==1);
10690   match(Set dst (ConvF2D src));
10691   effect( KILL cr );
10692   format %{ "SUB    ESP,4\n\t"
10693             "MOVSS  [ESP] $src\n\t"
10694             "FLD_S  [ESP]\n\t"
10695             "ADD    ESP,4\n\t"
10696             "FSTP   $dst\t# D-round" %}
10697   ins_encode %{
10698     __ subptr(rsp, 4);
10699     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10700     __ fld_s(Address(rsp, 0));
10701     __ addptr(rsp, 4);
10702     __ fstp_d($dst$$reg);
10703   %}
10704   ins_pipe( pipe_slow );
10705 %}
10706 
10707 instruct convF2D_reg(regD dst, regF src) %{
10708   predicate(UseSSE>=2);
10709   match(Set dst (ConvF2D src));
10710   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10711   ins_encode %{
10712     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10713   %}
10714   ins_pipe( pipe_slow );
10715 %}
10716 
10717 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10718 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10719   predicate(UseSSE<=1);
10720   match(Set dst (ConvD2I src));
10721   effect( KILL tmp, KILL cr );
10722   format %{ "FLD    $src\t# Convert double to int \n\t"
10723             "FLDCW  trunc mode\n\t"
10724             "SUB    ESP,4\n\t"
10725             "FISTp  [ESP + #0]\n\t"
10726             "FLDCW  std/24-bit mode\n\t"
10727             "POP    EAX\n\t"
10728             "CMP    EAX,0x80000000\n\t"
10729             "JNE,s  fast\n\t"
10730             "FLD_D  $src\n\t"
10731             "CALL   d2i_wrapper\n"
10732       "fast:" %}
10733   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10734   ins_pipe( pipe_slow );
10735 %}
10736 
10737 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10738 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10739   predicate(UseSSE>=2);
10740   match(Set dst (ConvD2I src));
10741   effect( KILL tmp, KILL cr );
10742   format %{ "CVTTSD2SI $dst, $src\n\t"
10743             "CMP    $dst,0x80000000\n\t"
10744             "JNE,s  fast\n\t"
10745             "SUB    ESP, 8\n\t"
10746             "MOVSD  [ESP], $src\n\t"
10747             "FLD_D  [ESP]\n\t"
10748             "ADD    ESP, 8\n\t"
10749             "CALL   d2i_wrapper\n"
10750       "fast:" %}
10751   ins_encode %{
10752     Label fast;
10753     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10754     __ cmpl($dst$$Register, 0x80000000);
10755     __ jccb(Assembler::notEqual, fast);
10756     __ subptr(rsp, 8);
10757     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10758     __ fld_d(Address(rsp, 0));
10759     __ addptr(rsp, 8);
10760     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10761     __ bind(fast);
10762   %}
10763   ins_pipe( pipe_slow );
10764 %}
10765 
10766 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10767   predicate(UseSSE<=1);
10768   match(Set dst (ConvD2L src));
10769   effect( KILL cr );
10770   format %{ "FLD    $src\t# Convert double to long\n\t"
10771             "FLDCW  trunc mode\n\t"
10772             "SUB    ESP,8\n\t"
10773             "FISTp  [ESP + #0]\n\t"
10774             "FLDCW  std/24-bit mode\n\t"
10775             "POP    EAX\n\t"
10776             "POP    EDX\n\t"
10777             "CMP    EDX,0x80000000\n\t"
10778             "JNE,s  fast\n\t"
10779             "TEST   EAX,EAX\n\t"
10780             "JNE,s  fast\n\t"
10781             "FLD    $src\n\t"
10782             "CALL   d2l_wrapper\n"
10783       "fast:" %}
10784   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10785   ins_pipe( pipe_slow );
10786 %}
10787 
10788 // XMM lacks a float/double->long conversion, so use the old FPU stack.
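// (On 32-bit x86 CVTTSD2SI/CVTTSS2SI can only produce a 32-bit result,
// while FISTP can store a full 64-bit integer, hence the spill/reload
// dance through the stack below.)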
10789 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10790   predicate (UseSSE>=2);
10791   match(Set dst (ConvD2L src));
10792   effect( KILL cr );
10793   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10794             "MOVSD  [ESP],$src\n\t"
10795             "FLD_D  [ESP]\n\t"
10796             "FLDCW  trunc mode\n\t"
10797             "FISTp  [ESP + #0]\n\t"
10798             "FLDCW  std/24-bit mode\n\t"
10799             "POP    EAX\n\t"
10800             "POP    EDX\n\t"
10801             "CMP    EDX,0x80000000\n\t"
10802             "JNE,s  fast\n\t"
10803             "TEST   EAX,EAX\n\t"
10804             "JNE,s  fast\n\t"
10805             "SUB    ESP,8\n\t"
10806             "MOVSD  [ESP],$src\n\t"
10807             "FLD_D  [ESP]\n\t"
10808             "ADD    ESP,8\n\t"
10809             "CALL   d2l_wrapper\n"
10810       "fast:" %}
10811   ins_encode %{
10812     Label fast;
10813     __ subptr(rsp, 8);
10814     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10815     __ fld_d(Address(rsp, 0));
10816     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10817     __ fistp_d(Address(rsp, 0));
10818     // Restore the rounding mode, mask the exception
10819     if (Compile::current()->in_24_bit_fp_mode()) {
10820       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10821     } else {
10822       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10823     }
10824     // Load the converted long, adjust CPU stack
10825     __ pop(rax);
10826     __ pop(rdx);
10827     __ cmpl(rdx, 0x80000000);
10828     __ jccb(Assembler::notEqual, fast);
10829     __ testl(rax, rax);
10830     __ jccb(Assembler::notEqual, fast);
10831     __ subptr(rsp, 8);
10832     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10833     __ fld_d(Address(rsp, 0));
10834     __ addptr(rsp, 8);
10835     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10836     __ bind(fast);
10837   %}
10838   ins_pipe( pipe_slow );
10839 %}
10840 
// Convert a float or double to an int.  Java semantics require we do
// complex manglations in the corner cases.  So we set the rounding mode
// to 'zero', store the darned value down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
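// A rough sketch of the Java-level mapping the wrapper must produce (the
// same rules apply to the double conversions above; min_jint/max_jint are
// the int range bounds, and d2i_wrapper is the real slow path):
//   if (x != x)          return 0;          // NaN
//   if (x <= min_jint)   return min_jint;   // saturate low
//   if (x >= max_jint)   return max_jint;   // saturate high
//   return (int)x;                          // truncate toward zero
// The fast path works because FIST/CVTT store the "integer indefinite"
// pattern 0x80000000 on NaN or overflow, so comparing the result against
// 0x80000000 catches every case that needs the wrapper.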
10847 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10848   predicate(UseSSE==0);
10849   match(Set dst (ConvF2I src));
10850   effect( KILL tmp, KILL cr );
10851   format %{ "FLD    $src\t# Convert float to int \n\t"
10852             "FLDCW  trunc mode\n\t"
10853             "SUB    ESP,4\n\t"
10854             "FISTp  [ESP + #0]\n\t"
10855             "FLDCW  std/24-bit mode\n\t"
10856             "POP    EAX\n\t"
10857             "CMP    EAX,0x80000000\n\t"
10858             "JNE,s  fast\n\t"
10859             "FLD    $src\n\t"
10860             "CALL   d2i_wrapper\n"
10861       "fast:" %}
10862   // DPR2I_encoding works for FPR2I
10863   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10864   ins_pipe( pipe_slow );
10865 %}
10866 
10867 // Convert a float in xmm to an int reg.
10868 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10869   predicate(UseSSE>=1);
10870   match(Set dst (ConvF2I src));
10871   effect( KILL tmp, KILL cr );
10872   format %{ "CVTTSS2SI $dst, $src\n\t"
10873             "CMP    $dst,0x80000000\n\t"
10874             "JNE,s  fast\n\t"
10875             "SUB    ESP, 4\n\t"
10876             "MOVSS  [ESP], $src\n\t"
10877             "FLD    [ESP]\n\t"
10878             "ADD    ESP, 4\n\t"
10879             "CALL   d2i_wrapper\n"
10880       "fast:" %}
10881   ins_encode %{
10882     Label fast;
10883     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10884     __ cmpl($dst$$Register, 0x80000000);
10885     __ jccb(Assembler::notEqual, fast);
10886     __ subptr(rsp, 4);
10887     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10888     __ fld_s(Address(rsp, 0));
10889     __ addptr(rsp, 4);
10890     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10891     __ bind(fast);
10892   %}
10893   ins_pipe( pipe_slow );
10894 %}
10895 
10896 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10897   predicate(UseSSE==0);
10898   match(Set dst (ConvF2L src));
10899   effect( KILL cr );
10900   format %{ "FLD    $src\t# Convert float to long\n\t"
10901             "FLDCW  trunc mode\n\t"
10902             "SUB    ESP,8\n\t"
10903             "FISTp  [ESP + #0]\n\t"
10904             "FLDCW  std/24-bit mode\n\t"
10905             "POP    EAX\n\t"
10906             "POP    EDX\n\t"
10907             "CMP    EDX,0x80000000\n\t"
10908             "JNE,s  fast\n\t"
10909             "TEST   EAX,EAX\n\t"
10910             "JNE,s  fast\n\t"
10911             "FLD    $src\n\t"
10912             "CALL   d2l_wrapper\n"
10913       "fast:" %}
10914   // DPR2L_encoding works for FPR2L
10915   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10916   ins_pipe( pipe_slow );
10917 %}
10918 
10919 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10920 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10921   predicate (UseSSE>=1);
10922   match(Set dst (ConvF2L src));
10923   effect( KILL cr );
10924   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10925             "MOVSS  [ESP],$src\n\t"
10926             "FLD_S  [ESP]\n\t"
10927             "FLDCW  trunc mode\n\t"
10928             "FISTp  [ESP + #0]\n\t"
10929             "FLDCW  std/24-bit mode\n\t"
10930             "POP    EAX\n\t"
10931             "POP    EDX\n\t"
10932             "CMP    EDX,0x80000000\n\t"
10933             "JNE,s  fast\n\t"
10934             "TEST   EAX,EAX\n\t"
10935             "JNE,s  fast\n\t"
10936             "SUB    ESP,4\t# Convert float to long\n\t"
10937             "MOVSS  [ESP],$src\n\t"
10938             "FLD_S  [ESP]\n\t"
10939             "ADD    ESP,4\n\t"
10940             "CALL   d2l_wrapper\n"
10941       "fast:" %}
10942   ins_encode %{
10943     Label fast;
10944     __ subptr(rsp, 8);
10945     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10946     __ fld_s(Address(rsp, 0));
10947     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10948     __ fistp_d(Address(rsp, 0));
10949     // Restore the rounding mode, mask the exception
10950     if (Compile::current()->in_24_bit_fp_mode()) {
10951       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10952     } else {
10953       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10954     }
10955     // Load the converted long, adjust CPU stack
10956     __ pop(rax);
10957     __ pop(rdx);
10958     __ cmpl(rdx, 0x80000000);
10959     __ jccb(Assembler::notEqual, fast);
10960     __ testl(rax, rax);
10961     __ jccb(Assembler::notEqual, fast);
10962     __ subptr(rsp, 4);
10963     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10964     __ fld_s(Address(rsp, 0));
10965     __ addptr(rsp, 4);
10966     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10967     __ bind(fast);
10968   %}
10969   ins_pipe( pipe_slow );
10970 %}
10971 
10972 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10973   predicate( UseSSE<=1 );
10974   match(Set dst (ConvI2D src));
10975   format %{ "FILD   $src\n\t"
10976             "FSTP   $dst" %}
10977   opcode(0xDB, 0x0);  /* DB /0 */
10978   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10979   ins_pipe( fpu_reg_mem );
10980 %}
10981 
10982 instruct convI2D_reg(regD dst, rRegI src) %{
10983   predicate( UseSSE>=2 && !UseXmmI2D );
10984   match(Set dst (ConvI2D src));
10985   format %{ "CVTSI2SD $dst,$src" %}
10986   ins_encode %{
10987     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10988   %}
10989   ins_pipe( pipe_slow );
10990 %}
10991 
10992 instruct convI2D_mem(regD dst, memory mem) %{
10993   predicate( UseSSE>=2 );
10994   match(Set dst (ConvI2D (LoadI mem)));
10995   format %{ "CVTSI2SD $dst,$mem" %}
10996   ins_encode %{
10997     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10998   %}
10999   ins_pipe( pipe_slow );
11000 %}
11001 
11002 instruct convXI2D_reg(regD dst, rRegI src)
11003 %{
11004   predicate( UseSSE>=2 && UseXmmI2D );
11005   match(Set dst (ConvI2D src));
11006 
11007   format %{ "MOVD  $dst,$src\n\t"
11008             "CVTDQ2PD $dst,$dst\t# i2d" %}
11009   ins_encode %{
11010     __ movdl($dst$$XMMRegister, $src$$Register);
11011     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11012   %}
11013   ins_pipe(pipe_slow); // XXX
11014 %}
11015 
11016 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11017   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11018   match(Set dst (ConvI2D (LoadI mem)));
11019   format %{ "FILD   $mem\n\t"
11020             "FSTP   $dst" %}
11021   opcode(0xDB);      /* DB /0 */
11022   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11023               Pop_Reg_DPR(dst));
11024   ins_pipe( fpu_reg_mem );
11025 %}
11026 
11027 // Convert a byte to a float; no rounding step needed.
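// (Values masked to 0..255 fit exactly in a float's 24-bit significand,
// so no 24-bit spill variant is needed for this case.)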
11028 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11029   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11030   match(Set dst (ConvI2F src));
11031   format %{ "FILD   $src\n\t"
11032             "FSTP   $dst" %}
11033 
11034   opcode(0xDB, 0x0);  /* DB /0 */
11035   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11036   ins_pipe( fpu_reg_mem );
11037 %}
11038 
11039 // In 24-bit mode, force exponent rounding by storing back out
11040 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11041   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11042   match(Set dst (ConvI2F src));
11043   ins_cost(200);
11044   format %{ "FILD   $src\n\t"
11045             "FSTP_S $dst" %}
11046   opcode(0xDB, 0x0);  /* DB /0 */
11047   ins_encode( Push_Mem_I(src),
11048               Pop_Mem_FPR(dst));
11049   ins_pipe( fpu_mem_mem );
11050 %}
11051 
11052 // In 24-bit mode, force exponent rounding by storing back out
11053 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11054   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11055   match(Set dst (ConvI2F (LoadI mem)));
11056   ins_cost(200);
11057   format %{ "FILD   $mem\n\t"
11058             "FSTP_S $dst" %}
11059   opcode(0xDB);  /* DB /0 */
11060   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11061               Pop_Mem_FPR(dst));
11062   ins_pipe( fpu_mem_mem );
11063 %}
11064 
11065 // This instruction does not round to 24-bits
11066 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11067   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11068   match(Set dst (ConvI2F src));
11069   format %{ "FILD   $src\n\t"
11070             "FSTP   $dst" %}
11071   opcode(0xDB, 0x0);  /* DB /0 */
11072   ins_encode( Push_Mem_I(src),
11073               Pop_Reg_FPR(dst));
11074   ins_pipe( fpu_reg_mem );
11075 %}
11076 
11077 // This instruction does not round to 24-bits
11078 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11079   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11080   match(Set dst (ConvI2F (LoadI mem)));
11081   format %{ "FILD   $mem\n\t"
11082             "FSTP   $dst" %}
11083   opcode(0xDB);      /* DB /0 */
11084   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11085               Pop_Reg_FPR(dst));
11086   ins_pipe( fpu_reg_mem );
11087 %}
11088 
11089 // Convert an int to a float in xmm; no rounding step needed.
11090 instruct convI2F_reg(regF dst, rRegI src) %{
11091   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11092   match(Set dst (ConvI2F src));
11093   format %{ "CVTSI2SS $dst, $src" %}
11094   ins_encode %{
11095     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11096   %}
11097   ins_pipe( pipe_slow );
11098 %}
11099 
11100 instruct convXI2F_reg(regF dst, rRegI src)
11101 %{
11102   predicate( UseSSE>=2 && UseXmmI2F );
11103   match(Set dst (ConvI2F src));
11104 
11105   format %{ "MOVD  $dst,$src\n\t"
11106             "CVTDQ2PS $dst,$dst\t# i2f" %}
11107   ins_encode %{
11108     __ movdl($dst$$XMMRegister, $src$$Register);
11109     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11110   %}
11111   ins_pipe(pipe_slow); // XXX
11112 %}
11113 
11114 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11115   match(Set dst (ConvI2L src));
11116   effect(KILL cr);
11117   ins_cost(375);
11118   format %{ "MOV    $dst.lo,$src\n\t"
11119             "MOV    $dst.hi,$src\n\t"
11120             "SAR    $dst.hi,31" %}
11121   ins_encode(convert_int_long(dst,src));
11122   ins_pipe( ialu_reg_reg_long );
11123 %}
11124 
11125 // Zero-extend convert int to long
11126 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11127   match(Set dst (AndL (ConvI2L src) mask) );
11128   effect( KILL flags );
11129   ins_cost(250);
11130   format %{ "MOV    $dst.lo,$src\n\t"
11131             "XOR    $dst.hi,$dst.hi" %}
11132   opcode(0x33); // XOR
11133   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11134   ins_pipe( ialu_reg_reg_long );
11135 %}
11136 
11137 // Zero-extend long
11138 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11139   match(Set dst (AndL src mask) );
11140   effect( KILL flags );
11141   ins_cost(250);
11142   format %{ "MOV    $dst.lo,$src.lo\n\t"
11143             "XOR    $dst.hi,$dst.hi" %}
11144   opcode(0x33); // XOR
11145   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11146   ins_pipe( ialu_reg_reg_long );
11147 %}
11148 
11149 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11150   predicate (UseSSE<=1);
11151   match(Set dst (ConvL2D src));
11152   effect( KILL cr );
11153   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11154             "PUSH   $src.lo\n\t"
11155             "FILD   ST,[ESP + #0]\n\t"
11156             "ADD    ESP,8\n\t"
11157             "FSTP_D $dst\t# D-round" %}
11158   opcode(0xDF, 0x5);  /* DF /5 */
11159   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11160   ins_pipe( pipe_slow );
11161 %}
11162 
11163 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11164   predicate (UseSSE>=2);
11165   match(Set dst (ConvL2D src));
11166   effect( KILL cr );
11167   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11168             "PUSH   $src.lo\n\t"
11169             "FILD_D [ESP]\n\t"
11170             "FSTP_D [ESP]\n\t"
11171             "MOVSD  $dst,[ESP]\n\t"
11172             "ADD    ESP,8" %}
11173   opcode(0xDF, 0x5);  /* DF /5 */
11174   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11175   ins_pipe( pipe_slow );
11176 %}
11177 
11178 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11179   predicate (UseSSE>=1);
11180   match(Set dst (ConvL2F src));
11181   effect( KILL cr );
11182   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11183             "PUSH   $src.lo\n\t"
11184             "FILD_D [ESP]\n\t"
11185             "FSTP_S [ESP]\n\t"
11186             "MOVSS  $dst,[ESP]\n\t"
11187             "ADD    ESP,8" %}
11188   opcode(0xDF, 0x5);  /* DF /5 */
11189   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11190   ins_pipe( pipe_slow );
11191 %}
11192 
11193 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11194   match(Set dst (ConvL2F src));
11195   effect( KILL cr );
11196   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11197             "PUSH   $src.lo\n\t"
11198             "FILD   ST,[ESP + #0]\n\t"
11199             "ADD    ESP,8\n\t"
11200             "FSTP_S $dst\t# F-round" %}
11201   opcode(0xDF, 0x5);  /* DF /5 */
11202   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11203   ins_pipe( pipe_slow );
11204 %}
11205 
11206 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11207   match(Set dst (ConvL2I src));
11208   effect( DEF dst, USE src );
11209   format %{ "MOV    $dst,$src.lo" %}
11210   ins_encode(enc_CopyL_Lo(dst,src));
11211   ins_pipe( ialu_reg_reg );
11212 %}
11213 
11214 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11215   match(Set dst (MoveF2I src));
11216   effect( DEF dst, USE src );
11217   ins_cost(100);
11218   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11219   ins_encode %{
11220     __ movl($dst$$Register, Address(rsp, $src$$disp));
11221   %}
11222   ins_pipe( ialu_reg_mem );
11223 %}
11224 
11225 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11226   predicate(UseSSE==0);
11227   match(Set dst (MoveF2I src));
11228   effect( DEF dst, USE src );
11229 
11230   ins_cost(125);
11231   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11232   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11233   ins_pipe( fpu_mem_reg );
11234 %}
11235 
11236 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11237   predicate(UseSSE>=1);
11238   match(Set dst (MoveF2I src));
11239   effect( DEF dst, USE src );
11240 
11241   ins_cost(95);
11242   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11243   ins_encode %{
11244     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11245   %}
11246   ins_pipe( pipe_slow );
11247 %}
11248 
11249 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11250   predicate(UseSSE>=2);
11251   match(Set dst (MoveF2I src));
11252   effect( DEF dst, USE src );
11253   ins_cost(85);
11254   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11255   ins_encode %{
11256     __ movdl($dst$$Register, $src$$XMMRegister);
11257   %}
11258   ins_pipe( pipe_slow );
11259 %}
11260 
11261 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11262   match(Set dst (MoveI2F src));
11263   effect( DEF dst, USE src );
11264 
11265   ins_cost(100);
11266   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11267   ins_encode %{
11268     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11269   %}
11270   ins_pipe( ialu_mem_reg );
11271 %}
11272 
11273 
11274 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11275   predicate(UseSSE==0);
11276   match(Set dst (MoveI2F src));
11277   effect(DEF dst, USE src);
11278 
11279   ins_cost(125);
11280   format %{ "FLD_S  $src\n\t"
11281             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11282   opcode(0xD9);               /* D9 /0, FLD m32real */
11283   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11284               Pop_Reg_FPR(dst) );
11285   ins_pipe( fpu_reg_mem );
11286 %}
11287 
11288 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11289   predicate(UseSSE>=1);
11290   match(Set dst (MoveI2F src));
11291   effect( DEF dst, USE src );
11292 
11293   ins_cost(95);
11294   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11295   ins_encode %{
11296     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11297   %}
11298   ins_pipe( pipe_slow );
11299 %}
11300 
11301 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11302   predicate(UseSSE>=2);
11303   match(Set dst (MoveI2F src));
11304   effect( DEF dst, USE src );
11305 
11306   ins_cost(85);
11307   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11308   ins_encode %{
11309     __ movdl($dst$$XMMRegister, $src$$Register);
11310   %}
11311   ins_pipe( pipe_slow );
11312 %}
11313 
11314 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11315   match(Set dst (MoveD2L src));
11316   effect(DEF dst, USE src);
11317 
11318   ins_cost(250);
11319   format %{ "MOV    $dst.lo,$src\n\t"
11320             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11321   opcode(0x8B, 0x8B);
11322   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11323   ins_pipe( ialu_mem_long_reg );
11324 %}
11325 
11326 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11327   predicate(UseSSE<=1);
11328   match(Set dst (MoveD2L src));
11329   effect(DEF dst, USE src);
11330 
11331   ins_cost(125);
11332   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11333   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11334   ins_pipe( fpu_mem_reg );
11335 %}
11336 
11337 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11338   predicate(UseSSE>=2);
11339   match(Set dst (MoveD2L src));
11340   effect(DEF dst, USE src);
11341   ins_cost(95);
11342   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11343   ins_encode %{
11344     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11345   %}
11346   ins_pipe( pipe_slow );
11347 %}
11348 
11349 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11350   predicate(UseSSE>=2);
11351   match(Set dst (MoveD2L src));
11352   effect(DEF dst, USE src, TEMP tmp);
11353   ins_cost(85);
11354   format %{ "MOVD   $dst.lo,$src\n\t"
11355             "PSHUFLW $tmp,$src,0x4E\n\t"
11356             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11357   ins_encode %{
11358     __ movdl($dst$$Register, $src$$XMMRegister);
11359     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11360     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11361   %}
11362   ins_pipe( pipe_slow );
11363 %}
11364 
11365 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11366   match(Set dst (MoveL2D src));
11367   effect(DEF dst, USE src);
11368 
11369   ins_cost(200);
11370   format %{ "MOV    $dst,$src.lo\n\t"
11371             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11372   opcode(0x89, 0x89);
11373   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11374   ins_pipe( ialu_mem_long_reg );
11375 %}
11376 
11377 
11378 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11379   predicate(UseSSE<=1);
11380   match(Set dst (MoveL2D src));
11381   effect(DEF dst, USE src);
11382   ins_cost(125);
11383 
11384   format %{ "FLD_D  $src\n\t"
11385             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11386   opcode(0xDD);               /* DD /0, FLD m64real */
11387   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11388               Pop_Reg_DPR(dst) );
11389   ins_pipe( fpu_reg_mem );
11390 %}
11391 
11392 
11393 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11394   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11395   match(Set dst (MoveL2D src));
11396   effect(DEF dst, USE src);
11397 
11398   ins_cost(95);
11399   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11400   ins_encode %{
11401     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11402   %}
11403   ins_pipe( pipe_slow );
11404 %}
11405 
11406 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11407   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11408   match(Set dst (MoveL2D src));
11409   effect(DEF dst, USE src);
11410 
11411   ins_cost(95);
11412   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11413   ins_encode %{
11414     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11415   %}
11416   ins_pipe( pipe_slow );
11417 %}
11418 
11419 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11420   predicate(UseSSE>=2);
11421   match(Set dst (MoveL2D src));
11422   effect(TEMP dst, USE src, TEMP tmp);
11423   ins_cost(85);
11424   format %{ "MOVD   $dst,$src.lo\n\t"
11425             "MOVD   $tmp,$src.hi\n\t"
11426             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11427   ins_encode %{
11428     __ movdl($dst$$XMMRegister, $src$$Register);
11429     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11430     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11431   %}
11432   ins_pipe( pipe_slow );
11433 %}
11434 
11435 
11436 // =======================================================================
11437 // fast clearing of an array
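      // For example (illustrative only): clearing 64 bytes arrives here with cnt = 8,
      // i.e. the count is in 8-byte doublewords per the format comments below; the
      // REP STOS form shifts the count left by 1 to issue 16 4-byte stores, while
      // the REP STOSB form shifts it left by 3 to issue 64 single-byte stores.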
11438 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11439   predicate(!UseFastStosb);
11440   match(Set dummy (ClearArray cnt base));
11441   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11442   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11443             "SHL    ECX,1\t# Convert doublewords to words\n\t"
11444             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11445   ins_encode %{
11446     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11447   %}
11448   ins_pipe( pipe_slow );
11449 %}
11450 
11451 instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11452   predicate(UseFastStosb);
11453   match(Set dummy (ClearArray cnt base));
11454   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11455   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11456             "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11457             "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
11458   ins_encode %{
11459     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11460   %}
11461   ins_pipe( pipe_slow );
11462 %}
11463 
11464 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11465                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11466   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11467   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11468 
11469   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11470   ins_encode %{
11471     __ string_compare($str1$$Register, $str2$$Register,
11472                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11473                       $tmp1$$XMMRegister);
11474   %}
11475   ins_pipe( pipe_slow );
11476 %}
11477 
11478 // fast string equals
11479 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11480                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11481   match(Set result (StrEquals (Binary str1 str2) cnt));
11482   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11483 
11484   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11485   ins_encode %{
11486     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11487                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11488                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11489   %}
11490   ins_pipe( pipe_slow );
11491 %}
11492 
11493 // fast search of substring with known size.
11494 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11495                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11496   predicate(UseSSE42Intrinsics);
11497   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11498   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11499 
11500   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11501   ins_encode %{
11502     int icnt2 = (int)$int_cnt2$$constant;
11503     if (icnt2 >= 8) {
11504       // IndexOf for constant substrings with size >= 8 elements,
11505       // which don't need to be loaded through the stack.
11506       __ string_indexofC8($str1$$Register, $str2$$Register,
11507                           $cnt1$$Register, $cnt2$$Register,
11508                           icnt2, $result$$Register,
11509                           $vec$$XMMRegister, $tmp$$Register);
11510     } else {
11511       // Small strings are loaded through the stack if they cross a page boundary.
11512       __ string_indexof($str1$$Register, $str2$$Register,
11513                         $cnt1$$Register, $cnt2$$Register,
11514                         icnt2, $result$$Register,
11515                         $vec$$XMMRegister, $tmp$$Register);
11516     }
11517   %}
11518   ins_pipe( pipe_slow );
11519 %}
11520 
11521 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11522                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11523   predicate(UseSSE42Intrinsics);
11524   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11525   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11526 
11527   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11528   ins_encode %{
11529     __ string_indexof($str1$$Register, $str2$$Register,
11530                       $cnt1$$Register, $cnt2$$Register,
11531                       (-1), $result$$Register,
11532                       $vec$$XMMRegister, $tmp$$Register);
11533   %}
11534   ins_pipe( pipe_slow );
11535 %}
11536 
11537 // fast array equals
11538 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11539                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11540 %{
11541   match(Set result (AryEq ary1 ary2));
11542   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11543   //ins_cost(300);
11544 
11545   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11546   ins_encode %{
11547     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11548                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11549                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11550   %}
11551   ins_pipe( pipe_slow );
11552 %}
11553 
11554 // encode char[] to byte[] in ISO_8859_1
11555 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11556                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11557                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11558   match(Set result (EncodeISOArray src (Binary dst len)));
11559   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11560 
11561   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11562   ins_encode %{
11563     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11564                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11565                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11566   %}
11567   ins_pipe( pipe_slow );
11568 %}
11569 
11570 
11571 //----------Control Flow Instructions------------------------------------------
11572 // Signed compare Instructions
11573 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11574   match(Set cr (CmpI op1 op2));
11575   effect( DEF cr, USE op1, USE op2 );
11576   format %{ "CMP    $op1,$op2" %}
11577   opcode(0x3B);  /* Opcode 3B /r */
11578   ins_encode( OpcP, RegReg( op1, op2) );
11579   ins_pipe( ialu_cr_reg_reg );
11580 %}
11581 
11582 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11583   match(Set cr (CmpI op1 op2));
11584   effect( DEF cr, USE op1 );
11585   format %{ "CMP    $op1,$op2" %}
11586   opcode(0x81,0x07);  /* Opcode 81 /7 */
11587   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11588   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11589   ins_pipe( ialu_cr_reg_imm );
11590 %}
11591 
11592 // Cisc-spilled version of cmpI_eReg
11593 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11594   match(Set cr (CmpI op1 (LoadI op2)));
11595 
11596   format %{ "CMP    $op1,$op2" %}
11597   ins_cost(500);
11598   opcode(0x3B);  /* Opcode 3B /r */
11599   ins_encode( OpcP, RegMem( op1, op2) );
11600   ins_pipe( ialu_cr_reg_mem );
11601 %}
11602 
11603 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11604   match(Set cr (CmpI src zero));
11605   effect( DEF cr, USE src );
11606 
11607   format %{ "TEST   $src,$src" %}
11608   opcode(0x85);
11609   ins_encode( OpcP, RegReg( src, src ) );
11610   ins_pipe( ialu_cr_reg_imm );
11611 %}
11612 
11613 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11614   match(Set cr (CmpI (AndI src con) zero));
11615 
11616   format %{ "TEST   $src,$con" %}
11617   opcode(0xF7,0x00);
11618   ins_encode( OpcP, RegOpc(src), Con32(con) );
11619   ins_pipe( ialu_cr_reg_imm );
11620 %}
11621 
11622 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11623   match(Set cr (CmpI (AndI src mem) zero));
11624 
11625   format %{ "TEST   $src,$mem" %}
11626   opcode(0x85);
11627   ins_encode( OpcP, RegMem( src, mem ) );
11628   ins_pipe( ialu_cr_reg_mem );
11629 %}
11630 
11631 // Unsigned compare Instructions; really, same as signed except they
11632 // produce an eFlagsRegU instead of eFlagsReg.
11633 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11634   match(Set cr (CmpU op1 op2));
11635 
11636   format %{ "CMPu   $op1,$op2" %}
11637   opcode(0x3B);  /* Opcode 3B /r */
11638   ins_encode( OpcP, RegReg( op1, op2) );
11639   ins_pipe( ialu_cr_reg_reg );
11640 %}
11641 
11642 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11643   match(Set cr (CmpU op1 op2));
11644 
11645   format %{ "CMPu   $op1,$op2" %}
11646   opcode(0x81,0x07);  /* Opcode 81 /7 */
11647   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11648   ins_pipe( ialu_cr_reg_imm );
11649 %}
11650 
11651 // // Cisc-spilled version of cmpU_eReg
11652 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11653   match(Set cr (CmpU op1 (LoadI op2)));
11654 
11655   format %{ "CMPu   $op1,$op2" %}
11656   ins_cost(500);
11657   opcode(0x3B);  /* Opcode 3B /r */
11658   ins_encode( OpcP, RegMem( op1, op2) );
11659   ins_pipe( ialu_cr_reg_mem );
11660 %}
11661 
11662 // // Cisc-spilled version of cmpU_eReg
11663 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11664 //  match(Set cr (CmpU (LoadI op1) op2));
11665 //
11666 //  format %{ "CMPu   $op1,$op2" %}
11667 //  ins_cost(500);
11668 //  opcode(0x39);  /* Opcode 39 /r */
11669 //  ins_encode( OpcP, RegMem( op1, op2) );
11670 //%}
11671 
11672 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11673   match(Set cr (CmpU src zero));
11674 
11675   format %{ "TESTu  $src,$src" %}
11676   opcode(0x85);
11677   ins_encode( OpcP, RegReg( src, src ) );
11678   ins_pipe( ialu_cr_reg_imm );
11679 %}
11680 
11681 // Unsigned pointer compare Instructions
11682 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11683   match(Set cr (CmpP op1 op2));
11684 
11685   format %{ "CMPu   $op1,$op2" %}
11686   opcode(0x3B);  /* Opcode 3B /r */
11687   ins_encode( OpcP, RegReg( op1, op2) );
11688   ins_pipe( ialu_cr_reg_reg );
11689 %}
11690 
11691 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11692   match(Set cr (CmpP op1 op2));
11693 
11694   format %{ "CMPu   $op1,$op2" %}
11695   opcode(0x81,0x07);  /* Opcode 81 /7 */
11696   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11697   ins_pipe( ialu_cr_reg_imm );
11698 %}
11699 
11700 // // Cisc-spilled version of cmpP_eReg
11701 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11702   match(Set cr (CmpP op1 (LoadP op2)));
11703 
11704   format %{ "CMPu   $op1,$op2" %}
11705   ins_cost(500);
11706   opcode(0x3B);  /* Opcode 3B /r */
11707   ins_encode( OpcP, RegMem( op1, op2) );
11708   ins_pipe( ialu_cr_reg_mem );
11709 %}
11710 
11711 // // Cisc-spilled version of cmpP_eReg
11712 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11713 //  match(Set cr (CmpP (LoadP op1) op2));
11714 //
11715 //  format %{ "CMPu   $op1,$op2" %}
11716 //  ins_cost(500);
11717 //  opcode(0x39);  /* Opcode 39 /r */
11718 //  ins_encode( OpcP, RegMem( op1, op2) );
11719 //%}
11720 
11721 // Compare raw pointer (used in out-of-heap check).
11722 // Only works because non-oop pointers must be raw pointers
11723 // and raw pointers have no anti-dependencies.
11724 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11725   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11726   match(Set cr (CmpP op1 (LoadP op2)));
11727 
11728   format %{ "CMPu   $op1,$op2" %}
11729   opcode(0x3B);  /* Opcode 3B /r */
11730   ins_encode( OpcP, RegMem( op1, op2) );
11731   ins_pipe( ialu_cr_reg_mem );
11732 %}
11733 
11734 //
11735 // This will generate a signed flags result. This should be ok
11736 // since any compare to a zero should be eq/neq.
11737 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11738   match(Set cr (CmpP src zero));
11739 
11740   format %{ "TEST   $src,$src" %}
11741   opcode(0x85);
11742   ins_encode( OpcP, RegReg( src, src ) );
11743   ins_pipe( ialu_cr_reg_imm );
11744 %}
11745 
11746 // Cisc-spilled version of testP_reg
11747 // This will generate a signed flags result. This should be ok
11748 // since any compare to a zero should be eq/neq.
11749 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11750   match(Set cr (CmpP (LoadP op) zero));
11751 
11752   format %{ "TEST   $op,0xFFFFFFFF" %}
11753   ins_cost(500);
11754   opcode(0xF7);               /* Opcode F7 /0 */
11755   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11756   ins_pipe( ialu_cr_reg_imm );
11757 %}
11758 
11759 // Yanked all unsigned pointer compare operations.
11760 // Pointer compares are done with CmpP which is already unsigned.
11761 
11762 //----------Max and Min--------------------------------------------------------
11763 // Min Instructions
11764 ////
11765 //   *** Min and Max using the conditional move are slower than the
11766 //   *** branch version on a Pentium III.
11767 // // Conditional move for min
11768 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11769 //  effect( USE_DEF op2, USE op1, USE cr );
11770 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11771 //  opcode(0x4C,0x0F);
11772 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11773 //  ins_pipe( pipe_cmov_reg );
11774 //%}
11775 //
11776 //// Min Register with Register (P6 version)
11777 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11778 //  predicate(VM_Version::supports_cmov() );
11779 //  match(Set op2 (MinI op1 op2));
11780 //  ins_cost(200);
11781 //  expand %{
11782 //    eFlagsReg cr;
11783 //    compI_eReg(cr,op1,op2);
11784 //    cmovI_reg_lt(op2,op1,cr);
11785 //  %}
11786 //%}
11787 
11788 // Min Register with Register (generic version)
11789 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11790   match(Set dst (MinI dst src));
11791   effect(KILL flags);
11792   ins_cost(300);
11793 
11794   format %{ "MIN    $dst,$src" %}
11795   opcode(0xCC);
11796   ins_encode( min_enc(dst,src) );
11797   ins_pipe( pipe_slow );
11798 %}
11799 
11800 // Max Register with Register
11801 //   *** Min and Max using the conditional move are slower than the
11802 //   *** branch version on a Pentium III.
11803 // // Conditional move for max
11804 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11805 //  effect( USE_DEF op2, USE op1, USE cr );
11806 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11807 //  opcode(0x4F,0x0F);
11808 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11809 //  ins_pipe( pipe_cmov_reg );
11810 //%}
11811 //
11812 // // Max Register with Register (P6 version)
11813 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11814 //  predicate(VM_Version::supports_cmov() );
11815 //  match(Set op2 (MaxI op1 op2));
11816 //  ins_cost(200);
11817 //  expand %{
11818 //    eFlagsReg cr;
11819 //    compI_eReg(cr,op1,op2);
11820 //    cmovI_reg_gt(op2,op1,cr);
11821 //  %}
11822 //%}
11823 
11824 // Max Register with Register (generic version)
11825 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11826   match(Set dst (MaxI dst src));
11827   effect(KILL flags);
11828   ins_cost(300);
11829 
11830   format %{ "MAX    $dst,$src" %}
11831   opcode(0xCC);
11832   ins_encode( max_enc(dst,src) );
11833   ins_pipe( pipe_slow );
11834 %}
11835 
11836 // ============================================================================
11837 // Counted Loop limit node which represents exact final iterator value.
11838 // Note: the resulting value should fit into the integer range since
11839 // counted loops have a limit check for overflow.
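      // For example (an illustrative sketch, assuming a positive stride): with
      // init = 0, limit = 10 and stride = 3 the loop runs 4 times, so the exact
      // final iterator value computed below is 0 + 3 * ((10 - 0 + 3 - 1) / 3) = 12.
      // Negative strides are handled by negating around the divide (see lneg/negl).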
11840 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
11841   match(Set limit (LoopLimit (Binary init limit) stride));
11842   effect(TEMP limit_hi, TEMP tmp, KILL flags);
11843   ins_cost(300);
11844 
11845   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
11846   ins_encode %{
11847     int strd = (int)$stride$$constant;
11848     assert(strd != 1 && strd != -1, "sanity");
11849     int m1 = (strd > 0) ? 1 : -1;
11850     // Convert limit to long (EAX:EDX)
11851     __ cdql();
11852     // Convert init to long (init:tmp)
11853     __ movl($tmp$$Register, $init$$Register);
11854     __ sarl($tmp$$Register, 31);
11855     // $limit - $init
11856     __ subl($limit$$Register, $init$$Register);
11857     __ sbbl($limit_hi$$Register, $tmp$$Register);
11858     // + ($stride - 1)
11859     if (strd > 0) {
11860       __ addl($limit$$Register, (strd - 1));
11861       __ adcl($limit_hi$$Register, 0);
11862       __ movl($tmp$$Register, strd);
11863     } else {
11864       __ addl($limit$$Register, (strd + 1));
11865       __ adcl($limit_hi$$Register, -1);
11866       __ lneg($limit_hi$$Register, $limit$$Register);
11867       __ movl($tmp$$Register, -strd);
11868     }
11869     // signed division: (EAX:EDX) / pos_stride
11870     __ idivl($tmp$$Register);
11871     if (strd < 0) {
11872       // restore sign
11873       __ negl($tmp$$Register);
11874     }
11875     // (EAX) * stride
11876     __ mull($tmp$$Register);
11877     // + init (ignore upper bits)
11878     __ addl($limit$$Register, $init$$Register);
11879   %}
11880   ins_pipe( pipe_slow );
11881 %}
11882 
11883 // ============================================================================
11884 // Branch Instructions
11885 // Jump Table
11886 instruct jumpXtnd(rRegI switch_val) %{
11887   match(Jump switch_val);
11888   ins_cost(350);
11889   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
11890   ins_encode %{
11891     // Jump to Address(table_base + switch_reg)
11892     Address index(noreg, $switch_val$$Register, Address::times_1);
11893     __ jump(ArrayAddress($constantaddress, index));
11894   %}
11895   ins_pipe(pipe_jmp);
11896 %}
11897 
11898 // Jump Direct - Label defines a relative address from JMP+1
11899 instruct jmpDir(label labl) %{
11900   match(Goto);
11901   effect(USE labl);
11902 
11903   ins_cost(300);
11904   format %{ "JMP    $labl" %}
11905   size(5);
11906   ins_encode %{
11907     Label* L = $labl$$label;
11908     __ jmp(*L, false); // Always long jump
11909   %}
11910   ins_pipe( pipe_jmp );
11911 %}
11912 
11913 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11914 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
11915   match(If cop cr);
11916   effect(USE labl);
11917 
11918   ins_cost(300);
11919   format %{ "J$cop    $labl" %}
11920   size(6);
11921   ins_encode %{
11922     Label* L = $labl$$label;
11923     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11924   %}
11925   ins_pipe( pipe_jcc );
11926 %}
11927 
11928 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11929 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
11930   match(CountedLoopEnd cop cr);
11931   effect(USE labl);
11932 
11933   ins_cost(300);
11934   format %{ "J$cop    $labl\t# Loop end" %}
11935   size(6);
11936   ins_encode %{
11937     Label* L = $labl$$label;
11938     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11939   %}
11940   ins_pipe( pipe_jcc );
11941 %}
11942 
11943 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11944 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
11945   match(CountedLoopEnd cop cmp);
11946   effect(USE labl);
11947 
11948   ins_cost(300);
11949   format %{ "J$cop,u  $labl\t# Loop end" %}
11950   size(6);
11951   ins_encode %{
11952     Label* L = $labl$$label;
11953     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11954   %}
11955   ins_pipe( pipe_jcc );
11956 %}
11957 
11958 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
11959   match(CountedLoopEnd cop cmp);
11960   effect(USE labl);
11961 
11962   ins_cost(200);
11963   format %{ "J$cop,u  $labl\t# Loop end" %}
11964   size(6);
11965   ins_encode %{
11966     Label* L = $labl$$label;
11967     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11968   %}
11969   ins_pipe( pipe_jcc );
11970 %}
11971 
11972 // Jump Direct Conditional - using unsigned comparison
11973 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
11974   match(If cop cmp);
11975   effect(USE labl);
11976 
11977   ins_cost(300);
11978   format %{ "J$cop,u  $labl" %}
11979   size(6);
11980   ins_encode %{
11981     Label* L = $labl$$label;
11982     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11983   %}
11984   ins_pipe(pipe_jcc);
11985 %}
11986 
11987 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
11988   match(If cop cmp);
11989   effect(USE labl);
11990 
11991   ins_cost(200);
11992   format %{ "J$cop,u  $labl" %}
11993   size(6);
11994   ins_encode %{
11995     Label* L = $labl$$label;
11996     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11997   %}
11998   ins_pipe(pipe_jcc);
11999 %}
12000 
12001 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12002   match(If cop cmp);
12003   effect(USE labl);
12004 
12005   ins_cost(200);
12006   format %{ $$template
12007     if ($cop$$cmpcode == Assembler::notEqual) {
12008       $$emit$$"JP,u   $labl\n\t"
12009       $$emit$$"J$cop,u   $labl"
12010     } else {
12011       $$emit$$"JP,u   done\n\t"
12012       $$emit$$"J$cop,u   $labl\n\t"
12013       $$emit$$"done:"
12014     }
12015   %}
12016   ins_encode %{
12017     Label* l = $labl$$label;
12018     if ($cop$$cmpcode == Assembler::notEqual) {
12019       __ jcc(Assembler::parity, *l, false);
12020       __ jcc(Assembler::notEqual, *l, false);
12021     } else if ($cop$$cmpcode == Assembler::equal) {
12022       Label done;
12023       __ jccb(Assembler::parity, done);
12024       __ jcc(Assembler::equal, *l, false);
12025       __ bind(done);
12026     } else {
12027        ShouldNotReachHere();
12028     }
12029   %}
12030   ins_pipe(pipe_jcc);
12031 %}
12032 
12033 // ============================================================================
12034 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12035 // array for an instance of the superklass.  Set a hidden internal cache on a
12036 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12037 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
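      // Roughly equivalent pseudocode for the scan encoded below (an illustrative
      // sketch only; names follow the format strings, not the exact runtime code):
      //   supers = sub->secondary_supers;          // loaded into EDI
      //   n      = supers->length;                 // loaded into ECX
      //   for (i = 0; i < n; i++) {                // REPNE SCASD
      //     if (supers[i] == super) {
      //       sub->secondary_super_cache = super;  // hit: update the cache
      //       return 0;                            // zero (flags Z) == hit
      //     }
      //   }
      //   return non-zero;                         // flags NZ == miss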
12038 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12039   match(Set result (PartialSubtypeCheck sub super));
12040   effect( KILL rcx, KILL cr );
12041 
12042   ins_cost(1100);  // slightly larger than the next version
12043   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12044             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12045             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12046             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12047             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12048             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12049             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12050      "miss:\t" %}
12051 
12052   opcode(0x1); // Force a XOR of EDI
12053   ins_encode( enc_PartialSubtypeCheck() );
12054   ins_pipe( pipe_slow );
12055 %}
12056 
12057 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12058   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12059   effect( KILL rcx, KILL result );
12060 
12061   ins_cost(1000);
12062   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12063             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12064             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12065             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12066             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12067             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12068      "miss:\t" %}
12069 
12070   opcode(0x0);  // No need to XOR EDI
12071   ins_encode( enc_PartialSubtypeCheck() );
12072   ins_pipe( pipe_slow );
12073 %}
12074 
12075 // ============================================================================
12076 // Branch Instructions -- short offset versions
12077 //
12078 // These instructions are used to replace jumps of a long offset (the default
12079 // match) with jumps of a shorter offset.  These instructions are all tagged
12080 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12081 // match rules in general matching.  Instead, the ADLC generates a conversion
12082 // method in the MachNode which can be used to do in-place replacement of the
12083 // long variant with the shorter variant.  The compiler uses the
12084 // is_short_branch_offset() predicate in the machine-specific code section of
12085 // the file to determine whether the short variant can reach its target.
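      // For example, the long jmpDir above emits a 5-byte JMP rel32 (size(5)) while
      // jmpDir_short below emits a 2-byte JMP rel8 (size(2)); the conditional forms
      // shrink from 6 bytes (0x0F Jcc rel32) to 2 bytes (Jcc rel8).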
12086 
12087 // Jump Direct - Label defines a relative address from JMP+1
12088 instruct jmpDir_short(label labl) %{
12089   match(Goto);
12090   effect(USE labl);
12091 
12092   ins_cost(300);
12093   format %{ "JMP,s  $labl" %}
12094   size(2);
12095   ins_encode %{
12096     Label* L = $labl$$label;
12097     __ jmpb(*L);
12098   %}
12099   ins_pipe( pipe_jmp );
12100   ins_short_branch(1);
12101 %}
12102 
12103 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12104 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12105   match(If cop cr);
12106   effect(USE labl);
12107 
12108   ins_cost(300);
12109   format %{ "J$cop,s  $labl" %}
12110   size(2);
12111   ins_encode %{
12112     Label* L = $labl$$label;
12113     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12114   %}
12115   ins_pipe( pipe_jcc );
12116   ins_short_branch(1);
12117 %}
12118 
12119 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12120 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12121   match(CountedLoopEnd cop cr);
12122   effect(USE labl);
12123 
12124   ins_cost(300);
12125   format %{ "J$cop,s  $labl\t# Loop end" %}
12126   size(2);
12127   ins_encode %{
12128     Label* L = $labl$$label;
12129     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12130   %}
12131   ins_pipe( pipe_jcc );
12132   ins_short_branch(1);
12133 %}
12134 
12135 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12136 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12137   match(CountedLoopEnd cop cmp);
12138   effect(USE labl);
12139 
12140   ins_cost(300);
12141   format %{ "J$cop,us $labl\t# Loop end" %}
12142   size(2);
12143   ins_encode %{
12144     Label* L = $labl$$label;
12145     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12146   %}
12147   ins_pipe( pipe_jcc );
12148   ins_short_branch(1);
12149 %}
12150 
12151 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12152   match(CountedLoopEnd cop cmp);
12153   effect(USE labl);
12154 
12155   ins_cost(300);
12156   format %{ "J$cop,us $labl\t# Loop end" %}
12157   size(2);
12158   ins_encode %{
12159     Label* L = $labl$$label;
12160     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12161   %}
12162   ins_pipe( pipe_jcc );
12163   ins_short_branch(1);
12164 %}
12165 
12166 // Jump Direct Conditional - using unsigned comparison
12167 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12168   match(If cop cmp);
12169   effect(USE labl);
12170 
12171   ins_cost(300);
12172   format %{ "J$cop,us $labl" %}
12173   size(2);
12174   ins_encode %{
12175     Label* L = $labl$$label;
12176     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12177   %}
12178   ins_pipe( pipe_jcc );
12179   ins_short_branch(1);
12180 %}
12181 
12182 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12183   match(If cop cmp);
12184   effect(USE labl);
12185 
12186   ins_cost(300);
12187   format %{ "J$cop,us $labl" %}
12188   size(2);
12189   ins_encode %{
12190     Label* L = $labl$$label;
12191     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12192   %}
12193   ins_pipe( pipe_jcc );
12194   ins_short_branch(1);
12195 %}
12196 
12197 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12198   match(If cop cmp);
12199   effect(USE labl);
12200 
12201   ins_cost(300);
12202   format %{ $$template
12203     if ($cop$$cmpcode == Assembler::notEqual) {
12204       $$emit$$"JP,u,s   $labl\n\t"
12205       $$emit$$"J$cop,u,s   $labl"
12206     } else {
12207       $$emit$$"JP,u,s   done\n\t"
12208       $$emit$$"J$cop,u,s  $labl\n\t"
12209       $$emit$$"done:"
12210     }
12211   %}
12212   size(4);
12213   ins_encode %{
12214     Label* l = $labl$$label;
12215     if ($cop$$cmpcode == Assembler::notEqual) {
12216       __ jccb(Assembler::parity, *l);
12217       __ jccb(Assembler::notEqual, *l);
12218     } else if ($cop$$cmpcode == Assembler::equal) {
12219       Label done;
12220       __ jccb(Assembler::parity, done);
12221       __ jccb(Assembler::equal, *l);
12222       __ bind(done);
12223     } else {
12224        ShouldNotReachHere();
12225     }
12226   %}
12227   ins_pipe(pipe_jcc);
12228   ins_short_branch(1);
12229 %}
12230 
12231 // ============================================================================
12232 // Long Compare
12233 //
12234 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12235 // is tricky.  The flavor of compare used depends on whether we are testing
12236 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12237 // The GE test is the negated LT test.  The LE test can be had by commuting
12238 // the operands (yielding a GE test) and then negating; negate again for the
12239 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12240 // NE test is negated from that.
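      // Worked example (illustrative only): testing X < 0 for the 64-bit value
      // X = 0xFFFFFFFF_00000005 (-4294967291) needs only the sign bit of X.hi
      // (see cmpL_zero_flags_LTGE below), while X == 0 needs both halves to be
      // zero, which is why the EQ/NE zero form below ORs the two halves together.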
12241 
12242 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12243 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12244 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12245 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12246 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12247 // foo match ends up with the wrong leaf.  One fix is to not match both
12248 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12249 // both forms beat the trinary form of long-compare and both are very useful
12250 // on Intel which has so few registers.
12251 
12252 // Manifest a CmpL result in an integer register.  Very painful.
12253 // This is the test to avoid.
12254 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12255   match(Set dst (CmpL3 src1 src2));
12256   effect( KILL flags );
12257   ins_cost(1000);
12258   format %{ "XOR    $dst,$dst\n\t"
12259             "CMP    $src1.hi,$src2.hi\n\t"
12260             "JLT,s  m_one\n\t"
12261             "JGT,s  p_one\n\t"
12262             "CMP    $src1.lo,$src2.lo\n\t"
12263             "JB,s   m_one\n\t"
12264             "JEQ,s  done\n"
12265     "p_one:\tINC    $dst\n\t"
12266             "JMP,s  done\n"
12267     "m_one:\tDEC    $dst\n"
12268      "done:" %}
12269   ins_encode %{
12270     Label p_one, m_one, done;
12271     __ xorptr($dst$$Register, $dst$$Register);
12272     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12273     __ jccb(Assembler::less,    m_one);
12274     __ jccb(Assembler::greater, p_one);
12275     __ cmpl($src1$$Register, $src2$$Register);
12276     __ jccb(Assembler::below,   m_one);
12277     __ jccb(Assembler::equal,   done);
12278     __ bind(p_one);
12279     __ incrementl($dst$$Register);
12280     __ jmpb(done);
12281     __ bind(m_one);
12282     __ decrementl($dst$$Register);
12283     __ bind(done);
12284   %}
12285   ins_pipe( pipe_slow );
12286 %}
12287 
12288 //======
12289 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12290 // compares.  Can be used for LE or GT compares by reversing arguments.
12291 // NOT GOOD FOR EQ/NE tests.
12292 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12293   match( Set flags (CmpL src zero ));
12294   ins_cost(100);
12295   format %{ "TEST   $src.hi,$src.hi" %}
12296   opcode(0x85);
12297   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12298   ins_pipe( ialu_cr_reg_reg );
12299 %}
12300 
12301 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12302 // compares.  Can be used for LE or GT compares by reversing arguments.
12303 // NOT GOOD FOR EQ/NE tests.
12304 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12305   match( Set flags (CmpL src1 src2 ));
12306   effect( TEMP tmp );
12307   ins_cost(300);
12308   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12309             "MOV    $tmp,$src1.hi\n\t"
12310             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12311   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12312   ins_pipe( ialu_cr_reg_reg );
12313 %}
12314 
12315 // Long compares reg < zero/req OR reg >= zero/req.
12316 // Just a wrapper for a normal branch, plus the predicate test.
12317 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12318   match(If cmp flags);
12319   effect(USE labl);
12320   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12321   expand %{
12322     jmpCon(cmp,flags,labl);    // JLT or JGE...
12323   %}
12324 %}
12325 
12326 // Compare 2 longs and CMOVE longs.
12327 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12328   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12329   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12330   ins_cost(400);
12331   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12332             "CMOV$cmp $dst.hi,$src.hi" %}
12333   opcode(0x0F,0x40);
12334   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12335   ins_pipe( pipe_cmov_reg_long );
12336 %}
12337 
12338 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12339   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12340   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12341   ins_cost(500);
12342   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12343             "CMOV$cmp $dst.hi,$src.hi" %}
12344   opcode(0x0F,0x40);
12345   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12346   ins_pipe( pipe_cmov_reg_long );
12347 %}
12348 
12349 // Compare 2 longs and CMOVE ints.
12350 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12351   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12352   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12353   ins_cost(200);
12354   format %{ "CMOV$cmp $dst,$src" %}
12355   opcode(0x0F,0x40);
12356   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12357   ins_pipe( pipe_cmov_reg );
12358 %}
12359 
12360 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12361   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12362   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12363   ins_cost(250);
12364   format %{ "CMOV$cmp $dst,$src" %}
12365   opcode(0x0F,0x40);
12366   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12367   ins_pipe( pipe_cmov_mem );
12368 %}
12369 
12370 // Compare 2 longs and CMOVE ints.
12371 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12372   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12373   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12374   ins_cost(200);
12375   format %{ "CMOV$cmp $dst,$src" %}
12376   opcode(0x0F,0x40);
12377   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12378   ins_pipe( pipe_cmov_reg );
12379 %}
12380 
12381 // Compare 2 longs and CMOVE doubles
12382 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12383   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12384   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12385   ins_cost(200);
12386   expand %{
12387     fcmovDPR_regS(cmp,flags,dst,src);
12388   %}
12389 %}
12390 
12391 // Compare 2 longs and CMOVE doubles
12392 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12393   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12394   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12395   ins_cost(200);
12396   expand %{
12397     fcmovD_regS(cmp,flags,dst,src);
12398   %}
12399 %}
12400 
12401 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12402   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12403   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12404   ins_cost(200);
12405   expand %{
12406     fcmovFPR_regS(cmp,flags,dst,src);
12407   %}
12408 %}
12409 
12410 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12411   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12412   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12413   ins_cost(200);
12414   expand %{
12415     fcmovF_regS(cmp,flags,dst,src);
12416   %}
12417 %}
12418 
12419 //======
12420 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12421 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12422   match( Set flags (CmpL src zero ));
12423   effect(TEMP tmp);
12424   ins_cost(200);
12425   format %{ "MOV    $tmp,$src.lo\n\t"
12426             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12427   ins_encode( long_cmp_flags0( src, tmp ) );
12428   ins_pipe( ialu_reg_reg_long );
12429 %}
12430 
12431 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12432 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12433   match( Set flags (CmpL src1 src2 ));
12434   ins_cost(200+300);
12435   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12436             "JNE,s  skip\n\t"
12437             "CMP    $src1.hi,$src2.hi\n\t"
12438      "skip:\t" %}
12439   ins_encode( long_cmp_flags1( src1, src2 ) );
12440   ins_pipe( ialu_cr_reg_reg );
12441 %}
12442 
12443 // Long compare reg == zero/reg OR reg != zero/reg
12444 // Just a wrapper for a normal branch, plus the predicate test.
12445 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12446   match(If cmp flags);
12447   effect(USE labl);
12448   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12449   expand %{
12450     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12451   %}
12452 %}
12453 
12454 // Compare 2 longs and CMOVE longs.
12455 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12456   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12457   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12458   ins_cost(400);
12459   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12460             "CMOV$cmp $dst.hi,$src.hi" %}
12461   opcode(0x0F,0x40);
12462   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12463   ins_pipe( pipe_cmov_reg_long );
12464 %}
12465 
12466 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12467   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12468   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12469   ins_cost(500);
12470   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12471             "CMOV$cmp $dst.hi,$src.hi" %}
12472   opcode(0x0F,0x40);
12473   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12474   ins_pipe( pipe_cmov_reg_long );
12475 %}
12476 
12477 // Compare 2 longs and CMOVE ints.
12478 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12479   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12480   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12481   ins_cost(200);
12482   format %{ "CMOV$cmp $dst,$src" %}
12483   opcode(0x0F,0x40);
12484   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12485   ins_pipe( pipe_cmov_reg );
12486 %}
12487 
12488 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12489   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12490   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12491   ins_cost(250);
12492   format %{ "CMOV$cmp $dst,$src" %}
12493   opcode(0x0F,0x40);
12494   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12495   ins_pipe( pipe_cmov_mem );
12496 %}
12497 
// Compare 2 longs and CMOVE ptrs.
12499 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12500   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12501   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12502   ins_cost(200);
12503   format %{ "CMOV$cmp $dst,$src" %}
12504   opcode(0x0F,0x40);
12505   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12506   ins_pipe( pipe_cmov_reg );
12507 %}
12508 
12509 // Compare 2 longs and CMOVE doubles
12510 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12512   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12513   ins_cost(200);
12514   expand %{
12515     fcmovDPR_regS(cmp,flags,dst,src);
12516   %}
12517 %}
12518 
12519 // Compare 2 longs and CMOVE doubles
12520 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12522   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12523   ins_cost(200);
12524   expand %{
12525     fcmovD_regS(cmp,flags,dst,src);
12526   %}
12527 %}
12528 
12529 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12531   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12532   ins_cost(200);
12533   expand %{
12534     fcmovFPR_regS(cmp,flags,dst,src);
12535   %}
12536 %}
12537 
12538 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12540   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12541   ins_cost(200);
12542   expand %{
12543     fcmovF_regS(cmp,flags,dst,src);
12544   %}
12545 %}
12546 
12547 //======
12548 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12549 // Same as cmpL_reg_flags_LEGT except must negate src
12550 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12551   match( Set flags (CmpL src zero ));
12552   effect( TEMP tmp );
12553   ins_cost(300);
12554   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12555             "CMP    $tmp,$src.lo\n\t"
12556             "SBB    $tmp,$src.hi\n\t" %}
12557   ins_encode( long_cmp_flags3(src, tmp) );
12558   ins_pipe( ialu_reg_reg_long );
12559 %}
12560 
12561 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12562 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12563 // requires a commuted test to get the same result.
12564 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12565   match( Set flags (CmpL src1 src2 ));
12566   effect( TEMP tmp );
12567   ins_cost(300);
12568   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12569             "MOV    $tmp,$src2.hi\n\t"
12570             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12571   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12572   ins_pipe( ialu_cr_reg_reg );
12573 %}
12574 
// Long compares reg <= zero/reg OR reg > zero/reg.
12576 // Just a wrapper for a normal branch, plus the predicate test
12577 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12578   match(If cmp flags);
12579   effect(USE labl);
12580   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12581   ins_cost(300);
12582   expand %{
12583     jmpCon(cmp,flags,labl);    // JGT or JLE...
12584   %}
12585 %}
12586 
12587 // Compare 2 longs and CMOVE longs.
12588 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12589   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12590   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12591   ins_cost(400);
12592   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12593             "CMOV$cmp $dst.hi,$src.hi" %}
12594   opcode(0x0F,0x40);
12595   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12596   ins_pipe( pipe_cmov_reg_long );
12597 %}
12598 
12599 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12600   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12601   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12602   ins_cost(500);
12603   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12604             "CMOV$cmp $dst.hi,$src.hi+4" %}
12605   opcode(0x0F,0x40);
12606   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12607   ins_pipe( pipe_cmov_reg_long );
12608 %}
12609 
12610 // Compare 2 longs and CMOVE ints.
12611 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12612   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12613   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12614   ins_cost(200);
12615   format %{ "CMOV$cmp $dst,$src" %}
12616   opcode(0x0F,0x40);
12617   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12618   ins_pipe( pipe_cmov_reg );
12619 %}
12620 
12621 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12622   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12623   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12624   ins_cost(250);
12625   format %{ "CMOV$cmp $dst,$src" %}
12626   opcode(0x0F,0x40);
12627   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12628   ins_pipe( pipe_cmov_mem );
12629 %}
12630 
12631 // Compare 2 longs and CMOVE ptrs.
12632 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12633   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12634   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12635   ins_cost(200);
12636   format %{ "CMOV$cmp $dst,$src" %}
12637   opcode(0x0F,0x40);
12638   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12639   ins_pipe( pipe_cmov_reg );
12640 %}
12641 
12642 // Compare 2 longs and CMOVE doubles
12643 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12645   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12646   ins_cost(200);
12647   expand %{
12648     fcmovDPR_regS(cmp,flags,dst,src);
12649   %}
12650 %}
12651 
12652 // Compare 2 longs and CMOVE doubles
12653 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12655   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12656   ins_cost(200);
12657   expand %{
12658     fcmovD_regS(cmp,flags,dst,src);
12659   %}
12660 %}
12661 
12662 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12664   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12665   ins_cost(200);
12666   expand %{
12667     fcmovFPR_regS(cmp,flags,dst,src);
12668   %}
12669 %}
12670 
12671 
12672 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12674   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12675   ins_cost(200);
12676   expand %{
12677     fcmovF_regS(cmp,flags,dst,src);
12678   %}
12679 %}
12680 
12681 
12682 // ============================================================================
12683 // Procedure Call/Return Instructions
12684 // Call Java Static Instruction
12685 // Note: If this code changes, the corresponding ret_addr_offset() and
12686 //       compute_padding() functions will have to be adjusted.
12687 instruct CallStaticJavaDirect(method meth) %{
12688   match(CallStaticJava);
12689   predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
12690   effect(USE meth);
12691 
12692   ins_cost(300);
12693   format %{ "CALL,static " %}
12694   opcode(0xE8); /* E8 cd */
12695   ins_encode( pre_call_resets,
12696               Java_Static_Call( meth ),
12697               call_epilog,
12698               post_call_FPU );
12699   ins_pipe( pipe_slow );
12700   ins_alignment(4);
12701 %}
12702 
12703 // Call Java Static Instruction (method handle version)
12704 // Note: If this code changes, the corresponding ret_addr_offset() and
12705 //       compute_padding() functions will have to be adjusted.
12706 instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
12707   match(CallStaticJava);
12708   predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
12709   effect(USE meth);
12710   // EBP is saved by all callees (for interpreter stack correction).
12711   // We use it here for a similar purpose, in {preserve,restore}_SP.
12712 
12713   ins_cost(300);
12714   format %{ "CALL,static/MethodHandle " %}
12715   opcode(0xE8); /* E8 cd */
12716   ins_encode( pre_call_resets,
12717               preserve_SP,
12718               Java_Static_Call( meth ),
12719               restore_SP,
12720               call_epilog,
12721               post_call_FPU );
12722   ins_pipe( pipe_slow );
12723   ins_alignment(4);
12724 %}
12725 
12726 // Call Java Dynamic Instruction
12727 // Note: If this code changes, the corresponding ret_addr_offset() and
12728 //       compute_padding() functions will have to be adjusted.
12729 instruct CallDynamicJavaDirect(method meth) %{
12730   match(CallDynamicJava);
12731   effect(USE meth);
12732 
12733   ins_cost(300);
12734   format %{ "MOV    EAX,(oop)-1\n\t"
12735             "CALL,dynamic" %}
12736   opcode(0xE8); /* E8 cd */
12737   ins_encode( pre_call_resets,
12738               Java_Dynamic_Call( meth ),
12739               call_epilog,
12740               post_call_FPU );
12741   ins_pipe( pipe_slow );
12742   ins_alignment(4);
12743 %}
12744 
12745 // Call Runtime Instruction
12746 instruct CallRuntimeDirect(method meth) %{
12747   match(CallRuntime );
12748   effect(USE meth);
12749 
12750   ins_cost(300);
12751   format %{ "CALL,runtime " %}
12752   opcode(0xE8); /* E8 cd */
12753   // Use FFREEs to clear entries in float stack
12754   ins_encode( pre_call_resets,
12755               FFree_Float_Stack_All,
12756               Java_To_Runtime( meth ),
12757               post_call_FPU );
12758   ins_pipe( pipe_slow );
12759 %}
12760 
12761 // Call runtime without safepoint
12762 instruct CallLeafDirect(method meth) %{
12763   match(CallLeaf);
12764   effect(USE meth);
12765 
12766   ins_cost(300);
12767   format %{ "CALL_LEAF,runtime " %}
12768   opcode(0xE8); /* E8 cd */
12769   ins_encode( pre_call_resets,
12770               FFree_Float_Stack_All,
12771               Java_To_Runtime( meth ),
12772               Verify_FPU_For_Leaf, post_call_FPU );
12773   ins_pipe( pipe_slow );
12774 %}
12775 
12776 instruct CallLeafNoFPDirect(method meth) %{
12777   match(CallLeafNoFP);
12778   effect(USE meth);
12779 
12780   ins_cost(300);
12781   format %{ "CALL_LEAF_NOFP,runtime " %}
12782   opcode(0xE8); /* E8 cd */
12783   ins_encode(Java_To_Runtime(meth));
12784   ins_pipe( pipe_slow );
12785 %}
12786 
12787 
12788 // Return Instruction
12789 // Remove the return address & jump to it.
12790 instruct Ret() %{
12791   match(Return);
12792   format %{ "RET" %}
12793   opcode(0xC3);
12794   ins_encode(OpcP);
12795   ins_pipe( pipe_jmp );
12796 %}
12797 
12798 // Tail Call; Jump from runtime stub to Java code.
12799 // Also known as an 'interprocedural jump'.
12800 // Target of jump will eventually return to caller.
12801 // TailJump below removes the return address.
12802 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12803   match(TailCall jump_target method_oop );
12804   ins_cost(300);
12805   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12806   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12807   ins_encode( OpcP, RegOpc(jump_target) );
12808   ins_pipe( pipe_jmp );
12809 %}
12810 
12811 
12812 // Tail Jump; remove the return address; jump to target.
12813 // TailCall above leaves the return address around.
12814 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12815   match( TailJump jump_target ex_oop );
12816   ins_cost(300);
12817   format %{ "POP    EDX\t# pop return address into dummy\n\t"
12818             "JMP    $jump_target " %}
12819   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12820   ins_encode( enc_pop_rdx,
12821               OpcP, RegOpc(jump_target) );
12822   ins_pipe( pipe_jmp );
12823 %}
12824 
12825 // Create exception oop: created by stack-crawling runtime code.
// The created exception is now available to this handler, and is set up
// just prior to jumping to this handler.  No code emitted.
12828 instruct CreateException( eAXRegP ex_oop )
12829 %{
12830   match(Set ex_oop (CreateEx));
12831 
12832   size(0);
12833   // use the following format syntax
12834   format %{ "# exception oop is in EAX; no code emitted" %}
12835   ins_encode();
12836   ins_pipe( empty );
12837 %}
12838 
12839 
12840 // Rethrow exception:
12841 // The exception oop will come in the first argument position.
12842 // Then JUMP (not call) to the rethrow stub code.
12843 instruct RethrowException()
12844 %{
12845   match(Rethrow);
12846 
12847   // use the following format syntax
12848   format %{ "JMP    rethrow_stub" %}
12849   ins_encode(enc_rethrow);
12850   ins_pipe( pipe_jmp );
12851 %}
12852 
12853 // inlined locking and unlocking
12854 
12855 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
12856   predicate(Compile::current()->use_rtm());
12857   match(Set cr (FastLock object box));
12858   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
12859   ins_cost(300);
12860   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
12861   ins_encode %{
12862     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12863                  $scr$$Register, $cx1$$Register, $cx2$$Register,
12864                  _counters, _rtm_counters, _stack_rtm_counters,
12865                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12866                  true, ra_->C->profile_rtm());
12867   %}
12868   ins_pipe(pipe_slow);
12869 %}
12870 
12871 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
12872   predicate(!Compile::current()->use_rtm());
12873   match(Set cr (FastLock object box));
12874   effect(TEMP tmp, TEMP scr, USE_KILL box);
12875   ins_cost(300);
12876   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
12877   ins_encode %{
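    // Roughly: $object/$box are the lock object and its on-stack BasicLock,
    // $tmp (EAX) and $scr are scratch registers for the mark word CAS, and
    // the noreg/NULL arguments plus the trailing "false, false" switch off
    // the RTM paths that the cmpFastLockRTM variant above enables.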
12878     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12879                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
12880   %}
12881   ins_pipe(pipe_slow);
12882 %}
12883 
12884 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
12885   match(Set cr (FastUnlock object box));
12886   effect(TEMP tmp, USE_KILL box);
12887   ins_cost(300);
12888   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
12889   ins_encode %{
12890     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
12891   %}
12892   ins_pipe(pipe_slow);
12893 %}
12894 
12895 
12896 
12897 // ============================================================================
12898 // Safepoint Instruction
12899 instruct safePoint_poll(eFlagsReg cr) %{
12900   match(SafePoint);
12901   effect(KILL cr);
12902 
12903   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
12904   // On SPARC that might be acceptable as we can generate the address with
12905   // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on cache index 0 in the D$.  Because of
12907   // alignment (just like the situation at hand) the lower indices tend
12908   // to see more traffic.  It'd be better to change the polling address
12909   // to offset 0 of the last $line in the polling page.
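
  // Sketch of the mechanism (assumed, following the usual HotSpot scheme):
  // the poll is a harmless "TEST EAX,[polling_page]" read.  To stop threads
  // at a safepoint the VM write-protects the polling page; the read then
  // faults and the signal handler parks the thread, so no explicit branch is
  // needed here.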
12910 
12911   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
12912   ins_cost(125);
  size(6);
12914   ins_encode( Safepoint_Poll() );
12915   ins_pipe( ialu_reg_mem );
12916 %}
12917 
12918 
12919 // ============================================================================
12920 // This name is KNOWN by the ADLC and cannot be changed.
12921 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this instruction.
12923 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
12924   match(Set dst (ThreadLocal));
12925   effect(DEF dst, KILL cr);
12926 
12927   format %{ "MOV    $dst, Thread::current()" %}
12928   ins_encode %{
12929     Register dstReg = as_Register($dst$$reg);
12930     __ get_thread(dstReg);
12931   %}
12932   ins_pipe( ialu_reg_fat );
12933 %}
12934 
12935 
12936 
12937 //----------PEEPHOLE RULES-----------------------------------------------------
12938 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.
12940 //
12941 // peepmatch ( root_instr_name [preceding_instruction]* );
12942 //
12943 // peepconstraint %{
12944 // (instruction_number.operand_name relational_op instruction_number.operand_name
12945 //  [, ...] );
12946 // // instruction numbers are zero-based using left to right order in peepmatch
12947 //
12948 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12949 // // provide an instruction_number.operand_name for each operand that appears
12950 // // in the replacement instruction's match rule
12951 //
12952 // ---------VM FLAGS---------------------------------------------------------
12953 //
12954 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12955 //
12956 // Each peephole rule is given an identifying number starting with zero and
12957 // increasing by one in the order seen by the parser.  An individual peephole
12958 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12959 // on the command-line.
12960 //
12961 // ---------CURRENT LIMITATIONS----------------------------------------------
12962 //
12963 // Only match adjacent instructions in same basic block
12964 // Only equality constraints
12965 // Only constraints between operands, not (0.dest_reg == EAX_enc)
12966 // Only one replacement instruction
12967 //
12968 // ---------EXAMPLE----------------------------------------------------------
12969 //
12970 // // pertinent parts of existing instructions in architecture description
12971 // instruct movI(rRegI dst, rRegI src) %{
12972 //   match(Set dst (CopyI src));
12973 // %}
12974 //
12975 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
12976 //   match(Set dst (AddI dst src));
12977 //   effect(KILL cr);
12978 // %}
12979 //
12980 // // Change (inc mov) to lea
12981 // peephole %{
//   // increment preceded by register-register move
12983 //   peepmatch ( incI_eReg movI );
12984 //   // require that the destination register of the increment
12985 //   // match the destination register of the move
12986 //   peepconstraint ( 0.dst == 1.dst );
12987 //   // construct a replacement instruction that sets
12988 //   // the destination to ( move's source register + one )
12989 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12990 // %}
12991 //
12992 // Implementation no longer uses movX instructions since
12993 // machine-independent system no longer uses CopyX nodes.
12994 //
12995 // peephole %{
12996 //   peepmatch ( incI_eReg movI );
12997 //   peepconstraint ( 0.dst == 1.dst );
12998 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12999 // %}
13000 //
13001 // peephole %{
13002 //   peepmatch ( decI_eReg movI );
13003 //   peepconstraint ( 0.dst == 1.dst );
13004 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13005 // %}
13006 //
13007 // peephole %{
13008 //   peepmatch ( addI_eReg_imm movI );
13009 //   peepconstraint ( 0.dst == 1.dst );
13010 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13011 // %}
13012 //
13013 // peephole %{
13014 //   peepmatch ( addP_eReg_imm movP );
13015 //   peepconstraint ( 0.dst == 1.dst );
13016 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13017 // %}
13018 
13019 // // Change load of spilled value to only a spill
13020 // instruct storeI(memory mem, rRegI src) %{
13021 //   match(Set mem (StoreI mem src));
13022 // %}
13023 //
13024 // instruct loadI(rRegI dst, memory mem) %{
13025 //   match(Set dst (LoadI mem));
13026 // %}
13027 //
13028 peephole %{
13029   peepmatch ( loadI storeI );
13030   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13031   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13032 %}
13033 
13034 //----------SMARTSPILL RULES---------------------------------------------------
13035 // These must follow all instruction definitions as they use the names
// defined in the instruction definitions.