1 //
   2 // Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
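//
// For example, the first entry below,
//   reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
// reads: EBX is save-on-call as far as the allocator (compiled Java code) is
// concerned, save-on-entry under the C calling convention, is spilled with
// LoadI/StoreI (Op_RegI), and has hardware encoding 3.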
  61 
  62 // General Registers
// EBX, ESI, and EDI were previously set as save-on-entry for Java code, but
// SOE was turned off in Java code because of the frequent use of uncommon
// traps.  Now that the allocator is better, ESI and EDI are SOE registers again.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
// Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
// allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Here is the trick: FPR1 is really st(0), except in the midst of emitting
// assembly for a machnode.  During emission the FPU stack is pushed, making
// FPR1 == st(1) temporarily.  However, at any safepoint the stack will not
// have this extra element, so FPR1 == st(0) from the oopMap viewpoint.  This
// off-by-one numbering forces the instruction encoding to adjust the register
// encode to correct for the 0/1 issue.  See MachSpillCopyNode::implementation,
// where it performs flt->flt moves, for an example.
//
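// A concrete illustration (see the flt->flt case in
// MachSpillCopyNode::implementation below): copying FPR2 to FPR3 emits
// FLD st(1)  (0xD9, 0xC0 + 2 - 1) to push the source onto the stack, then
// FSTP st(3) (0xDD, 0xD8 + 3) to pop it into the destination.  The "-1" on
// the FLD is the 0/1 correction described above; the FSTP uses the raw
// encoding because the stack is one entry deeper at that point.
//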
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
 108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
 109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
 110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
 111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
 112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
 113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
 114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between the register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI).
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and not EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
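
// For example, HIGH_FROM_LOW(EAX_enc) == EDX_enc and
// HIGH_FROM_LOW(ECX_enc) == EBX_enc, matching the long pairs EDX:EAX and
// EBX:ECX listed in the allocation-class comment above.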
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
// Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
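
// These masks are typically consumed as 128-bit memory operands of packed
// logical instructions: ANDing with a signmask clears the sign bit of each
// lane (AbsF/AbsD), while XORing with a signflip toggles it (NegF/NegD).
// An illustrative (hypothetical) use; the real ones live in the instruct
// definitions further down in this file:
//   __ andps(xmm0, ExternalAddress((address)float_signmask_pool));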
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (VM_Version::supports_vzeroupper()) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
 298 
// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
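
// For a static call with no pre-call resets this works out to 5 bytes:
// CALL rel32 is opcode 0xE8 followed by a 4-byte displacement, so the return
// address is 5 bytes past the start of the call.  A dynamic call adds 5 more
// bytes for the MOV EAX, imm32 that loads the inline-cache constant ahead of
// the call.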
 316 
 317 //
 318 // Compute padding required for nodes which need alignment
 319 //
 320 
// The address of the call instruction needs to be 4-byte aligned so that
// it does not span a cache line and can therefore be patched atomically.
 323 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 324   current_offset += pre_call_resets_size();  // skip fldcw, if any
 325   current_offset += 1;      // skip call opcode byte
 326   return align_up(current_offset, alignment_required()) - current_offset;
 327 }
 328 
// The address of the call instruction needs to be 4-byte aligned so that
// it does not span a cache line and can therefore be patched atomically.
 331 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 332   current_offset += pre_call_resets_size();  // skip fldcw, if any
 333   current_offset += 5;      // skip MOV instruction
 334   current_offset += 1;      // skip call opcode byte
 335   return align_up(current_offset, alignment_required()) - current_offset;
 336 }
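
// A worked example of the arithmetic above, assuming alignment_required() == 4
// and no pre-call resets: with current_offset == 6, the byte after the call
// opcode (the start of the 4-byte displacement that gets patched) would fall
// at offset 7, so 1 byte of padding is returned to round it up to 8.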
 337 
 338 // EMIT_RM()
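// The three arguments are the standard x86 ModRM/SIB fields packed as
// (f1 << 6) | (f2 << 3) | f3.  For example, emit_rm(cbuf, 0x3, 0x00, ESP_enc)
// emits 0xC4, which together with opcode 0x81 /0 forms "ADD ESP, imm32" as
// used in the epilog below.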
 339 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 340   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 341   cbuf.insts()->emit_int8(c);
 342 }
 343 
 344 // EMIT_CC()
 345 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 346   unsigned char c = (unsigned char)( f1 | f2 );
 347   cbuf.insts()->emit_int8(c);
 348 }
 349 
 350 // EMIT_OPCODE()
 351 void emit_opcode(CodeBuffer &cbuf, int code) {
 352   cbuf.insts()->emit_int8((unsigned char) code);
 353 }
 354 
 355 // EMIT_OPCODE() w/ relocation information
 356 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 357   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 358   emit_opcode(cbuf, code);
 359 }
 360 
 361 // EMIT_D8()
 362 void emit_d8(CodeBuffer &cbuf, int d8) {
 363   cbuf.insts()->emit_int8((unsigned char) d8);
 364 }
 365 
 366 // EMIT_D16()
 367 void emit_d16(CodeBuffer &cbuf, int d16) {
 368   cbuf.insts()->emit_int16(d16);
 369 }
 370 
 371 // EMIT_D32()
 372 void emit_d32(CodeBuffer &cbuf, int d32) {
 373   cbuf.insts()->emit_int32(d32);
 374 }
 375 
 376 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 377 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 378         int format) {
 379   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 380   cbuf.insts()->emit_int32(d32);
 381 }
 382 
 383 // emit 32 bit value and construct relocation entry from RelocationHolder
 384 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 385         int format) {
 386 #ifdef ASSERT
 387   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 388     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
 389   }
 390 #endif
 391   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 392   cbuf.insts()->emit_int32(d32);
 393 }
 394 
 395 // Access stack slot for load or store
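// For example, store_to_stackslot(cbuf, 0xDF, 0x5, 8) emits DF 6C 24 08,
// i.e. FILD qword ptr [ESP+8]: opcode 0xDF with the /5 digit passed as
// rm_field, an ESP-based SIB byte (0x24), and an 8-bit displacement.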
 396 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 397   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 398   if( -128 <= disp && disp <= 127 ) {
 399     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 400     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 401     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 402   } else {
 403     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 404     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 405     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 406   }
 407 }
 408 
// Emit the ModRM/SIB/displacement bytes for a register/memory operand (emit_reg_mem).
 410 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 411   // There is no index & no scale, use form without SIB byte
 412   if ((index == 0x4) &&
 413       (scale == 0) && (base != ESP_enc)) {
 414     // If no displacement, mode is 0x0; unless base is [EBP]
 415     if ( (displace == 0) && (base != EBP_enc) ) {
 416       emit_rm(cbuf, 0x0, reg_encoding, base);
 417     }
 418     else {                    // If 8-bit displacement, mode 0x1
 419       if ((displace >= -128) && (displace <= 127)
 420           && (disp_reloc == relocInfo::none) ) {
 421         emit_rm(cbuf, 0x1, reg_encoding, base);
 422         emit_d8(cbuf, displace);
 423       }
 424       else {                  // If 32-bit displacement
 425         if (base == -1) { // Special flag for absolute address
 426           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 427           // (manual lies; no SIB needed here)
 428           if ( disp_reloc != relocInfo::none ) {
 429             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 430           } else {
 431             emit_d32      (cbuf, displace);
 432           }
 433         }
 434         else {                // Normal base + offset
 435           emit_rm(cbuf, 0x2, reg_encoding, base);
 436           if ( disp_reloc != relocInfo::none ) {
 437             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 438           } else {
 439             emit_d32      (cbuf, displace);
 440           }
 441         }
 442       }
 443     }
 444   }
 445   else {                      // Else, encode with the SIB byte
 446     // If no displacement, mode is 0x0; unless base is [EBP]
 447     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 448       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 449       emit_rm(cbuf, scale, index, base);
 450     }
 451     else {                    // If 8-bit displacement, mode 0x1
 452       if ((displace >= -128) && (displace <= 127)
 453           && (disp_reloc == relocInfo::none) ) {
 454         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 455         emit_rm(cbuf, scale, index, base);
 456         emit_d8(cbuf, displace);
 457       }
 458       else {                  // If 32-bit displacement
 459         if (base == 0x04 ) {
 460           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 461           emit_rm(cbuf, scale, index, 0x04);
 462         } else {
 463           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 464           emit_rm(cbuf, scale, index, base);
 465         }
 466         if ( disp_reloc != relocInfo::none ) {
 467           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 468         } else {
 469           emit_d32      (cbuf, displace);
 470         }
 471       }
 472     }
 473   }
 474 }
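
// Example usages: with reg_encoding = EAX_enc, base = ECX_enc, index = 0x4
// (none), scale = 0 and displace = 0, encode_RegMem emits the single ModRM
// byte 0x01, which preceded by opcode 0x8B is "MOV EAX, [ECX]".  With
// base == -1 it takes the absolute-address form (mod = 00, rm = 101) followed
// by a 32-bit displacement.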
 475 
 476 
 477 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 478   if( dst_encoding == src_encoding ) {
 479     // reg-reg copy, use an empty encoding
 480   } else {
 481     emit_opcode( cbuf, 0x8B );
 482     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 483   }
 484 }
 485 
 486 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 487   Label exit;
 488   __ jccb(Assembler::noParity, exit);
 489   __ pushf();
 490   //
 491   // comiss/ucomiss instructions set ZF,PF,CF flags and
 492   // zero OF,AF,SF for NaN values.
 493   // Fixup flags by zeroing ZF,PF so that compare of NaN
 494   // values returns 'less than' result (CF is set).
 495   // Leave the rest of flags unchanged.
 496   //
 497   //    7 6 5 4 3 2 1 0
 498   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 499   //    0 0 1 0 1 0 1 1   (0x2B)
 500   //
 501   __ andl(Address(rsp, 0), 0xffffff2b);
 502   __ popf();
 503   __ bind(exit);
 504 }
 505 
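// Materialize the three-way result of an FP compare (assuming a UCOMISS/
// UCOMISD-style compare was just emitted): dst becomes -1 if the result is
// "less than" or unordered (CF or PF set), 0 if equal, and 1 if "greater
// than", i.e. the fcmpl/dcmpl (less-on-NaN) convention.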
 506 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 507   Label done;
 508   __ movl(dst, -1);
 509   __ jcc(Assembler::parity, done);
 510   __ jcc(Assembler::below, done);
 511   __ setb(Assembler::notEqual, dst);
 512   __ movzbl(dst, dst);
 513   __ bind(done);
 514 }
 515 
 516 
 517 //=============================================================================
 518 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 519 
 520 int ConstantTable::calculate_table_base_offset() const {
 521   return 0;  // absolute addressing, no offset
 522 }
 523 
 524 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 525 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 526   ShouldNotReachHere();
 527 }
 528 
 529 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 530   // Empty encoding
 531 }
 532 
 533 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 534   return 0;
 535 }
 536 
 537 #ifndef PRODUCT
 538 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 539   st->print("# MachConstantBaseNode (empty encoding)");
 540 }
 541 #endif
 542 
 543 
 544 //=============================================================================
 545 #ifndef PRODUCT
 546 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 547   Compile* C = ra_->C;
 548 
 549   int framesize = C->output()->frame_size_in_bytes();
 550   int bangsize = C->output()->bang_size_in_bytes();
 551   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for the return addr, which is already pushed.
 553   framesize -= wordSize;
 554 
 555   if (C->output()->need_stack_bang(bangsize)) {
 556     framesize -= wordSize;
 557     st->print("# stack bang (%d bytes)", bangsize);
 558     st->print("\n\t");
 559     st->print("PUSH   EBP\t# Save EBP");
 560     if (PreserveFramePointer) {
 561       st->print("\n\t");
 562       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 563     }
 564     if (framesize) {
 565       st->print("\n\t");
 566       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 567     }
 568   } else {
 569     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 570     st->print("\n\t");
 571     framesize -= wordSize;
 572     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 573     if (PreserveFramePointer) {
 574       st->print("\n\t");
 575       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 576       if (framesize > 0) {
 577         st->print("\n\t");
 578         st->print("ADD    EBP, #%d", framesize);
 579       }
 580     }
 581   }
 582 
 583   if (VerifyStackAtCalls) {
 584     st->print("\n\t");
 585     framesize -= wordSize;
 586     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 587   }
 588 
 589   if( C->in_24_bit_fp_mode() ) {
 590     st->print("\n\t");
 591     st->print("FLDCW  \t# load 24 bit fpu control word");
 592   }
 593   if (UseSSE >= 2 && VerifyFPU) {
 594     st->print("\n\t");
 595     st->print("# verify FPU stack (must be clean on entry)");
 596   }
 597 
 598 #ifdef ASSERT
 599   if (VerifyStackAtCalls) {
 600     st->print("\n\t");
 601     st->print("# stack alignment check");
 602   }
 603 #endif
 604   st->cr();
 605 }
 606 #endif
 607 
 608 
 609 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 610   Compile* C = ra_->C;
 611   MacroAssembler _masm(&cbuf);
 612 
 613   int framesize = C->output()->frame_size_in_bytes();
 614   int bangsize = C->output()->bang_size_in_bytes();
 615 
 616   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
 617 
 618   C->output()->set_frame_complete(cbuf.insts_size());
 619 
 620   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users of the constant
    // table might be emitted before the MachConstantBaseNode itself.
 623     ConstantTable& constant_table = C->output()->constant_table();
 624     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 625   }
 626 }
 627 
 628 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 629   return MachNode::size(ra_); // too many variables; just compute it the hard way
 630 }
 631 
 632 int MachPrologNode::reloc() const {
 633   return 0; // a large enough number
 634 }
 635 
 636 //=============================================================================
 637 #ifndef PRODUCT
 638 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 639   Compile *C = ra_->C;
 640   int framesize = C->output()->frame_size_in_bytes();
 641   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and rbp.
 643   framesize -= 2*wordSize;
 644 
 645   if (C->max_vector_size() > 16) {
 646     st->print("VZEROUPPER");
 647     st->cr(); st->print("\t");
 648   }
 649   if (C->in_24_bit_fp_mode()) {
 650     st->print("FLDCW  standard control word");
 651     st->cr(); st->print("\t");
 652   }
 653   if (framesize) {
 654     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 655     st->cr(); st->print("\t");
 656   }
 657   st->print_cr("POPL   EBP"); st->print("\t");
 658   if (do_polling() && C->is_method_compilation()) {
 659     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 660     st->cr(); st->print("\t");
 661   }
 662 }
 663 #endif
 664 
 665 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 666   Compile *C = ra_->C;
 667   MacroAssembler _masm(&cbuf);
 668 
 669   if (C->max_vector_size() > 16) {
 670     // Clear upper bits of YMM registers when current compiled code uses
 671     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 672     _masm.vzeroupper();
 673   }
 674   // If method set FPU control word, restore to standard control word
 675   if (C->in_24_bit_fp_mode()) {
 676     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 677   }
 678 
 679   int framesize = C->output()->frame_size_in_bytes();
 680   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and rbp.
 682   framesize -= 2*wordSize;
 683 
 684   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 685 
 686   if (framesize >= 128) {
 687     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 688     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 689     emit_d32(cbuf, framesize);
 690   } else if (framesize) {
 691     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 692     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 693     emit_d8(cbuf, framesize);
 694   }
 695 
 696   emit_opcode(cbuf, 0x58 | EBP_enc);
 697 
 698   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 699     __ reserved_stack_check();
 700   }
 701 
 702   if (do_polling() && C->is_method_compilation()) {
 703     Register pollReg = as_Register(EBX_enc);
 704     MacroAssembler masm(&cbuf);
 705     masm.get_thread(pollReg);
 706     masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
 707     masm.relocate(relocInfo::poll_return_type);
 708     masm.testl(rax, Address(pollReg, 0));
 709   }
 710 }
 711 
 712 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 713   return MachNode::size(ra_); // too many variables; just compute it
 714                               // the hard way
 715 }
 716 
 717 int MachEpilogNode::reloc() const {
 718   return 0; // a large enough number
 719 }
 720 
 721 const Pipeline * MachEpilogNode::pipeline() const {
 722   return MachNode::pipeline_class();
 723 }
 724 
 725 //=============================================================================
 726 
 727 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 728 static enum RC rc_class( OptoReg::Name reg ) {
 729 
 730   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 731   if (OptoReg::is_stack(reg)) return rc_stack;
 732 
 733   VMReg r = OptoReg::as_VMReg(reg);
 734   if (r->is_Register()) return rc_int;
 735   if (r->is_FloatRegister()) {
 736     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 737     return rc_float;
 738   }
 739   assert(r->is_XMMRegister(), "must be");
 740   return rc_xmm;
 741 }
 742 
 743 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 744                         int opcode, const char *op_str, int size, outputStream* st ) {
 745   if( cbuf ) {
 746     emit_opcode  (*cbuf, opcode );
 747     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 748 #ifndef PRODUCT
 749   } else if( !do_size ) {
 750     if( size != 0 ) st->print("\n\t");
 751     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 752       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 753       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 754     } else { // FLD, FST, PUSH, POP
 755       st->print("%s [ESP + #%d]",op_str,offset);
 756     }
 757 #endif
 758   }
 759   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 760   return size+3+offset_size;
 761 }
 762 
 763 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 764 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 765                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 766   int in_size_in_bits = Assembler::EVEX_32bit;
 767   int evex_encoding = 0;
 768   if (reg_lo+1 == reg_hi) {
 769     in_size_in_bits = Assembler::EVEX_64bit;
 770     evex_encoding = Assembler::VEX_W;
 771   }
 772   if (cbuf) {
 773     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
    //                          since it maps more cases to a single-byte displacement
 776     _masm.set_managed();
 777     if (reg_lo+1 == reg_hi) { // double move?
 778       if (is_load) {
 779         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 780       } else {
 781         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 782       }
 783     } else {
 784       if (is_load) {
 785         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 786       } else {
 787         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 788       }
 789     }
 790 #ifndef PRODUCT
 791   } else if (!do_size) {
 792     if (size != 0) st->print("\n\t");
 793     if (reg_lo+1 == reg_hi) { // double move?
 794       if (is_load) st->print("%s %s,[ESP + #%d]",
 795                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 796                               Matcher::regName[reg_lo], offset);
 797       else         st->print("MOVSD  [ESP + #%d],%s",
 798                               offset, Matcher::regName[reg_lo]);
 799     } else {
 800       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 801                               Matcher::regName[reg_lo], offset);
 802       else         st->print("MOVSS  [ESP + #%d],%s",
 803                               offset, Matcher::regName[reg_lo]);
 804     }
 805 #endif
 806   }
 807   bool is_single_byte = false;
 808   if ((UseAVX > 2) && (offset != 0)) {
 809     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 810   }
 811   int offset_size = 0;
 812   if (UseAVX > 2 ) {
 813     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 814   } else {
 815     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 816   }
 817   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 818   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 819   return size+5+offset_size;
 820 }
 821 
 822 
 823 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 824                             int src_hi, int dst_hi, int size, outputStream* st ) {
 825   if (cbuf) {
 826     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
 828     _masm.set_managed();
 829     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 830       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 831                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 832     } else {
 833       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 834                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 835     }
 836 #ifndef PRODUCT
 837   } else if (!do_size) {
 838     if (size != 0) st->print("\n\t");
 839     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 840       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 841         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 842       } else {
 843         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 844       }
 845     } else {
 846       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 847         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 848       } else {
 849         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 850       }
 851     }
 852 #endif
 853   }
 854   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 855   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 856   int sz = (UseAVX > 2) ? 6 : 4;
 857   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 858       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 859   return size + sz;
 860 }
 861 
 862 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 863                             int src_hi, int dst_hi, int size, outputStream* st ) {
 864   // 32-bit
 865   if (cbuf) {
 866     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
 868     _masm.set_managed();
 869     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 870              as_Register(Matcher::_regEncode[src_lo]));
 871 #ifndef PRODUCT
 872   } else if (!do_size) {
 873     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 874 #endif
 875   }
 876   return (UseAVX> 2) ? 6 : 4;
 877 }
 878 
 879 
 880 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 881                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 882   // 32-bit
 883   if (cbuf) {
 884     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic for choosing between full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
 886     _masm.set_managed();
 887     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 888              as_XMMRegister(Matcher::_regEncode[src_lo]));
 889 #ifndef PRODUCT
 890   } else if (!do_size) {
 891     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 892 #endif
 893   }
 894   return (UseAVX> 2) ? 6 : 4;
 895 }
 896 
 897 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 898   if( cbuf ) {
 899     emit_opcode(*cbuf, 0x8B );
 900     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 901 #ifndef PRODUCT
 902   } else if( !do_size ) {
 903     if( size != 0 ) st->print("\n\t");
 904     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 905 #endif
 906   }
 907   return size+2;
 908 }
 909 
 910 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 911                                  int offset, int size, outputStream* st ) {
 912   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 913     if( cbuf ) {
 914       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 915       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 916 #ifndef PRODUCT
 917     } else if( !do_size ) {
 918       if( size != 0 ) st->print("\n\t");
 919       st->print("FLD    %s",Matcher::regName[src_lo]);
 920 #endif
 921     }
 922     size += 2;
 923   }
 924 
 925   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
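  // Note: EBX_num and EDX_num are not used as registers here; their hardware
  // encodings (3 and 2) are reused by impl_helper as the ModRM /digit of the
  // x87 store, where /3 selects FSTP (store and pop) and /2 selects FST
  // (store without pop).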
 926   const char *op_str;
 927   int op;
 928   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 929     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 930     op = 0xDD;
 931   } else {                   // 32-bit store
 932     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 933     op = 0xD9;
 934     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 935   }
 936 
 937   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 938 }
 939 
 940 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 941 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 942                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 943 
 944 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 945                             int stack_offset, int reg, uint ireg, outputStream* st);
 946 
 947 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 948                                      int dst_offset, uint ireg, outputStream* st) {
 949   int calc_size = 0;
 950   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 951   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 952   switch (ireg) {
 953   case Op_VecS:
 954     calc_size = 3+src_offset_size + 3+dst_offset_size;
 955     break;
 956   case Op_VecD: {
 957     calc_size = 3+src_offset_size + 3+dst_offset_size;
 958     int tmp_src_offset = src_offset + 4;
 959     int tmp_dst_offset = dst_offset + 4;
 960     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 961     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 962     calc_size += 3+src_offset_size + 3+dst_offset_size;
 963     break;
 964   }
 965   case Op_VecX:
 966   case Op_VecY:
 967   case Op_VecZ:
 968     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 969     break;
 970   default:
 971     ShouldNotReachHere();
 972   }
 973   if (cbuf) {
 974     MacroAssembler _masm(cbuf);
 975     int offset = __ offset();
 976     switch (ireg) {
 977     case Op_VecS:
 978       __ pushl(Address(rsp, src_offset));
 979       __ popl (Address(rsp, dst_offset));
 980       break;
 981     case Op_VecD:
 982       __ pushl(Address(rsp, src_offset));
 983       __ popl (Address(rsp, dst_offset));
 984       __ pushl(Address(rsp, src_offset+4));
 985       __ popl (Address(rsp, dst_offset+4));
 986       break;
 987     case Op_VecX:
 988       __ movdqu(Address(rsp, -16), xmm0);
 989       __ movdqu(xmm0, Address(rsp, src_offset));
 990       __ movdqu(Address(rsp, dst_offset), xmm0);
 991       __ movdqu(xmm0, Address(rsp, -16));
 992       break;
 993     case Op_VecY:
 994       __ vmovdqu(Address(rsp, -32), xmm0);
 995       __ vmovdqu(xmm0, Address(rsp, src_offset));
 996       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 997       __ vmovdqu(xmm0, Address(rsp, -32));
 998       break;
 999     case Op_VecZ:
1000       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1001       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1002       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1003       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1004       break;
1005     default:
1006       ShouldNotReachHere();
1007     }
1008     int size = __ offset() - offset;
1009     assert(size == calc_size, "incorrect size calculation");
1010     return size;
1011 #ifndef PRODUCT
1012   } else if (!do_size) {
1013     switch (ireg) {
1014     case Op_VecS:
1015       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1016                 "popl    [rsp + #%d]",
1017                 src_offset, dst_offset);
1018       break;
1019     case Op_VecD:
1020       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1021                 "popq    [rsp + #%d]\n\t"
1022                 "pushl   [rsp + #%d]\n\t"
1023                 "popq    [rsp + #%d]",
1024                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1025       break;
1026      case Op_VecX:
1027       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1028                 "movdqu  xmm0, [rsp + #%d]\n\t"
1029                 "movdqu  [rsp + #%d], xmm0\n\t"
1030                 "movdqu  xmm0, [rsp - #16]",
1031                 src_offset, dst_offset);
1032       break;
1033     case Op_VecY:
1034       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1035                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1036                 "vmovdqu [rsp + #%d], xmm0\n\t"
1037                 "vmovdqu xmm0, [rsp - #32]",
1038                 src_offset, dst_offset);
1039       break;
1040     case Op_VecZ:
1041       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1042                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1043                 "vmovdqu [rsp + #%d], xmm0\n\t"
1044                 "vmovdqu xmm0, [rsp - #64]",
1045                 src_offset, dst_offset);
1046       break;
1047     default:
1048       ShouldNotReachHere();
1049     }
1050 #endif
1051   }
1052   return calc_size;
1053 }
1054 
1055 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1056   // Get registers to move
1057   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1058   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1059   OptoReg::Name dst_second = ra_->get_reg_second(this );
1060   OptoReg::Name dst_first = ra_->get_reg_first(this );
1061 
1062   enum RC src_second_rc = rc_class(src_second);
1063   enum RC src_first_rc = rc_class(src_first);
1064   enum RC dst_second_rc = rc_class(dst_second);
1065   enum RC dst_first_rc = rc_class(dst_first);
1066 
1067   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1068 
1069   // Generate spill code!
1070   int size = 0;
1071 
1072   if( src_first == dst_first && src_second == dst_second )
1073     return size;            // Self copy, no move
1074 
1075   if (bottom_type()->isa_vect() != NULL) {
1076     uint ireg = ideal_reg();
1077     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1078     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1079     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1080     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1081       // mem -> mem
1082       int src_offset = ra_->reg2offset(src_first);
1083       int dst_offset = ra_->reg2offset(dst_first);
1084       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1085     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1086       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1087     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1088       int stack_offset = ra_->reg2offset(dst_first);
1089       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1090     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1091       int stack_offset = ra_->reg2offset(src_first);
1092       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1093     } else {
1094       ShouldNotReachHere();
1095     }
1096   }
1097 
1098   // --------------------------------------
1099   // Check for mem-mem move.  push/pop to move.
1100   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1101     if( src_second == dst_first ) { // overlapping stack copy ranges
1102       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1103       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1104       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1105       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1106     }
1107     // move low bits
1108     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1109     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1110     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1111       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1112       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1113     }
1114     return size;
1115   }
1116 
1117   // --------------------------------------
1118   // Check for integer reg-reg copy
1119   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1120     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1121 
1122   // Check for integer store
1123   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1124     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1125 
1126   // Check for integer load
1127   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1128     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1129 
1130   // Check for integer reg-xmm reg copy
1131   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1132     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1133             "no 64 bit integer-float reg moves" );
1134     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1135   }
1136   // --------------------------------------
1137   // Check for float reg-reg copy
1138   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1139     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1140             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1141     if( cbuf ) {
1142 
      // Note the adjustment of the register encode to compensate for the 0/1
      // indexing issue described in the FPR reg_def comments near the top of
      // this file.
1146 
1147       if( src_first != FPR1L_num ) {
1148         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1149         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1150         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1151         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1152      } else {
1153         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1154         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1155      }
1156 #ifndef PRODUCT
1157     } else if( !do_size ) {
1158       if( size != 0 ) st->print("\n\t");
1159       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1160       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1161 #endif
1162     }
1163     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1164   }
1165 
1166   // Check for float store
1167   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1168     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1169   }
1170 
1171   // Check for float load
1172   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1173     int offset = ra_->reg2offset(src_first);
1174     const char *op_str;
1175     int op;
1176     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1177       op_str = "FLD_D";
1178       op = 0xDD;
1179     } else {                   // 32-bit load
1180       op_str = "FLD_S";
1181       op = 0xD9;
1182       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1183     }
1184     if( cbuf ) {
1185       emit_opcode  (*cbuf, op );
1186       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1187       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1188       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1189 #ifndef PRODUCT
1190     } else if( !do_size ) {
1191       if( size != 0 ) st->print("\n\t");
1192       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1193 #endif
1194     }
1195     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1196     return size + 3+offset_size+2;
1197   }
1198 
1199   // Check for xmm reg-reg copy
1200   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1201     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1202             (src_first+1 == src_second && dst_first+1 == dst_second),
1203             "no non-adjacent float-moves" );
1204     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1205   }
1206 
1207   // Check for xmm reg-integer reg copy
1208   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1209     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1210             "no 64 bit float-integer reg moves" );
1211     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1212   }
1213 
1214   // Check for xmm store
1215   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1216     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1217   }
1218 
1219   // Check for float xmm load
1220   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1221     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1222   }
1223 
1224   // Copy from float reg to xmm reg
1225   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1226     // copy to the top of stack from floating point reg
1227     // and use LEA to preserve flags
1228     if( cbuf ) {
1229       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1230       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1231       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1232       emit_d8(*cbuf,0xF8);
1233 #ifndef PRODUCT
1234     } else if( !do_size ) {
1235       if( size != 0 ) st->print("\n\t");
1236       st->print("LEA    ESP,[ESP-8]");
1237 #endif
1238     }
1239     size += 4;
1240 
1241     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1242 
1243     // Copy from the temp memory to the xmm reg.
1244     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1245 
1246     if( cbuf ) {
1247       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1248       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1249       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1250       emit_d8(*cbuf,0x08);
1251 #ifndef PRODUCT
1252     } else if( !do_size ) {
1253       if( size != 0 ) st->print("\n\t");
1254       st->print("LEA    ESP,[ESP+8]");
1255 #endif
1256     }
1257     size += 4;
1258     return size;
1259   }
1260 
1261   assert( size > 0, "missed a case" );
1262 
1263   // --------------------------------------------------------------------
1264   // Check for second bits still needing moving.
1265   if( src_second == dst_second )
1266     return size;               // Self copy; no move
1267   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1268 
1269   // Check for second word int-int move
1270   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1271     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1272 
1273   // Check for second word integer store
1274   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1275     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1276 
1277   // Check for second word integer load
1278   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1279     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1280 
1281 
1282   Unimplemented();
1283   return 0; // Mute compiler
1284 }
1285 
1286 #ifndef PRODUCT
1287 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1288   implementation( NULL, ra_, false, st );
1289 }
1290 #endif
1291 
1292 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1293   implementation( &cbuf, ra_, false, NULL );
1294 }
1295 
1296 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1297   return MachNode::size(ra_);
1298 }
1299 
1300 
1301 //=============================================================================
1302 #ifndef PRODUCT
1303 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1304   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1305   int reg = ra_->get_reg_first(this);
1306   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1307 }
1308 #endif
1309 
1310 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1311   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1312   int reg = ra_->get_encode(this);
1313   if( offset >= 128 ) {
1314     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1315     emit_rm(cbuf, 0x2, reg, 0x04);
1316     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1317     emit_d32(cbuf, offset);
1318   }
1319   else {
1320     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1321     emit_rm(cbuf, 0x1, reg, 0x04);
1322     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1323     emit_d8(cbuf, offset);
1324   }
1325 }
1326 
1327 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1328   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1329   if( offset >= 128 ) {
1330     return 7;
1331   }
1332   else {
1333     return 4;
1334   }
1335 }
1336 
1337 //=============================================================================
1338 #ifndef PRODUCT
1339 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1340   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1341   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1342   st->print_cr("\tNOP");
1343   st->print_cr("\tNOP");
1344   if( !OptoBreakpoint )
1345     st->print_cr("\tNOP");
1346 }
1347 #endif
1348 
1349 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1350   MacroAssembler masm(&cbuf);
1351 #ifdef ASSERT
1352   uint insts_size = cbuf.insts_size();
1353 #endif
1354   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1355   masm.jump_cc(Assembler::notEqual,
1356                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1357   /* WARNING these NOPs are critical so that verified entry point is properly
1358      aligned for patching by NativeJump::patch_verified_entry() */
1359   int nops_cnt = 2;
1360   if( !OptoBreakpoint ) // Leave space for int3
1361      nops_cnt += 1;
1362   masm.nop(nops_cnt);
1363 
1364   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1365 }
1366 
1367 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1368   return OptoBreakpoint ? 11 : 12;
1369 }
1370 
1371 
1372 //=============================================================================
1373 
1374 int Matcher::regnum_to_fpu_offset(int regnum) {
1375   return regnum - 32; // The FP registers are in the second chunk
1376 }
1377 
// Originally an UltraSPARC-specific hook; returning true just means this platform has fast l2f conversion.
1379 const bool Matcher::convL2FSupported(void) {
1380   return true;
1381 }
1382 
1383 // Is this branch offset short enough that a short branch can be used?
1384 //
1385 // NOTE: If the platform does not provide any short branch variants, then
1386 //       this method should return false for offset 0.
1387 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1391   offset -= br_size;
1392 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
1395   if (rule == jmpConUCF2_rule)
1396     return (-126 <= offset && offset <= 125);
1397   return (-128 <= offset && offset <= 127);
1398 }
1399 
1400 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1402   return false;
1403 }
1404 
1405 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1406 const bool Matcher::init_array_count_is_in_bytes = false;
1407 
// Needs 2 CMOVs for longs.
1409 const int Matcher::long_cmove_cost() { return 1; }
1410 
1411 // No CMOVF/CMOVD with SSE/SSE2
1412 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1413 
1414 // Does the CPU require late expand (see block.cpp for description of late expand)?
1415 const bool Matcher::require_postalloc_expand = false;
1416 
1417 // Do we need to mask the count passed to shift instructions or does
1418 // the cpu only look at the lower 5/6 bits anyway?
1419 const bool Matcher::need_masked_shift_count = false;
1420 
1421 bool Matcher::narrow_oop_use_complex_address() {
1422   ShouldNotCallThis();
1423   return true;
1424 }
1425 
1426 bool Matcher::narrow_klass_use_complex_address() {
1427   ShouldNotCallThis();
1428   return true;
1429 }
1430 
1431 bool Matcher::const_oop_prefer_decode() {
1432   ShouldNotCallThis();
1433   return true;
1434 }
1435 
1436 bool Matcher::const_klass_prefer_decode() {
1437   ShouldNotCallThis();
1438   return true;
1439 }
1440 
1441 // Is it better to copy float constants, or load them directly from memory?
1442 // Intel can load a float constant from a direct address, requiring no
1443 // extra registers.  Most RISCs will have to materialize an address into a
1444 // register first, so they would do better to copy the constant from stack.
1445 const bool Matcher::rematerialize_float_constants = true;
1446 
1447 // If CPU can load and store mis-aligned doubles directly then no fixup is
1448 // needed.  Else we split the double into 2 integer pieces and move it
1449 // piece-by-piece.  Only happens when passing doubles into C code as the
1450 // Java calling convention forces doubles to be aligned.
1451 const bool Matcher::misaligned_doubles_ok = true;
1452 
1453 
1454 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1455   // Get the memory operand from the node
1456   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1457   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1458   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1459   uint opcnt     = 1;                 // First operand
1460   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1461   while( idx >= skipped+num_edges ) {
1462     skipped += num_edges;
1463     opcnt++;                          // Bump operand count
1464     assert( opcnt < numopnds, "Accessing non-existent operand" );
1465     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1466   }
1467 
1468   MachOper *memory = node->_opnds[opcnt];
1469   MachOper *new_memory = NULL;
1470   switch (memory->opcode()) {
1471   case DIRECT:
1472   case INDOFFSET32X:
1473     // No transformation necessary.
1474     return;
1475   case INDIRECT:
1476     new_memory = new indirect_win95_safeOper( );
1477     break;
1478   case INDOFFSET8:
1479     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1480     break;
1481   case INDOFFSET32:
1482     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1483     break;
1484   case INDINDEXOFFSET:
1485     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1486     break;
1487   case INDINDEXSCALE:
1488     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1489     break;
1490   case INDINDEXSCALEOFFSET:
1491     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1492     break;
1493   case LOAD_LONG_INDIRECT:
1494   case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as the address register; uses one of { EDX, EBX, EDI, ESI }
1496     return;
1497   default:
1498     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1499     return;
1500   }
1501   node->_opnds[opcnt] = new_memory;
1502 }
1503 
1504 // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
1505 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1506 
// Are floats converted to double when stored to stack during deoptimization?
// On x32 it is stored with conversion only when the FPU is used for floats.
1509 bool Matcher::float_in_double() { return (UseSSE == 0); }
1510 
1511 // Do ints take an entire long register or just half?
1512 const bool Matcher::int_in_long = false;
1513 
1514 // Return whether or not this register is ever used as an argument.  This
1515 // function is used on startup to build the trampoline stubs in generateOptoStub.
1516 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1518 bool Matcher::can_be_java_arg( int reg ) {
1519   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1520   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1521   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1522   return false;
1523 }
1524 
1525 bool Matcher::is_spillable_arg( int reg ) {
1526   return can_be_java_arg(reg);
1527 }
1528 
1529 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses a multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded because negating it does not
  // produce a valid positive 32-bit value).
1535   return VM_Version::has_fast_idiv() &&
1536          (divisor == (int)divisor && divisor != min_jint);
1537 }
1538 
1539 // Register for DIVI projection of divmodI
1540 RegMask Matcher::divI_proj_mask() {
1541   return EAX_REG_mask();
1542 }
1543 
1544 // Register for MODI projection of divmodI
1545 RegMask Matcher::modI_proj_mask() {
1546   return EDX_REG_mask();
1547 }
1548 
1549 // Register for DIVL projection of divmodL
1550 RegMask Matcher::divL_proj_mask() {
1551   ShouldNotReachHere();
1552   return RegMask();
1553 }
1554 
1555 // Register for MODL projection of divmodL
1556 RegMask Matcher::modL_proj_mask() {
1557   ShouldNotReachHere();
1558   return RegMask();
1559 }
1560 
1561 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1562   return NO_REG_mask();
1563 }
1564 
// Returns true if the high 32 bits of the value are known to be zero.
1566 bool is_operand_hi32_zero(Node* n) {
1567   int opc = n->Opcode();
1568   if (opc == Op_AndL) {
1569     Node* o2 = n->in(2);
1570     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1571       return true;
1572     }
1573   }
1574   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1575     return true;
1576   }
1577   return false;
1578 }
1579 
1580 %}
1581 
1582 //----------ENCODING BLOCK-----------------------------------------------------
1583 // This block specifies the encoding classes used by the compiler to output
1584 // byte streams.  Encoding classes generate functions which are called by
1585 // Machine Instruction Nodes in order to generate the bit encoding of the
1586 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
1588 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1589 // operand to generate a function which returns its register number when
1590 // queried.   CONST_INTER causes an operand to generate a function which
1591 // returns the value of the constant when queried.  MEMORY_INTER causes an
1592 // operand to generate four functions which return the Base Register, the
1593 // Index Register, the Scale Value, and the Offset Value of the operand when
1594 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1596 // associated with each basic boolean condition for a conditional instruction.
1597 // Instructions specify two basic values for encoding.  They use the
1598 // ins_encode keyword to specify their encoding class (which must be one of
1599 // the class names specified in the encoding block), and they use the
1600 // opcode keyword to specify, in order, their primary, secondary, and
1601 // tertiary opcode.  Only the opcode sections which a particular instruction
1602 // needs for encoding need to be specified.
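// For orientation only, a minimal hypothetical instruct using these
// conventions could look like the following sketch: opcode() supplies the
// $primary value and ins_encode names encoding classes defined below.
//
//   instruct addI_sketch(rRegI dst, rRegI src) %{
//     match(Set dst (AddI dst src));
//     opcode(0x03);                        // ADD r32, r/m32
//     ins_encode( OpcP, RegReg(dst,src) );
//   %}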
1603 encode %{
1604   // Build emit functions for each basic byte or larger field in the intel
1605   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1606   // code in the enc_class source block.  Emit functions will live in the
1607   // main source block for now.  In future, we can generalize this by
1608   // adding a syntax that specifies the sizes of fields in an order,
1609   // so that the adlc can build the emit functions automagically
1610 
1611   // Emit primary opcode
1612   enc_class OpcP %{
1613     emit_opcode(cbuf, $primary);
1614   %}
1615 
1616   // Emit secondary opcode
1617   enc_class OpcS %{
1618     emit_opcode(cbuf, $secondary);
1619   %}
1620 
1621   // Emit opcode directly
1622   enc_class Opcode(immI d8) %{
1623     emit_opcode(cbuf, $d8$$constant);
1624   %}
1625 
1626   enc_class SizePrefix %{
1627     emit_opcode(cbuf,0x66);
1628   %}
1629 
1630   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1631     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1632   %}
1633 
1634   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1635     emit_opcode(cbuf,$opcode$$constant);
1636     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1637   %}
1638 
1639   enc_class mov_r32_imm0( rRegI dst ) %{
1640     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1641     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1642   %}
1643 
1644   enc_class cdq_enc %{
1645     // Full implementation of Java idiv and irem; checks for
1646     // special case as described in JVM spec., p.243 & p.271.
1647     //
1648     //         normal case                           special case
1649     //
    // input : rax: dividend                          min_int
    //         reg: divisor                           -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
1655     //
    //  Code sequence:
1657     //
1658     //  81 F8 00 00 00 80    cmp         rax,80000000h
1659     //  0F 85 0B 00 00 00    jne         normal_case
1660     //  33 D2                xor         rdx,edx
1661     //  83 F9 FF             cmp         rcx,0FFh
1662     //  0F 84 03 00 00 00    je          done
1663     //                  normal_case:
1664     //  99                   cdq
1665     //  F7 F9                idiv        rax,ecx
1666     //                  done:
1667     //
1668     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1669     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1670     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1671     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1672     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1673     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1674     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1675     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1676     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1677     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1678     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1679     // normal_case:
1680     emit_opcode(cbuf,0x99);                                         // cdq
1681     // idiv (note: must be emitted by the user of this rule)
1682     // normal:
1683   %}
1684 
1685   // Dense encoding for older common ops
1686   enc_class Opc_plus(immI opcode, rRegI reg) %{
1687     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1688   %}
1689 
1690 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1692   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1693     // Check for 8-bit immediate, and set sign extend bit in opcode
1694     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1695       emit_opcode(cbuf, $primary | 0x02);
1696     }
1697     else {                          // If 32-bit immediate
1698       emit_opcode(cbuf, $primary);
1699     }
1700   %}
1701 
1702   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1703     // Emit primary opcode and set sign-extend bit
1704     // Check for 8-bit immediate, and set sign extend bit in opcode
1705     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
1707     else {                          // If 32-bit immediate
1708       emit_opcode(cbuf, $primary);
1709     }
1710     // Emit r/m byte with secondary opcode, after primary opcode.
1711     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1712   %}
1713 
1714   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1715     // Check for 8-bit immediate, and set sign extend bit in opcode
1716     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1717       $$$emit8$imm$$constant;
1718     }
1719     else {                          // If 32-bit immediate
1720       // Output immediate
1721       $$$emit32$imm$$constant;
1722     }
1723   %}
1724 
1725   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1726     // Emit primary opcode and set sign-extend bit
1727     // Check for 8-bit immediate, and set sign extend bit in opcode
1728     int con = (int)$imm$$constant; // Throw away top bits
1729     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1730     // Emit r/m byte with secondary opcode, after primary opcode.
1731     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1732     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1733     else                               emit_d32(cbuf,con);
1734   %}
1735 
1736   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1737     // Emit primary opcode and set sign-extend bit
1738     // Check for 8-bit immediate, and set sign extend bit in opcode
1739     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1740     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1741     // Emit r/m byte with tertiary opcode, after primary opcode.
1742     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1743     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1744     else                               emit_d32(cbuf,con);
1745   %}
1746 
1747   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1748     emit_cc(cbuf, $secondary, $dst$$reg );
1749   %}
1750 
1751   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1752     int destlo = $dst$$reg;
1753     int desthi = HIGH_FROM_LOW(destlo);
1754     // bswap lo
1755     emit_opcode(cbuf, 0x0F);
1756     emit_cc(cbuf, 0xC8, destlo);
1757     // bswap hi
1758     emit_opcode(cbuf, 0x0F);
1759     emit_cc(cbuf, 0xC8, desthi);
1760     // xchg lo and hi
1761     emit_opcode(cbuf, 0x87);
1762     emit_rm(cbuf, 0x3, destlo, desthi);
1763   %}
1764 
1765   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1766     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1767   %}
1768 
1769   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1770     $$$emit8$primary;
1771     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1772   %}
1773 
1774   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1775     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1776     emit_d8(cbuf, op >> 8 );
1777     emit_d8(cbuf, op & 255);
1778   %}
1779 
1780   // emulate a CMOV with a conditional branch around a MOV
1781   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1782     // Invert sense of branch from sense of CMOV
1783     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1784     emit_d8( cbuf, $brOffs$$constant );
1785   %}
1786 
1787   enc_class enc_PartialSubtypeCheck( ) %{
1788     Register Redi = as_Register(EDI_enc); // result register
1789     Register Reax = as_Register(EAX_enc); // super class
1790     Register Recx = as_Register(ECX_enc); // killed
1791     Register Resi = as_Register(ESI_enc); // sub class
1792     Label miss;
1793 
1794     MacroAssembler _masm(&cbuf);
1795     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1796                                      NULL, &miss,
1797                                      /*set_cond_codes:*/ true);
1798     if ($primary) {
1799       __ xorptr(Redi, Redi);
1800     }
1801     __ bind(miss);
1802   %}
1803 
1804   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1805     MacroAssembler masm(&cbuf);
1806     int start = masm.offset();
1807     if (UseSSE >= 2) {
1808       if (VerifyFPU) {
1809         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1810       }
1811     } else {
1812       // External c_calling_convention expects the FPU stack to be 'clean'.
1813       // Compiled code leaves it dirty.  Do cleanup now.
1814       masm.empty_FPU_stack();
1815     }
1816     if (sizeof_FFree_Float_Stack_All == -1) {
1817       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1818     } else {
1819       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1820     }
1821   %}
1822 
1823   enc_class Verify_FPU_For_Leaf %{
1824     if( VerifyFPU ) {
1825       MacroAssembler masm(&cbuf);
1826       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1827     }
1828   %}
1829 
1830   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1831     // This is the instruction starting address for relocation info.
1832     cbuf.set_insts_mark();
1833     $$$emit8$primary;
1834     // CALL directly to the runtime
1835     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1836                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1837 
1838     if (UseSSE >= 2) {
1839       MacroAssembler _masm(&cbuf);
1840       BasicType rt = tf()->return_type();
1841 
1842       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1843         // A C runtime call where the return value is unused.  In SSE2+
1844         // mode the result needs to be removed from the FPU stack.  It's
1845         // likely that this function call could be removed by the
1846         // optimizer if the C function is a pure function.
1847         __ ffree(0);
1848       } else if (rt == T_FLOAT) {
1849         __ lea(rsp, Address(rsp, -4));
1850         __ fstp_s(Address(rsp, 0));
1851         __ movflt(xmm0, Address(rsp, 0));
1852         __ lea(rsp, Address(rsp,  4));
1853       } else if (rt == T_DOUBLE) {
1854         __ lea(rsp, Address(rsp, -8));
1855         __ fstp_d(Address(rsp, 0));
1856         __ movdbl(xmm0, Address(rsp, 0));
1857         __ lea(rsp, Address(rsp,  8));
1858       }
1859     }
1860   %}
1861 
1862   enc_class pre_call_resets %{
1863     // If method sets FPU control word restore it here
1864     debug_only(int off0 = cbuf.insts_size());
1865     if (ra_->C->in_24_bit_fp_mode()) {
1866       MacroAssembler _masm(&cbuf);
1867       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1868     }
1869     // Clear upper bits of YMM registers when current compiled code uses
1870     // wide vectors to avoid AVX <-> SSE transition penalty during call.
1871     MacroAssembler _masm(&cbuf);
1872     __ vzeroupper();
1873     debug_only(int off1 = cbuf.insts_size());
1874     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1875   %}
1876 
1877   enc_class post_call_FPU %{
1878     // If method sets FPU control word do it here also
1879     if (Compile::current()->in_24_bit_fp_mode()) {
1880       MacroAssembler masm(&cbuf);
1881       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1882     }
1883   %}
1884 
1885   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1886     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1887     // who we intended to call.
1888     cbuf.set_insts_mark();
1889     $$$emit8$primary;
1890 
1891     if (!_method) {
1892       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1893                      runtime_call_Relocation::spec(),
1894                      RELOC_IMM32);
1895     } else {
1896       int method_index = resolved_method_index(cbuf);
1897       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1898                                                   : static_call_Relocation::spec(method_index);
1899       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1900                      rspec, RELOC_DISP32);
1901       // Emit stubs for static call.
1902       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1903       if (stub == NULL) {
1904         ciEnv::current()->record_failure("CodeCache is full");
1905         return;
1906       }
1907     }
1908   %}
1909 
1910   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1911     MacroAssembler _masm(&cbuf);
1912     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1913   %}
1914 
1915   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1916     int disp = in_bytes(Method::from_compiled_offset());
1917     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1918 
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1920     cbuf.set_insts_mark();
1921     $$$emit8$primary;
1922     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1923     emit_d8(cbuf, disp);             // Displacement
1924 
1925   %}
1926 
1927 //   Following encoding is no longer used, but may be restored if calling
1928 //   convention changes significantly.
1929 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1930 //
1931 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1932 //     // int ic_reg     = Matcher::inline_cache_reg();
1933 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1934 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1935 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1936 //
1937 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1938 //     // // so we load it immediately before the call
1939 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1940 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1941 //
1942 //     // xor rbp,ebp
1943 //     emit_opcode(cbuf, 0x33);
1944 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1945 //
1946 //     // CALL to interpreter.
1947 //     cbuf.set_insts_mark();
1948 //     $$$emit8$primary;
1949 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1950 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1951 //   %}
1952 
1953   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1954     $$$emit8$primary;
1955     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1956     $$$emit8$shift$$constant;
1957   %}
1958 
1959   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1960     // Load immediate does not have a zero or sign extended version
1961     // for 8-bit immediates
1962     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1963     $$$emit32$src$$constant;
1964   %}
1965 
1966   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1967     // Load immediate does not have a zero or sign extended version
1968     // for 8-bit immediates
1969     emit_opcode(cbuf, $primary + $dst$$reg);
1970     $$$emit32$src$$constant;
1971   %}
1972 
1973   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1974     // Load immediate does not have a zero or sign extended version
1975     // for 8-bit immediates
1976     int dst_enc = $dst$$reg;
1977     int src_con = $src$$constant & 0x0FFFFFFFFL;
1978     if (src_con == 0) {
1979       // xor dst, dst
1980       emit_opcode(cbuf, 0x33);
1981       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1982     } else {
1983       emit_opcode(cbuf, $primary + dst_enc);
1984       emit_d32(cbuf, src_con);
1985     }
1986   %}
1987 
1988   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
1989     // Load immediate does not have a zero or sign extended version
1990     // for 8-bit immediates
1991     int dst_enc = $dst$$reg + 2;
1992     int src_con = ((julong)($src$$constant)) >> 32;
1993     if (src_con == 0) {
1994       // xor dst, dst
1995       emit_opcode(cbuf, 0x33);
1996       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1997     } else {
1998       emit_opcode(cbuf, $primary + dst_enc);
1999       emit_d32(cbuf, src_con);
2000     }
2001   %}
2002 
2003 
  // Encode a reg-reg copy.  If it is useless, the encoding is empty.
2005   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2006     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2007   %}
2008 
2009   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2010     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2011   %}
2012 
2013   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2014     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2015   %}
2016 
2017   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2018     $$$emit8$primary;
2019     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2020   %}
2021 
2022   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2023     $$$emit8$secondary;
2024     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2025   %}
2026 
2027   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2028     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2029   %}
2030 
2031   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2032     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2033   %}
2034 
2035   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2036     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2037   %}
2038 
2039   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2040     // Output immediate
2041     $$$emit32$src$$constant;
2042   %}
2043 
2044   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2045     // Output Float immediate bits
2046     jfloat jf = $src$$constant;
2047     int    jf_as_bits = jint_cast( jf );
2048     emit_d32(cbuf, jf_as_bits);
2049   %}
2050 
2051   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2052     // Output Float immediate bits
2053     jfloat jf = $src$$constant;
2054     int    jf_as_bits = jint_cast( jf );
2055     emit_d32(cbuf, jf_as_bits);
2056   %}
2057 
2058   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2059     // Output immediate
2060     $$$emit16$src$$constant;
2061   %}
2062 
2063   enc_class Con_d32(immI src) %{
2064     emit_d32(cbuf,$src$$constant);
2065   %}
2066 
2067   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2068     // Output immediate memory reference
2069     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2070     emit_d32(cbuf, 0x00);
2071   %}
2072 
2073   enc_class lock_prefix( ) %{
2074     emit_opcode(cbuf,0xF0);         // [Lock]
2075   %}
2076 
2077   // Cmp-xchg long value.
2078   // Note: we need to swap rbx, and rcx before and after the
2079   //       cmpxchg8 instruction because the instruction uses
2080   //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
2082   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2083 
    // XCHG  EBX,ECX
2085     emit_opcode(cbuf,0x87);
2086     emit_opcode(cbuf,0xD9);
2087     // [Lock]
2088     emit_opcode(cbuf,0xF0);
2089     // CMPXCHG8 [Eptr]
2090     emit_opcode(cbuf,0x0F);
2091     emit_opcode(cbuf,0xC7);
2092     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  EBX,ECX
2094     emit_opcode(cbuf,0x87);
2095     emit_opcode(cbuf,0xD9);
2096   %}
2097 
2098   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2099     // [Lock]
2100     emit_opcode(cbuf,0xF0);
2101 
2102     // CMPXCHG [Eptr]
2103     emit_opcode(cbuf,0x0F);
2104     emit_opcode(cbuf,0xB1);
2105     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2106   %}
2107 
2108   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
2109     // [Lock]
2110     emit_opcode(cbuf,0xF0);
2111 
2112     // CMPXCHGB [Eptr]
2113     emit_opcode(cbuf,0x0F);
2114     emit_opcode(cbuf,0xB0);
2115     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2116   %}
2117 
2118   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
2119     // [Lock]
2120     emit_opcode(cbuf,0xF0);
2121 
2122     // 16-bit mode
2123     emit_opcode(cbuf, 0x66);
2124 
2125     // CMPXCHGW [Eptr]
2126     emit_opcode(cbuf,0x0F);
2127     emit_opcode(cbuf,0xB1);
2128     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2129   %}
2130 
2131   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2132     int res_encoding = $res$$reg;
2133 
2134     // MOV  res,0
2135     emit_opcode( cbuf, 0xB8 + res_encoding);
2136     emit_d32( cbuf, 0 );
2137     // JNE,s  fail
2138     emit_opcode(cbuf,0x75);
2139     emit_d8(cbuf, 5 );
2140     // MOV  res,1
2141     emit_opcode( cbuf, 0xB8 + res_encoding);
2142     emit_d32( cbuf, 1 );
2143     // fail:
2144   %}
2145 
2146   enc_class set_instruction_start( ) %{
2147     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2148   %}
2149 
2150   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2151     int reg_encoding = $ereg$$reg;
2152     int base  = $mem$$base;
2153     int index = $mem$$index;
2154     int scale = $mem$$scale;
2155     int displace = $mem$$disp;
2156     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2157     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2158   %}
2159 
2160   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2161     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2162     int base  = $mem$$base;
2163     int index = $mem$$index;
2164     int scale = $mem$$scale;
2165     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2166     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2167     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2168   %}
2169 
2170   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2171     int r1, r2;
2172     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2173     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2174     emit_opcode(cbuf,0x0F);
2175     emit_opcode(cbuf,$tertiary);
2176     emit_rm(cbuf, 0x3, r1, r2);
2177     emit_d8(cbuf,$cnt$$constant);
2178     emit_d8(cbuf,$primary);
2179     emit_rm(cbuf, 0x3, $secondary, r1);
2180     emit_d8(cbuf,$cnt$$constant);
2181   %}
2182 
2183   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2184     emit_opcode( cbuf, 0x8B ); // Move
2185     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2186     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2187       emit_d8(cbuf,$primary);
2188       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2189       emit_d8(cbuf,$cnt$$constant-32);
2190     }
2191     emit_d8(cbuf,$primary);
2192     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2193     emit_d8(cbuf,31);
2194   %}
2195 
2196   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2197     int r1, r2;
2198     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2199     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2200 
2201     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2202     emit_rm(cbuf, 0x3, r1, r2);
2203     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2204       emit_opcode(cbuf,$primary);
2205       emit_rm(cbuf, 0x3, $secondary, r1);
2206       emit_d8(cbuf,$cnt$$constant-32);
2207     }
2208     emit_opcode(cbuf,0x33);  // XOR r2,r2
2209     emit_rm(cbuf, 0x3, r2, r2);
2210   %}
2211 
2212   // Clone of RegMem but accepts an extra parameter to access each
2213   // half of a double in memory; it never needs relocation info.
2214   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2215     emit_opcode(cbuf,$opcode$$constant);
2216     int reg_encoding = $rm_reg$$reg;
2217     int base     = $mem$$base;
2218     int index    = $mem$$index;
2219     int scale    = $mem$$scale;
2220     int displace = $mem$$disp + $disp_for_half$$constant;
2221     relocInfo::relocType disp_reloc = relocInfo::none;
2222     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2223   %}
2224 
2225   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2226   //
2227   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2228   // and it never needs relocation information.
2229   // Frequently used to move data between FPU's Stack Top and memory.
2230   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2231     int rm_byte_opcode = $rm_opcode$$constant;
2232     int base     = $mem$$base;
2233     int index    = $mem$$index;
2234     int scale    = $mem$$scale;
2235     int displace = $mem$$disp;
2236     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2237     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2238   %}
2239 
2240   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2241     int rm_byte_opcode = $rm_opcode$$constant;
2242     int base     = $mem$$base;
2243     int index    = $mem$$index;
2244     int scale    = $mem$$scale;
2245     int displace = $mem$$disp;
2246     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2247     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2248   %}
2249 
2250   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2251     int reg_encoding = $dst$$reg;
2252     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2253     int index        = 0x04;            // 0x04 indicates no index
2254     int scale        = 0x00;            // 0x00 indicates no scale
2255     int displace     = $src1$$constant; // 0x00 indicates no displacement
2256     relocInfo::relocType disp_reloc = relocInfo::none;
2257     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2258   %}
2259 
2260   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2261     // Compare dst,src
2262     emit_opcode(cbuf,0x3B);
2263     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2264     // jmp dst < src around move
2265     emit_opcode(cbuf,0x7C);
2266     emit_d8(cbuf,2);
2267     // move dst,src
2268     emit_opcode(cbuf,0x8B);
2269     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2270   %}
2271 
2272   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2273     // Compare dst,src
2274     emit_opcode(cbuf,0x3B);
2275     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2276     // jmp dst > src around move
2277     emit_opcode(cbuf,0x7F);
2278     emit_d8(cbuf,2);
2279     // move dst,src
2280     emit_opcode(cbuf,0x8B);
2281     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2282   %}
2283 
2284   enc_class enc_FPR_store(memory mem, regDPR src) %{
2285     // If src is FPR1, we can just FST to store it.
2286     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2287     int reg_encoding = 0x2; // Just store
2288     int base  = $mem$$base;
2289     int index = $mem$$index;
2290     int scale = $mem$$scale;
2291     int displace = $mem$$disp;
2292     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2293     if( $src$$reg != FPR1L_enc ) {
2294       reg_encoding = 0x3;  // Store & pop
2295       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2296       emit_d8( cbuf, 0xC0-1+$src$$reg );
2297     }
2298     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2299     emit_opcode(cbuf,$primary);
2300     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2301   %}
2302 
2303   enc_class neg_reg(rRegI dst) %{
2304     // NEG $dst
2305     emit_opcode(cbuf,0xF7);
2306     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2307   %}
2308 
2309   enc_class setLT_reg(eCXRegI dst) %{
2310     // SETLT $dst
2311     emit_opcode(cbuf,0x0F);
2312     emit_opcode(cbuf,0x9C);
2313     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2314   %}
2315 
2316   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2317     int tmpReg = $tmp$$reg;
2318 
2319     // SUB $p,$q
2320     emit_opcode(cbuf,0x2B);
2321     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2322     // SBB $tmp,$tmp
2323     emit_opcode(cbuf,0x1B);
2324     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2325     // AND $tmp,$y
2326     emit_opcode(cbuf,0x23);
2327     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2328     // ADD $p,$tmp
2329     emit_opcode(cbuf,0x03);
2330     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2331   %}
2332 
2333   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2334     // TEST shift,32
2335     emit_opcode(cbuf,0xF7);
2336     emit_rm(cbuf, 0x3, 0, ECX_enc);
2337     emit_d32(cbuf,0x20);
2338     // JEQ,s small
2339     emit_opcode(cbuf, 0x74);
2340     emit_d8(cbuf, 0x04);
2341     // MOV    $dst.hi,$dst.lo
2342     emit_opcode( cbuf, 0x8B );
2343     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2344     // CLR    $dst.lo
2345     emit_opcode(cbuf, 0x33);
2346     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2347 // small:
2348     // SHLD   $dst.hi,$dst.lo,$shift
2349     emit_opcode(cbuf,0x0F);
2350     emit_opcode(cbuf,0xA5);
2351     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL    $dst.lo,$shift
2353     emit_opcode(cbuf,0xD3);
2354     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2355   %}
2356 
2357   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2358     // TEST shift,32
2359     emit_opcode(cbuf,0xF7);
2360     emit_rm(cbuf, 0x3, 0, ECX_enc);
2361     emit_d32(cbuf,0x20);
2362     // JEQ,s small
2363     emit_opcode(cbuf, 0x74);
2364     emit_d8(cbuf, 0x04);
2365     // MOV    $dst.lo,$dst.hi
2366     emit_opcode( cbuf, 0x8B );
2367     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2368     // CLR    $dst.hi
2369     emit_opcode(cbuf, 0x33);
2370     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2371 // small:
2372     // SHRD   $dst.lo,$dst.hi,$shift
2373     emit_opcode(cbuf,0x0F);
2374     emit_opcode(cbuf,0xAD);
2375     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
2377     emit_opcode(cbuf,0xD3);
2378     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2379   %}
2380 
2381   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2382     // TEST shift,32
2383     emit_opcode(cbuf,0xF7);
2384     emit_rm(cbuf, 0x3, 0, ECX_enc);
2385     emit_d32(cbuf,0x20);
2386     // JEQ,s small
2387     emit_opcode(cbuf, 0x74);
2388     emit_d8(cbuf, 0x05);
2389     // MOV    $dst.lo,$dst.hi
2390     emit_opcode( cbuf, 0x8B );
2391     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2392     // SAR    $dst.hi,31
2393     emit_opcode(cbuf, 0xC1);
2394     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2395     emit_d8(cbuf, 0x1F );
2396 // small:
2397     // SHRD   $dst.lo,$dst.hi,$shift
2398     emit_opcode(cbuf,0x0F);
2399     emit_opcode(cbuf,0xAD);
2400     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
2402     emit_opcode(cbuf,0xD3);
2403     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2404   %}
2405 
2406 
2407   // ----------------- Encodings for floating point unit -----------------
2408   // May leave result in FPU-TOS or FPU reg depending on opcodes
2409   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2410     $$$emit8$primary;
2411     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2412   %}
2413 
2414   // Pop argument in FPR0 with FSTP ST(0)
2415   enc_class PopFPU() %{
2416     emit_opcode( cbuf, 0xDD );
2417     emit_d8( cbuf, 0xD8 );
2418   %}
2419 
2420   // !!!!! equivalent to Pop_Reg_F
2421   enc_class Pop_Reg_DPR( regDPR dst ) %{
2422     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2423     emit_d8( cbuf, 0xD8+$dst$$reg );
2424   %}
2425 
2426   enc_class Push_Reg_DPR( regDPR dst ) %{
2427     emit_opcode( cbuf, 0xD9 );
2428     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2429   %}
2430 
2431   enc_class strictfp_bias1( regDPR dst ) %{
2432     emit_opcode( cbuf, 0xDB );           // FLD m80real
2433     emit_opcode( cbuf, 0x2D );
2434     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2435     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2436     emit_opcode( cbuf, 0xC8+$dst$$reg );
2437   %}
2438 
2439   enc_class strictfp_bias2( regDPR dst ) %{
2440     emit_opcode( cbuf, 0xDB );           // FLD m80real
2441     emit_opcode( cbuf, 0x2D );
2442     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2443     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2444     emit_opcode( cbuf, 0xC8+$dst$$reg );
2445   %}
2446 
2447   // Special case for moving an integer register to a stack slot.
2448   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2449     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2450   %}
2451 
2452   // Special case for moving a register to a stack slot.
2453   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2454     // Opcode already emitted
2455     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2456     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2457     emit_d32(cbuf, $dst$$disp);   // Displacement
2458   %}
2459 
2460   // Push the integer in stackSlot 'src' onto FP-stack
2461   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2462     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2463   %}
2464 
2465   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2466   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2467     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2468   %}
2469 
2470   // Same as Pop_Mem_F except for opcode
2471   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2472   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2473     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2474   %}
2475 
2476   enc_class Pop_Reg_FPR( regFPR dst ) %{
2477     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2478     emit_d8( cbuf, 0xD8+$dst$$reg );
2479   %}
2480 
2481   enc_class Push_Reg_FPR( regFPR dst ) %{
2482     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2483     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2484   %}
2485 
2486   // Push FPU's float to a stack-slot, and pop FPU-stack
2487   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2488     int pop = 0x02;
2489     if ($src$$reg != FPR1L_enc) {
2490       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2491       emit_d8( cbuf, 0xC0-1+$src$$reg );
2492       pop = 0x03;
2493     }
2494     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2495   %}
2496 
2497   // Push FPU's double to a stack-slot, and pop FPU-stack
2498   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2499     int pop = 0x02;
2500     if ($src$$reg != FPR1L_enc) {
2501       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2502       emit_d8( cbuf, 0xC0-1+$src$$reg );
2503       pop = 0x03;
2504     }
2505     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2506   %}
2507 
2508   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2509   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2510     int pop = 0xD0 - 1; // -1 since we skip FLD
2511     if ($src$$reg != FPR1L_enc) {
2512       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2513       emit_d8( cbuf, 0xC0-1+$src$$reg );
2514       pop = 0xD8;
2515     }
2516     emit_opcode( cbuf, 0xDD );
2517     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2518   %}
2519 
2520 
2521   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2522     // load dst in FPR0
2523     emit_opcode( cbuf, 0xD9 );
2524     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2525     if ($src$$reg != FPR1L_enc) {
2526       // fincstp
2527       emit_opcode (cbuf, 0xD9);
2528       emit_opcode (cbuf, 0xF7);
2529       // swap src with FPR1:
2530       // FXCH FPR1 with src
2531       emit_opcode(cbuf, 0xD9);
2532       emit_d8(cbuf, 0xC8-1+$src$$reg );
2533       // fdecstp
2534       emit_opcode (cbuf, 0xD9);
2535       emit_opcode (cbuf, 0xF6);
2536     }
2537   %}
2538 
2539   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2540     MacroAssembler _masm(&cbuf);
2541     __ subptr(rsp, 8);
2542     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2543     __ fld_d(Address(rsp, 0));
2544     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2545     __ fld_d(Address(rsp, 0));
2546   %}
2547 
2548   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2549     MacroAssembler _masm(&cbuf);
2550     __ subptr(rsp, 4);
2551     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2552     __ fld_s(Address(rsp, 0));
2553     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2554     __ fld_s(Address(rsp, 0));
2555   %}
2556 
2557   enc_class Push_ResultD(regD dst) %{
2558     MacroAssembler _masm(&cbuf);
2559     __ fstp_d(Address(rsp, 0));
2560     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2561     __ addptr(rsp, 8);
2562   %}
2563 
2564   enc_class Push_ResultF(regF dst, immI d8) %{
2565     MacroAssembler _masm(&cbuf);
2566     __ fstp_s(Address(rsp, 0));
2567     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2568     __ addptr(rsp, $d8$$constant);
2569   %}
2570 
2571   enc_class Push_SrcD(regD src) %{
2572     MacroAssembler _masm(&cbuf);
2573     __ subptr(rsp, 8);
2574     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2575     __ fld_d(Address(rsp, 0));
2576   %}
2577 
2578   enc_class push_stack_temp_qword() %{
2579     MacroAssembler _masm(&cbuf);
2580     __ subptr(rsp, 8);
2581   %}
2582 
2583   enc_class pop_stack_temp_qword() %{
2584     MacroAssembler _masm(&cbuf);
2585     __ addptr(rsp, 8);
2586   %}
2587 
2588   enc_class push_xmm_to_fpr1(regD src) %{
2589     MacroAssembler _masm(&cbuf);
2590     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2591     __ fld_d(Address(rsp, 0));
2592   %}
2593 
2594   enc_class Push_Result_Mod_DPR( regDPR src) %{
2595     if ($src$$reg != FPR1L_enc) {
2596       // fincstp
2597       emit_opcode (cbuf, 0xD9);
2598       emit_opcode (cbuf, 0xF7);
2599       // FXCH FPR1 with src
2600       emit_opcode(cbuf, 0xD9);
2601       emit_d8(cbuf, 0xC8-1+$src$$reg );
2602       // fdecstp
2603       emit_opcode (cbuf, 0xD9);
2604       emit_opcode (cbuf, 0xF6);
2605     }
2606     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2607     // // FSTP   FPR$dst$$reg
2608     // emit_opcode( cbuf, 0xDD );
2609     // emit_d8( cbuf, 0xD8+$dst$$reg );
2610   %}
2611 
2612   enc_class fnstsw_sahf_skip_parity() %{
2613     // fnstsw ax
2614     emit_opcode( cbuf, 0xDF );
2615     emit_opcode( cbuf, 0xE0 );
2616     // sahf
2617     emit_opcode( cbuf, 0x9E );
2618     // jnp  ::skip
2619     emit_opcode( cbuf, 0x7B );
2620     emit_opcode( cbuf, 0x05 );
2621   %}
2622 
2623   enc_class emitModDPR() %{
2624     // fprem must be iterative
2625     // :: loop
2626     // fprem
2627     emit_opcode( cbuf, 0xD9 );
2628     emit_opcode( cbuf, 0xF8 );
2629     // wait
2630     emit_opcode( cbuf, 0x9b );
2631     // fnstsw ax
2632     emit_opcode( cbuf, 0xDF );
2633     emit_opcode( cbuf, 0xE0 );
2634     // sahf
2635     emit_opcode( cbuf, 0x9E );
2636     // jp  ::loop
2637     emit_opcode( cbuf, 0x0F );
2638     emit_opcode( cbuf, 0x8A );
2639     emit_opcode( cbuf, 0xF4 );
2640     emit_opcode( cbuf, 0xFF );
2641     emit_opcode( cbuf, 0xFF );
2642     emit_opcode( cbuf, 0xFF );
2643   %}
2644 
2645   enc_class fpu_flags() %{
2646     // fnstsw_ax
2647     emit_opcode( cbuf, 0xDF);
2648     emit_opcode( cbuf, 0xE0);
2649     // test ax,0x0400
2650     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2651     emit_opcode( cbuf, 0xA9 );
2652     emit_d16   ( cbuf, 0x0400 );
2653     // // // This sequence works, but stalls for 12-16 cycles on PPro
2654     // // test rax,0x0400
2655     // emit_opcode( cbuf, 0xA9 );
2656     // emit_d32   ( cbuf, 0x00000400 );
2657     //
2658     // jz exit (no unordered comparison)
2659     emit_opcode( cbuf, 0x74 );
2660     emit_d8    ( cbuf, 0x02 );
2661     // mov ah,1 - treat as LT case (set carry flag)
2662     emit_opcode( cbuf, 0xB4 );
2663     emit_d8    ( cbuf, 0x01 );
2664     // sahf
2665     emit_opcode( cbuf, 0x9E);
2666   %}
2667 
2668   enc_class cmpF_P6_fixup() %{
2669     // Fixup the integer flags in case comparison involved a NaN
2670     //
2671     // JNP exit (no unordered comparison, P-flag is set by NaN)
2672     emit_opcode( cbuf, 0x7B );
2673     emit_d8    ( cbuf, 0x03 );
2674     // MOV AH,1 - treat as LT case (set carry flag)
2675     emit_opcode( cbuf, 0xB4 );
2676     emit_d8    ( cbuf, 0x01 );
2677     // SAHF
2678     emit_opcode( cbuf, 0x9E);
2679     // NOP     // target for branch to avoid branch to branch
2680     emit_opcode( cbuf, 0x90);
2681   %}
2682 
2683 //     fnstsw_ax();
2684 //     sahf();
2685 //     movl(dst, nan_result);
2686 //     jcc(Assembler::parity, exit);
2687 //     movl(dst, less_result);
2688 //     jcc(Assembler::below, exit);
2689 //     movl(dst, equal_result);
2690 //     jcc(Assembler::equal, exit);
2691 //     movl(dst, greater_result);
2692 
2693 // less_result     =  1;
2694 // greater_result  = -1;
2695 // equal_result    = 0;
2696 // nan_result      = -1;
2697 
2698   enc_class CmpF_Result(rRegI dst) %{
2699     // fnstsw_ax();
2700     emit_opcode( cbuf, 0xDF);
2701     emit_opcode( cbuf, 0xE0);
2702     // sahf
2703     emit_opcode( cbuf, 0x9E);
2704     // movl(dst, nan_result);
2705     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2706     emit_d32( cbuf, -1 );
2707     // jcc(Assembler::parity, exit);
2708     emit_opcode( cbuf, 0x7A );
2709     emit_d8    ( cbuf, 0x13 );
2710     // movl(dst, less_result);
2711     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2712     emit_d32( cbuf, -1 );
2713     // jcc(Assembler::below, exit);
2714     emit_opcode( cbuf, 0x72 );
2715     emit_d8    ( cbuf, 0x0C );
2716     // movl(dst, equal_result);
2717     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2718     emit_d32( cbuf, 0 );
2719     // jcc(Assembler::equal, exit);
2720     emit_opcode( cbuf, 0x74 );
2721     emit_d8    ( cbuf, 0x05 );
2722     // movl(dst, greater_result);
2723     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2724     emit_d32( cbuf, 1 );
2725   %}
2726 
2727 
2728   // Compare the longs and set flags
2729   // BROKEN!  Do Not use as-is
2730   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2731     // CMP    $src1.hi,$src2.hi
2732     emit_opcode( cbuf, 0x3B );
2733     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2734     // JNE,s  done
2735     emit_opcode(cbuf,0x75);
2736     emit_d8(cbuf, 2 );
2737     // CMP    $src1.lo,$src2.lo
2738     emit_opcode( cbuf, 0x3B );
2739     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2740 // done:
2741   %}
2742 
2743   enc_class convert_int_long( regL dst, rRegI src ) %{
2744     // mov $dst.lo,$src
2745     int dst_encoding = $dst$$reg;
2746     int src_encoding = $src$$reg;
2747     encode_Copy( cbuf, dst_encoding  , src_encoding );
2748     // mov $dst.hi,$src
2749     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2750     // sar $dst.hi,31
2751     emit_opcode( cbuf, 0xC1 );
2752     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2753     emit_d8(cbuf, 0x1F );
2754   %}
2755 
2756   enc_class convert_long_double( eRegL src ) %{
2757     // push $src.hi
2758     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2759     // push $src.lo
2760     emit_opcode(cbuf, 0x50+$src$$reg  );
2761     // fild 64-bits at [SP]
2762     emit_opcode(cbuf,0xdf);
2763     emit_d8(cbuf, 0x6C);
2764     emit_d8(cbuf, 0x24);
2765     emit_d8(cbuf, 0x00);
2766     // pop stack
2767     emit_opcode(cbuf, 0x83); // add  SP, #8
2768     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2769     emit_d8(cbuf, 0x8);
2770   %}
2771 
2772   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2773     // IMUL   EDX:EAX,$src1
2774     emit_opcode( cbuf, 0xF7 );
2775     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2776     // SAR    EDX,$cnt-32
2777     int shift_count = ((int)$cnt$$constant) - 32;
2778     if (shift_count > 0) {
2779       emit_opcode(cbuf, 0xC1);
2780       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2781       emit_d8(cbuf, shift_count);
2782     }
2783   %}
2784 
  // Same as convert_long_double above, but leaves the pushed 8 bytes on the stack (no ADD ESP,8)
2786   enc_class convert_long_double2( eRegL src ) %{
2787     // push $src.hi
2788     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2789     // push $src.lo
2790     emit_opcode(cbuf, 0x50+$src$$reg  );
2791     // fild 64-bits at [SP]
2792     emit_opcode(cbuf,0xdf);
2793     emit_d8(cbuf, 0x6C);
2794     emit_d8(cbuf, 0x24);
2795     emit_d8(cbuf, 0x00);
2796   %}
2797 
2798   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2799     // Basic idea: long = (long)int * (long)int
2800     // IMUL EDX:EAX, src
2801     emit_opcode( cbuf, 0xF7 );
2802     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2803   %}
2804 
2805   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2806     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2807     // MUL EDX:EAX, src
2808     emit_opcode( cbuf, 0xF7 );
2809     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2810   %}
2811 
2812   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2813     // Basic idea: lo(result) = lo(x_lo * y_lo)
2814     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2815     // MOV    $tmp,$src.lo
2816     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2817     // IMUL   $tmp,EDX
2818     emit_opcode( cbuf, 0x0F );
2819     emit_opcode( cbuf, 0xAF );
2820     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2821     // MOV    EDX,$src.hi
2822     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2823     // IMUL   EDX,EAX
2824     emit_opcode( cbuf, 0x0F );
2825     emit_opcode( cbuf, 0xAF );
2826     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2827     // ADD    $tmp,EDX
2828     emit_opcode( cbuf, 0x03 );
2829     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2830     // MUL   EDX:EAX,$src.lo
2831     emit_opcode( cbuf, 0xF7 );
2832     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
2834     emit_opcode( cbuf, 0x03 );
2835     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2836   %}
2837 
2838   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2839     // Basic idea: lo(result) = lo(src * y_lo)
2840     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2841     // IMUL   $tmp,EDX,$src
2842     emit_opcode( cbuf, 0x6B );
2843     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2844     emit_d8( cbuf, (int)$src$$constant );
2845     // MOV    EDX,$src
2846     emit_opcode(cbuf, 0xB8 + EDX_enc);
2847     emit_d32( cbuf, (int)$src$$constant );
2848     // MUL   EDX:EAX,EDX
2849     emit_opcode( cbuf, 0xF7 );
2850     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
2852     emit_opcode( cbuf, 0x03 );
2853     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2854   %}
2855 
2856   enc_class long_div( eRegL src1, eRegL src2 ) %{
2857     // PUSH src1.hi
2858     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2859     // PUSH src1.lo
2860     emit_opcode(cbuf,               0x50+$src1$$reg  );
2861     // PUSH src2.hi
2862     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2863     // PUSH src2.lo
2864     emit_opcode(cbuf,               0x50+$src2$$reg  );
2865     // CALL directly to the runtime
2866     cbuf.set_insts_mark();
2867     emit_opcode(cbuf,0xE8);       // Call into runtime
2868     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2869     // Restore stack
2870     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2871     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2872     emit_d8(cbuf, 4*4);
2873   %}
2874 
2875   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2876     // PUSH src1.hi
2877     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2878     // PUSH src1.lo
2879     emit_opcode(cbuf,               0x50+$src1$$reg  );
2880     // PUSH src2.hi
2881     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2882     // PUSH src2.lo
2883     emit_opcode(cbuf,               0x50+$src2$$reg  );
2884     // CALL directly to the runtime
2885     cbuf.set_insts_mark();
2886     emit_opcode(cbuf,0xE8);       // Call into runtime
2887     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2888     // Restore stack
2889     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2890     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2891     emit_d8(cbuf, 4*4);
2892   %}
2893 
2894   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2895     // MOV   $tmp,$src.lo
2896     emit_opcode(cbuf, 0x8B);
2897     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2898     // OR    $tmp,$src.hi
2899     emit_opcode(cbuf, 0x0B);
2900     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2901   %}
2902 
2903   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2904     // CMP    $src1.lo,$src2.lo
2905     emit_opcode( cbuf, 0x3B );
2906     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2907     // JNE,s  skip
2908     emit_cc(cbuf, 0x70, 0x5);
2909     emit_d8(cbuf,2);
2910     // CMP    $src1.hi,$src2.hi
2911     emit_opcode( cbuf, 0x3B );
2912     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2913   %}
2914 
2915   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2916     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2917     emit_opcode( cbuf, 0x3B );
2918     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2919     // MOV    $tmp,$src1.hi
2920     emit_opcode( cbuf, 0x8B );
2921     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2922     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2923     emit_opcode( cbuf, 0x1B );
2924     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2925   %}
2926 
2927   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2928     // XOR    $tmp,$tmp
2929     emit_opcode(cbuf,0x33);  // XOR
2930     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2931     // CMP    $tmp,$src.lo
2932     emit_opcode( cbuf, 0x3B );
2933     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2934     // SBB    $tmp,$src.hi
2935     emit_opcode( cbuf, 0x1B );
2936     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2937   %}
2938 
2939  // Sniff, sniff... smells like Gnu Superoptimizer
2940   enc_class neg_long( eRegL dst ) %{
2941     emit_opcode(cbuf,0xF7);    // NEG hi
2942     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2943     emit_opcode(cbuf,0xF7);    // NEG lo
2944     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2945     emit_opcode(cbuf,0x83);    // SBB hi,0
2946     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2947     emit_d8    (cbuf,0 );
2948   %}
2949 
2950   enc_class enc_pop_rdx() %{
2951     emit_opcode(cbuf,0x5A);
2952   %}
2953 
2954   enc_class enc_rethrow() %{
2955     cbuf.set_insts_mark();
2956     emit_opcode(cbuf, 0xE9);        // jmp    entry
2957     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2958                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2959   %}
2960 
2961 
2962   // Convert a double to an int.  Java semantics require we do complex
2963   // manglelations in the corner cases.  So we set the rounding mode to
2964   // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  In the corner cases the hardware stores the
  // integer indefinite value (0x80000000); we test for it below and call a
  // runtime stub to produce the correct Java result.
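  // Roughly, the fast/slow split below behaves like (fistp_trunc being a
  // hypothetical name for the FISTP-with-truncation sequence):
  //   int r = fistp_trunc(d);
  //   if (r == 0x80000000) r = StubRoutines::d2i_wrapper(d);  // NaN / out of range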
2967   enc_class DPR2I_encoding( regDPR src ) %{
2968     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2969     // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
2971     // However, I2C adapters and other float-stack manglers leave pending
2972     // invalid-op exceptions hanging.  We would have to clear them before
2973     // enabling them and that is more expensive than just testing for the
2974     // invalid value Intel stores down in the corner cases.
2975     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2976     emit_opcode(cbuf,0x2D);
2977     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2978     // Allocate a word
2979     emit_opcode(cbuf,0x83);            // SUB ESP,4
2980     emit_opcode(cbuf,0xEC);
2981     emit_d8(cbuf,0x04);
2982     // Encoding assumes a double has been pushed into FPR0.
2983     // Store down the double as an int, popping the FPU stack
2984     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2985     emit_opcode(cbuf,0x1C);
2986     emit_d8(cbuf,0x24);
2987     // Restore the rounding mode; mask the exception
2988     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2989     emit_opcode(cbuf,0x2D);
2990     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2991         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2992         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2993 
2994     // Load the converted int; adjust CPU stack
2995     emit_opcode(cbuf,0x58);       // POP EAX
2996     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2997     emit_d32   (cbuf,0x80000000); //         0x80000000
2998     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2999     emit_d8    (cbuf,0x07);       // Size of slow_call
3000     // Push src onto stack slow-path
3001     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3002     emit_d8    (cbuf,0xC0-1+$src$$reg );
3003     // CALL directly to the runtime
3004     cbuf.set_insts_mark();
3005     emit_opcode(cbuf,0xE8);       // Call into runtime
3006     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3007     // Carry on here...
3008   %}
3009 
3010   enc_class DPR2L_encoding( regDPR src ) %{
3011     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3012     emit_opcode(cbuf,0x2D);
3013     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3014     // Allocate a word
3015     emit_opcode(cbuf,0x83);            // SUB ESP,8
3016     emit_opcode(cbuf,0xEC);
3017     emit_d8(cbuf,0x08);
3018     // Encoding assumes a double has been pushed into FPR0.
3019     // Store down the double as a long, popping the FPU stack
3020     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3021     emit_opcode(cbuf,0x3C);
3022     emit_d8(cbuf,0x24);
3023     // Restore the rounding mode; mask the exception
3024     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3025     emit_opcode(cbuf,0x2D);
3026     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3027         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3028         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3029 
3030     // Load the converted int; adjust CPU stack
3031     emit_opcode(cbuf,0x58);       // POP EAX
3032     emit_opcode(cbuf,0x5A);       // POP EDX
3033     emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // ModRM: /7, EDX
3035     emit_d32   (cbuf,0x80000000); //         0x80000000
3036     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3037     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3038     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // ModRM: EAX,EAX
3040     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3041     emit_d8    (cbuf,0x07);       // Size of slow_call
3042     // Push src onto stack slow-path
3043     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3044     emit_d8    (cbuf,0xC0-1+$src$$reg );
3045     // CALL directly to the runtime
3046     cbuf.set_insts_mark();
3047     emit_opcode(cbuf,0xE8);       // Call into runtime
3048     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3049     // Carry on here...
3050   %}
3051 
3052   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3053     // Operand was loaded from memory into fp ST (stack top)
3054     // FMUL   ST,$src  /* D8 C8+i */
3055     emit_opcode(cbuf, 0xD8);
3056     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3057   %}
3058 
3059   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3060     // FADDP  ST,src2  /* D8 C0+i */
3061     emit_opcode(cbuf, 0xD8);
3062     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3063     //could use FADDP  src2,fpST  /* DE C0+i */
3064   %}
3065 
3066   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3067     // FADDP  src2,ST  /* DE C0+i */
3068     emit_opcode(cbuf, 0xDE);
3069     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3070   %}
3071 
3072   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3073     // Operand has been loaded into fp ST (stack top)
3074       // FSUB   ST,$src1
3075       emit_opcode(cbuf, 0xD8);
3076       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3077 
3078       // FDIV
3079       emit_opcode(cbuf, 0xD8);
3080       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3081   %}
3082 
3083   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3084     // Operand was loaded from memory into fp ST (stack top)
3085     // FADD   ST,$src  /* D8 C0+i */
3086     emit_opcode(cbuf, 0xD8);
3087     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3088 
3089     // FMUL  ST,src2  /* D8 C*+i */
3090     emit_opcode(cbuf, 0xD8);
3091     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3092   %}
3093 
3094 
3095   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3096     // Operand was loaded from memory into fp ST (stack top)
3097     // FADD   ST,$src  /* D8 C0+i */
3098     emit_opcode(cbuf, 0xD8);
3099     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3100 
3101     // FMULP  src2,ST  /* DE C8+i */
3102     emit_opcode(cbuf, 0xDE);
3103     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3104   %}
3105 
3106   // Atomically load the volatile long
3107   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3108     emit_opcode(cbuf,0xDF);
3109     int rm_byte_opcode = 0x05;
3110     int base     = $mem$$base;
3111     int index    = $mem$$index;
3112     int scale    = $mem$$scale;
3113     int displace = $mem$$disp;
3114     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3115     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3116     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3117   %}
3118 
3119   // Volatile Store Long.  Must be atomic, so move it into
3120   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3121   // target address before the store (for null-ptr checks)
3122   // so the memory operand is used twice in the encoding.
3123   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3124     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3125     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3126     emit_opcode(cbuf,0xDF);
3127     int rm_byte_opcode = 0x07;
3128     int base     = $mem$$base;
3129     int index    = $mem$$index;
3130     int scale    = $mem$$scale;
3131     int displace = $mem$$disp;
3132     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3133     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3134   %}
3135 
3136 %}
3137 
3138 
3139 //----------FRAME--------------------------------------------------------------
3140 // Definition of frame structure and management information.
3141 //
3142 //  S T A C K   L A Y O U T    Allocators stack-slot number
3143 //                             |   (to get allocators register number
3144 //  G  Owned by    |        |  v    add OptoReg::stack0())
3145 //  r   CALLER     |        |
3146 //  o     |        +--------+      pad to even-align allocators stack-slot
3147 //  w     V        |  pad0  |        numbers; owned by CALLER
3148 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3149 //  h     ^        |   in   |  5
3150 //        |        |  args  |  4   Holes in incoming args owned by SELF
3151 //  |     |        |        |  3
3152 //  |     |        +--------+
3153 //  V     |        | old out|      Empty on Intel, window on Sparc
3154 //        |    old |preserve|      Must be even aligned.
3155 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3156 //        |        |   in   |  3   area for Intel ret address
3157 //     Owned by    |preserve|      Empty on Sparc.
3158 //       SELF      +--------+
3159 //        |        |  pad2  |  2   pad to align old SP
3160 //        |        +--------+  1
3161 //        |        | locks  |  0
3162 //        |        +--------+----> OptoReg::stack0(), even aligned
3163 //        |        |  pad1  | 11   pad to align new SP
3164 //        |        +--------+
3165 //        |        |        | 10
3166 //        |        | spills |  9   spills
3167 //        V        |        |  8   (pad0 slot for callee)
3168 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3169 //        ^        |  out   |  7
3170 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3171 //     Owned by    +--------+
3172 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3173 //        |    new |preserve|      Must be even-aligned.
3174 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3175 //        |        |        |
3176 //
3177 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3178 //         known from SELF's arguments and the Java calling convention.
3179 //         Region 6-7 is determined per call site.
3180 // Note 2: If the calling convention leaves holes in the incoming argument
3181 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3183 //         incoming area, as the Java calling convention is completely under
3184 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3186 //         varargs C calling conventions.
3187 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3188 //         even aligned with pad0 as needed.
3189 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3190 //         region 6-11 is even aligned; it may be padded out more so that
3191 //         the region from SP to FP meets the minimum stack alignment.
3192 
3193 frame %{
3194   // What direction does stack grow in (assumed to be same for C & Java)
3195   stack_direction(TOWARDS_LOW);
3196 
3197   // These three registers define part of the calling convention
3198   // between compiled code and the interpreter.
3199   inline_cache_reg(EAX);                // Inline Cache Register
3200   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3201 
3202   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3203   cisc_spilling_operand_name(indOffset32);
3204 
3205   // Number of stack slots consumed by locking an object
3206   sync_stack_slots(1);
3207 
3208   // Compiled code's Frame Pointer
3209   frame_pointer(ESP);
3210   // Interpreter stores its frame pointer in a register which is
3211   // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted Java to compiled Java.
3213   interpreter_frame_pointer(EBP);
3214 
3215   // Stack alignment requirement
3216   // Alignment size in bytes (128-bit -> 16 bytes)
3217   stack_alignment(StackAlignmentInBytes);
3218 
3219   // Number of stack slots between incoming argument block and the start of
3220   // a new frame.  The PROLOG must add this many slots to the stack.  The
3221   // EPILOG must remove this many slots.  Intel needs one slot for
  // the return address and one for rbp (must save rbp).
3223   in_preserve_stack_slots(2+VerifyStackAtCalls);
3224 
3225   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3226   // for calls to C.  Supports the var-args backing area for register parms.
3227   varargs_C_out_slots_killed(0);
3228 
3229   // The after-PROLOG location of the return address.  Location of
3230   // return address specifies a type (REG or STACK) and a number
3231   // representing the register number (i.e. - use a register name) or
3232   // stack slot.
3233   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3234   // Otherwise, it is above the locks and verification slot and alignment word
3235   return_addr(STACK - 1 +
3236               align_up((Compile::current()->in_preserve_stack_slots() +
3237                         Compile::current()->fixed_slots()),
3238                        stack_alignment_in_slots()));
3239 
3240   // Body of function which returns an integer array locating
3241   // arguments either in registers or in stack slots.  Passed an array
3242   // of ideal registers called "sig" and a "length" count.  Stack-slot
3243   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3244   // arguments for a CALLEE.  Incoming stack arguments are
3245   // automatically biased by the preserve_stack_slots field above.
3246   calling_convention %{
    // No difference between incoming and outgoing, so just pass false
3248     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3249   %}
3250 
3251 
3252   // Body of function which returns an integer array locating
3253   // arguments either in registers or in stack slots.  Passed an array
3254   // of ideal registers called "sig" and a "length" count.  Stack-slot
3255   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3256   // arguments for a CALLEE.  Incoming stack arguments are
3257   // automatically biased by the preserve_stack_slots field above.
3258   c_calling_convention %{
3259     // This is obviously always outgoing
3260     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3261   %}
3262 
3263   // Location of C & interpreter return values
3264   c_return_value %{
3265     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3266     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3267     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
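    // The arrays are indexed by ideal register opcode (Op_Node .. Op_RegL);
    // only the Op_RegI, Op_RegP, Op_RegF, Op_RegD and Op_RegL slots matter here.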
3268 
3269     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3270     // that C functions return float and double results in XMM0.
3271     if( ideal_reg == Op_RegD && UseSSE>=2 )
3272       return OptoRegPair(XMM0b_num,XMM0_num);
3273     if( ideal_reg == Op_RegF && UseSSE>=2 )
3274       return OptoRegPair(OptoReg::Bad,XMM0_num);
3275 
3276     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3277   %}
3278 
3279   // Location of return values
3280   return_value %{
3281     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3282     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3283     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3284     if( ideal_reg == Op_RegD && UseSSE>=2 )
3285       return OptoRegPair(XMM0b_num,XMM0_num);
3286     if( ideal_reg == Op_RegF && UseSSE>=1 )
3287       return OptoRegPair(OptoReg::Bad,XMM0_num);
3288     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3289   %}
3290 
3291 %}
3292 
3293 //----------ATTRIBUTES---------------------------------------------------------
3294 //----------Operand Attributes-------------------------------------------------
3295 op_attrib op_cost(0);        // Required cost attribute
3296 
3297 //----------Instruction Attributes---------------------------------------------
3298 ins_attrib ins_cost(100);       // Required cost attribute
3299 ins_attrib ins_size(8);         // Required size attribute (in bits)
3300 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3301                                 // non-matching short branch variant of some
                                // long branch?
3303 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3304                                 // specifies the alignment that some part of the instruction (not
3305                                 // necessarily the start) requires.  If > 1, a compute_padding()
3306                                 // function must be provided for the instruction
3307 
3308 //----------OPERANDS-----------------------------------------------------------
3309 // Operand definitions must precede instruction definitions for correct parsing
3310 // in the ADLC because operands constitute user defined types which are used in
3311 // instruction definitions.
3312 
3313 //----------Simple Operands----------------------------------------------------
3314 // Immediate Operands
3315 // Integer Immediate
3316 operand immI() %{
3317   match(ConI);
3318 
3319   op_cost(10);
3320   format %{ %}
3321   interface(CONST_INTER);
3322 %}
3323 
3324 // Constant for test vs zero
3325 operand immI0() %{
3326   predicate(n->get_int() == 0);
3327   match(ConI);
3328 
3329   op_cost(0);
3330   format %{ %}
3331   interface(CONST_INTER);
3332 %}
3333 
3334 // Constant for increment
3335 operand immI1() %{
3336   predicate(n->get_int() == 1);
3337   match(ConI);
3338 
3339   op_cost(0);
3340   format %{ %}
3341   interface(CONST_INTER);
3342 %}
3343 
3344 // Constant for decrement
3345 operand immI_M1() %{
3346   predicate(n->get_int() == -1);
3347   match(ConI);
3348 
3349   op_cost(0);
3350   format %{ %}
3351   interface(CONST_INTER);
3352 %}
3353 
3354 // Valid scale values for addressing modes
3355 operand immI2() %{
3356   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3357   match(ConI);
3358 
3359   format %{ %}
3360   interface(CONST_INTER);
3361 %}
3362 
3363 operand immI8() %{
3364   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3365   match(ConI);
3366 
3367   op_cost(5);
3368   format %{ %}
3369   interface(CONST_INTER);
3370 %}
3371 
3372 operand immU8() %{
3373   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
3374   match(ConI);
3375 
3376   op_cost(5);
3377   format %{ %}
3378   interface(CONST_INTER);
3379 %}
3380 
3381 operand immI16() %{
3382   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3383   match(ConI);
3384 
3385   op_cost(10);
3386   format %{ %}
3387   interface(CONST_INTER);
3388 %}
3389 
3390 // Int Immediate non-negative
3391 operand immU31()
3392 %{
3393   predicate(n->get_int() >= 0);
3394   match(ConI);
3395 
3396   op_cost(0);
3397   format %{ %}
3398   interface(CONST_INTER);
3399 %}
3400 
3401 // Constant for long shifts
3402 operand immI_32() %{
3403   predicate( n->get_int() == 32 );
3404   match(ConI);
3405 
3406   op_cost(0);
3407   format %{ %}
3408   interface(CONST_INTER);
3409 %}
3410 
3411 operand immI_1_31() %{
3412   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3413   match(ConI);
3414 
3415   op_cost(0);
3416   format %{ %}
3417   interface(CONST_INTER);
3418 %}
3419 
3420 operand immI_32_63() %{
3421   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3422   match(ConI);
3423   op_cost(0);
3424 
3425   format %{ %}
3426   interface(CONST_INTER);
3427 %}
3428 
3429 operand immI_1() %{
3430   predicate( n->get_int() == 1 );
3431   match(ConI);
3432 
3433   op_cost(0);
3434   format %{ %}
3435   interface(CONST_INTER);
3436 %}
3437 
3438 operand immI_2() %{
3439   predicate( n->get_int() == 2 );
3440   match(ConI);
3441 
3442   op_cost(0);
3443   format %{ %}
3444   interface(CONST_INTER);
3445 %}
3446 
3447 operand immI_3() %{
3448   predicate( n->get_int() == 3 );
3449   match(ConI);
3450 
3451   op_cost(0);
3452   format %{ %}
3453   interface(CONST_INTER);
3454 %}
3455 
3456 // Pointer Immediate
3457 operand immP() %{
3458   match(ConP);
3459 
3460   op_cost(10);
3461   format %{ %}
3462   interface(CONST_INTER);
3463 %}
3464 
3465 // NULL Pointer Immediate
3466 operand immP0() %{
3467   predicate( n->get_ptr() == 0 );
3468   match(ConP);
3469   op_cost(0);
3470 
3471   format %{ %}
3472   interface(CONST_INTER);
3473 %}
3474 
3475 // Long Immediate
3476 operand immL() %{
3477   match(ConL);
3478 
3479   op_cost(20);
3480   format %{ %}
3481   interface(CONST_INTER);
3482 %}
3483 
3484 // Long Immediate zero
3485 operand immL0() %{
3486   predicate( n->get_long() == 0L );
3487   match(ConL);
3488   op_cost(0);
3489 
3490   format %{ %}
3491   interface(CONST_INTER);
3492 %}
3493 
// Long Immediate minus one
3495 operand immL_M1() %{
3496   predicate( n->get_long() == -1L );
3497   match(ConL);
3498   op_cost(0);
3499 
3500   format %{ %}
3501   interface(CONST_INTER);
3502 %}
3503 
3504 // Long immediate from 0 to 127.
3505 // Used for a shorter form of long mul by 10.
3506 operand immL_127() %{
3507   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3508   match(ConL);
3509   op_cost(0);
3510 
3511   format %{ %}
3512   interface(CONST_INTER);
3513 %}
3514 
3515 // Long Immediate: low 32-bit mask
3516 operand immL_32bits() %{
3517   predicate(n->get_long() == 0xFFFFFFFFL);
3518   match(ConL);
3519   op_cost(0);
3520 
3521   format %{ %}
3522   interface(CONST_INTER);
3523 %}
3524 
// Long Immediate: 32-bit signed value
3526 operand immL32() %{
3527   predicate(n->get_long() == (int)(n->get_long()));
3528   match(ConL);
3529   op_cost(20);
3530 
3531   format %{ %}
3532   interface(CONST_INTER);
3533 %}
3534 
3535 //Double Immediate zero
3536 operand immDPR0() %{
3537   // Do additional (and counter-intuitive) test against NaN to work around VC++
3538   // bug that generates code such that NaNs compare equal to 0.0
3539   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3540   match(ConD);
3541 
3542   op_cost(5);
3543   format %{ %}
3544   interface(CONST_INTER);
3545 %}
3546 
3547 // Double Immediate one
3548 operand immDPR1() %{
3549   predicate( UseSSE<=1 && n->getd() == 1.0 );
3550   match(ConD);
3551 
3552   op_cost(5);
3553   format %{ %}
3554   interface(CONST_INTER);
3555 %}
3556 
3557 // Double Immediate
3558 operand immDPR() %{
3559   predicate(UseSSE<=1);
3560   match(ConD);
3561 
3562   op_cost(5);
3563   format %{ %}
3564   interface(CONST_INTER);
3565 %}
3566 
3567 operand immD() %{
3568   predicate(UseSSE>=2);
3569   match(ConD);
3570 
3571   op_cost(5);
3572   format %{ %}
3573   interface(CONST_INTER);
3574 %}
3575 
3576 // Double Immediate zero
3577 operand immD0() %{
3578   // Do additional (and counter-intuitive) test against NaN to work around VC++
3579   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3580   // compare equal to -0.0.
3581   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3582   match(ConD);
3583 
3584   format %{ %}
3585   interface(CONST_INTER);
3586 %}
3587 
3588 // Float Immediate zero
3589 operand immFPR0() %{
3590   predicate(UseSSE == 0 && n->getf() == 0.0F);
3591   match(ConF);
3592 
3593   op_cost(5);
3594   format %{ %}
3595   interface(CONST_INTER);
3596 %}
3597 
3598 // Float Immediate one
3599 operand immFPR1() %{
3600   predicate(UseSSE == 0 && n->getf() == 1.0F);
3601   match(ConF);
3602 
3603   op_cost(5);
3604   format %{ %}
3605   interface(CONST_INTER);
3606 %}
3607 
3608 // Float Immediate
3609 operand immFPR() %{
3610   predicate( UseSSE == 0 );
3611   match(ConF);
3612 
3613   op_cost(5);
3614   format %{ %}
3615   interface(CONST_INTER);
3616 %}
3617 
3618 // Float Immediate
3619 operand immF() %{
3620   predicate(UseSSE >= 1);
3621   match(ConF);
3622 
3623   op_cost(5);
3624   format %{ %}
3625   interface(CONST_INTER);
3626 %}
3627 
3628 // Float Immediate zero.  Zero and not -0.0
3629 operand immF0() %{
3630   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3631   match(ConF);
3632 
3633   op_cost(5);
3634   format %{ %}
3635   interface(CONST_INTER);
3636 %}
3637 
3638 // Immediates for special shifts (sign extend)
3639 
3640 // Constants for increment
3641 operand immI_16() %{
3642   predicate( n->get_int() == 16 );
3643   match(ConI);
3644 
3645   format %{ %}
3646   interface(CONST_INTER);
3647 %}
3648 
3649 operand immI_24() %{
3650   predicate( n->get_int() == 24 );
3651   match(ConI);
3652 
3653   format %{ %}
3654   interface(CONST_INTER);
3655 %}
3656 
3657 // Constant for byte-wide masking
3658 operand immI_255() %{
3659   predicate( n->get_int() == 255 );
3660   match(ConI);
3661 
3662   format %{ %}
3663   interface(CONST_INTER);
3664 %}
3665 
3666 // Constant for short-wide masking
3667 operand immI_65535() %{
3668   predicate(n->get_int() == 65535);
3669   match(ConI);
3670 
3671   format %{ %}
3672   interface(CONST_INTER);
3673 %}
3674 
3675 // Register Operands
3676 // Integer Register
3677 operand rRegI() %{
3678   constraint(ALLOC_IN_RC(int_reg));
3679   match(RegI);
3680   match(xRegI);
3681   match(eAXRegI);
3682   match(eBXRegI);
3683   match(eCXRegI);
3684   match(eDXRegI);
3685   match(eDIRegI);
3686   match(eSIRegI);
3687 
3688   format %{ %}
3689   interface(REG_INTER);
3690 %}
3691 
3692 // Subset of Integer Register
3693 operand xRegI(rRegI reg) %{
3694   constraint(ALLOC_IN_RC(int_x_reg));
3695   match(reg);
3696   match(eAXRegI);
3697   match(eBXRegI);
3698   match(eCXRegI);
3699   match(eDXRegI);
3700 
3701   format %{ %}
3702   interface(REG_INTER);
3703 %}
3704 
3705 // Special Registers
3706 operand eAXRegI(xRegI reg) %{
3707   constraint(ALLOC_IN_RC(eax_reg));
3708   match(reg);
3709   match(rRegI);
3710 
3711   format %{ "EAX" %}
3712   interface(REG_INTER);
3713 %}
3714 
3715 // Special Registers
3716 operand eBXRegI(xRegI reg) %{
3717   constraint(ALLOC_IN_RC(ebx_reg));
3718   match(reg);
3719   match(rRegI);
3720 
3721   format %{ "EBX" %}
3722   interface(REG_INTER);
3723 %}
3724 
3725 operand eCXRegI(xRegI reg) %{
3726   constraint(ALLOC_IN_RC(ecx_reg));
3727   match(reg);
3728   match(rRegI);
3729 
3730   format %{ "ECX" %}
3731   interface(REG_INTER);
3732 %}
3733 
3734 operand eDXRegI(xRegI reg) %{
3735   constraint(ALLOC_IN_RC(edx_reg));
3736   match(reg);
3737   match(rRegI);
3738 
3739   format %{ "EDX" %}
3740   interface(REG_INTER);
3741 %}
3742 
3743 operand eDIRegI(xRegI reg) %{
3744   constraint(ALLOC_IN_RC(edi_reg));
3745   match(reg);
3746   match(rRegI);
3747 
3748   format %{ "EDI" %}
3749   interface(REG_INTER);
3750 %}
3751 
3752 operand naxRegI() %{
3753   constraint(ALLOC_IN_RC(nax_reg));
3754   match(RegI);
3755   match(eCXRegI);
3756   match(eDXRegI);
3757   match(eSIRegI);
3758   match(eDIRegI);
3759 
3760   format %{ %}
3761   interface(REG_INTER);
3762 %}
3763 
3764 operand nadxRegI() %{
3765   constraint(ALLOC_IN_RC(nadx_reg));
3766   match(RegI);
3767   match(eBXRegI);
3768   match(eCXRegI);
3769   match(eSIRegI);
3770   match(eDIRegI);
3771 
3772   format %{ %}
3773   interface(REG_INTER);
3774 %}
3775 
3776 operand ncxRegI() %{
3777   constraint(ALLOC_IN_RC(ncx_reg));
3778   match(RegI);
3779   match(eAXRegI);
3780   match(eDXRegI);
3781   match(eSIRegI);
3782   match(eDIRegI);
3783 
3784   format %{ %}
3785   interface(REG_INTER);
3786 %}
3787 
3788 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3789 // //
3790 operand eSIRegI(xRegI reg) %{
3791    constraint(ALLOC_IN_RC(esi_reg));
3792    match(reg);
3793    match(rRegI);
3794 
3795    format %{ "ESI" %}
3796    interface(REG_INTER);
3797 %}
3798 
3799 // Pointer Register
3800 operand anyRegP() %{
3801   constraint(ALLOC_IN_RC(any_reg));
3802   match(RegP);
3803   match(eAXRegP);
3804   match(eBXRegP);
3805   match(eCXRegP);
3806   match(eDIRegP);
3807   match(eRegP);
3808 
3809   format %{ %}
3810   interface(REG_INTER);
3811 %}
3812 
3813 operand eRegP() %{
3814   constraint(ALLOC_IN_RC(int_reg));
3815   match(RegP);
3816   match(eAXRegP);
3817   match(eBXRegP);
3818   match(eCXRegP);
3819   match(eDIRegP);
3820 
3821   format %{ %}
3822   interface(REG_INTER);
3823 %}
3824 
3825 // On windows95, EBP is not safe to use for implicit null tests.
3826 operand eRegP_no_EBP() %{
3827   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3828   match(RegP);
3829   match(eAXRegP);
3830   match(eBXRegP);
3831   match(eCXRegP);
3832   match(eDIRegP);
3833 
3834   op_cost(100);
3835   format %{ %}
3836   interface(REG_INTER);
3837 %}
3838 
3839 operand naxRegP() %{
3840   constraint(ALLOC_IN_RC(nax_reg));
3841   match(RegP);
3842   match(eBXRegP);
3843   match(eDXRegP);
3844   match(eCXRegP);
3845   match(eSIRegP);
3846   match(eDIRegP);
3847 
3848   format %{ %}
3849   interface(REG_INTER);
3850 %}
3851 
3852 operand nabxRegP() %{
3853   constraint(ALLOC_IN_RC(nabx_reg));
3854   match(RegP);
3855   match(eCXRegP);
3856   match(eDXRegP);
3857   match(eSIRegP);
3858   match(eDIRegP);
3859 
3860   format %{ %}
3861   interface(REG_INTER);
3862 %}
3863 
3864 operand pRegP() %{
3865   constraint(ALLOC_IN_RC(p_reg));
3866   match(RegP);
3867   match(eBXRegP);
3868   match(eDXRegP);
3869   match(eSIRegP);
3870   match(eDIRegP);
3871 
3872   format %{ %}
3873   interface(REG_INTER);
3874 %}
3875 
3876 // Special Registers
3877 // Return a pointer value
3878 operand eAXRegP(eRegP reg) %{
3879   constraint(ALLOC_IN_RC(eax_reg));
3880   match(reg);
3881   format %{ "EAX" %}
3882   interface(REG_INTER);
3883 %}
3884 
3885 // Used in AtomicAdd
3886 operand eBXRegP(eRegP reg) %{
3887   constraint(ALLOC_IN_RC(ebx_reg));
3888   match(reg);
3889   format %{ "EBX" %}
3890   interface(REG_INTER);
3891 %}
3892 
3893 // Tail-call (interprocedural jump) to interpreter
3894 operand eCXRegP(eRegP reg) %{
3895   constraint(ALLOC_IN_RC(ecx_reg));
3896   match(reg);
3897   format %{ "ECX" %}
3898   interface(REG_INTER);
3899 %}
3900 
3901 operand eDXRegP(eRegP reg) %{
3902   constraint(ALLOC_IN_RC(edx_reg));
3903   match(reg);
3904   format %{ "EDX" %}
3905   interface(REG_INTER);
3906 %}
3907 
3908 operand eSIRegP(eRegP reg) %{
3909   constraint(ALLOC_IN_RC(esi_reg));
3910   match(reg);
3911   format %{ "ESI" %}
3912   interface(REG_INTER);
3913 %}
3914 
3915 // Used in rep stosw
3916 operand eDIRegP(eRegP reg) %{
3917   constraint(ALLOC_IN_RC(edi_reg));
3918   match(reg);
3919   format %{ "EDI" %}
3920   interface(REG_INTER);
3921 %}
3922 
3923 operand eRegL() %{
3924   constraint(ALLOC_IN_RC(long_reg));
3925   match(RegL);
3926   match(eADXRegL);
3927 
3928   format %{ %}
3929   interface(REG_INTER);
3930 %}
3931 
3932 operand eADXRegL( eRegL reg ) %{
3933   constraint(ALLOC_IN_RC(eadx_reg));
3934   match(reg);
3935 
3936   format %{ "EDX:EAX" %}
3937   interface(REG_INTER);
3938 %}
3939 
3940 operand eBCXRegL( eRegL reg ) %{
3941   constraint(ALLOC_IN_RC(ebcx_reg));
3942   match(reg);
3943 
3944   format %{ "EBX:ECX" %}
3945   interface(REG_INTER);
3946 %}
3947 
3948 // Special case for integer high multiply
3949 operand eADXRegL_low_only() %{
3950   constraint(ALLOC_IN_RC(eadx_reg));
3951   match(RegL);
3952 
3953   format %{ "EAX" %}
3954   interface(REG_INTER);
3955 %}
3956 
3957 // Flags register, used as output of compare instructions
3958 operand eFlagsReg() %{
3959   constraint(ALLOC_IN_RC(int_flags));
3960   match(RegFlags);
3961 
3962   format %{ "EFLAGS" %}
3963   interface(REG_INTER);
3964 %}
3965 
3966 // Flags register, used as output of FLOATING POINT compare instructions
3967 operand eFlagsRegU() %{
3968   constraint(ALLOC_IN_RC(int_flags));
3969   match(RegFlags);
3970 
3971   format %{ "EFLAGS_U" %}
3972   interface(REG_INTER);
3973 %}
3974 
3975 operand eFlagsRegUCF() %{
3976   constraint(ALLOC_IN_RC(int_flags));
3977   match(RegFlags);
3978   predicate(false);
3979 
3980   format %{ "EFLAGS_U_CF" %}
3981   interface(REG_INTER);
3982 %}
3983 
3984 // Condition Code Register used by long compare
3985 operand flagsReg_long_LTGE() %{
3986   constraint(ALLOC_IN_RC(int_flags));
3987   match(RegFlags);
3988   format %{ "FLAGS_LTGE" %}
3989   interface(REG_INTER);
3990 %}
3991 operand flagsReg_long_EQNE() %{
3992   constraint(ALLOC_IN_RC(int_flags));
3993   match(RegFlags);
3994   format %{ "FLAGS_EQNE" %}
3995   interface(REG_INTER);
3996 %}
3997 operand flagsReg_long_LEGT() %{
3998   constraint(ALLOC_IN_RC(int_flags));
3999   match(RegFlags);
4000   format %{ "FLAGS_LEGT" %}
4001   interface(REG_INTER);
4002 %}
4003 
4004 // Condition Code Register used by unsigned long compare
4005 operand flagsReg_ulong_LTGE() %{
4006   constraint(ALLOC_IN_RC(int_flags));
4007   match(RegFlags);
4008   format %{ "FLAGS_U_LTGE" %}
4009   interface(REG_INTER);
4010 %}
4011 operand flagsReg_ulong_EQNE() %{
4012   constraint(ALLOC_IN_RC(int_flags));
4013   match(RegFlags);
4014   format %{ "FLAGS_U_EQNE" %}
4015   interface(REG_INTER);
4016 %}
4017 operand flagsReg_ulong_LEGT() %{
4018   constraint(ALLOC_IN_RC(int_flags));
4019   match(RegFlags);
4020   format %{ "FLAGS_U_LEGT" %}
4021   interface(REG_INTER);
4022 %}
4023 
4024 // Float register operands
4025 operand regDPR() %{
4026   predicate( UseSSE < 2 );
4027   constraint(ALLOC_IN_RC(fp_dbl_reg));
4028   match(RegD);
4029   match(regDPR1);
4030   match(regDPR2);
4031   format %{ %}
4032   interface(REG_INTER);
4033 %}
4034 
4035 operand regDPR1(regDPR reg) %{
4036   predicate( UseSSE < 2 );
4037   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4038   match(reg);
4039   format %{ "FPR1" %}
4040   interface(REG_INTER);
4041 %}
4042 
4043 operand regDPR2(regDPR reg) %{
4044   predicate( UseSSE < 2 );
4045   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4046   match(reg);
4047   format %{ "FPR2" %}
4048   interface(REG_INTER);
4049 %}
4050 
4051 operand regnotDPR1(regDPR reg) %{
4052   predicate( UseSSE < 2 );
4053   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4054   match(reg);
4055   format %{ %}
4056   interface(REG_INTER);
4057 %}
4058 
4059 // Float register operands
4060 operand regFPR() %{
4061   predicate( UseSSE < 2 );
4062   constraint(ALLOC_IN_RC(fp_flt_reg));
4063   match(RegF);
4064   match(regFPR1);
4065   format %{ %}
4066   interface(REG_INTER);
4067 %}
4068 
4069 // Float register operands
4070 operand regFPR1(regFPR reg) %{
4071   predicate( UseSSE < 2 );
4072   constraint(ALLOC_IN_RC(fp_flt_reg0));
4073   match(reg);
4074   format %{ "FPR1" %}
4075   interface(REG_INTER);
4076 %}
4077 
4078 // XMM Float register operands
4079 operand regF() %{
4080   predicate( UseSSE>=1 );
4081   constraint(ALLOC_IN_RC(float_reg_legacy));
4082   match(RegF);
4083   format %{ %}
4084   interface(REG_INTER);
4085 %}
4086 
4087 // Float register operands
4088 operand vlRegF() %{
4089    constraint(ALLOC_IN_RC(float_reg_vl));
4090    match(RegF);
4091 
4092    format %{ %}
4093    interface(REG_INTER);
4094 %}
4095 
4096 // XMM Double register operands
4097 operand regD() %{
4098   predicate( UseSSE>=2 );
4099   constraint(ALLOC_IN_RC(double_reg_legacy));
4100   match(RegD);
4101   format %{ %}
4102   interface(REG_INTER);
4103 %}
4104 
4105 // Double register operands
4106 operand vlRegD() %{
4107    constraint(ALLOC_IN_RC(double_reg_vl));
4108    match(RegD);
4109 
4110    format %{ %}
4111    interface(REG_INTER);
4112 %}
4113 
4114 //----------Memory Operands----------------------------------------------------
4115 // Direct Memory Operand
4116 operand direct(immP addr) %{
4117   match(addr);
4118 
4119   format %{ "[$addr]" %}
4120   interface(MEMORY_INTER) %{
4121     base(0xFFFFFFFF);
4122     index(0x4);
4123     scale(0x0);
4124     disp($addr);
4125   %}
4126 %}
4127 
4128 // Indirect Memory Operand
4129 operand indirect(eRegP reg) %{
4130   constraint(ALLOC_IN_RC(int_reg));
4131   match(reg);
4132 
4133   format %{ "[$reg]" %}
4134   interface(MEMORY_INTER) %{
4135     base($reg);
4136     index(0x4);
4137     scale(0x0);
4138     disp(0x0);
4139   %}
4140 %}
4141 
4142 // Indirect Memory Plus Short Offset Operand
4143 operand indOffset8(eRegP reg, immI8 off) %{
4144   match(AddP reg off);
4145 
4146   format %{ "[$reg + $off]" %}
4147   interface(MEMORY_INTER) %{
4148     base($reg);
4149     index(0x4);
4150     scale(0x0);
4151     disp($off);
4152   %}
4153 %}
4154 
4155 // Indirect Memory Plus Long Offset Operand
4156 operand indOffset32(eRegP reg, immI off) %{
4157   match(AddP reg off);
4158 
4159   format %{ "[$reg + $off]" %}
4160   interface(MEMORY_INTER) %{
4161     base($reg);
4162     index(0x4);
4163     scale(0x0);
4164     disp($off);
4165   %}
4166 %}
4167 
4168 // Indirect Memory Plus Long Offset Operand
4169 operand indOffset32X(rRegI reg, immP off) %{
4170   match(AddP off reg);
4171 
4172   format %{ "[$reg + $off]" %}
4173   interface(MEMORY_INTER) %{
4174     base($reg);
4175     index(0x4);
4176     scale(0x0);
4177     disp($off);
4178   %}
4179 %}
4180 
4181 // Indirect Memory Plus Index Register Plus Offset Operand
4182 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4183   match(AddP (AddP reg ireg) off);
4184 
4185   op_cost(10);
4186   format %{"[$reg + $off + $ireg]" %}
4187   interface(MEMORY_INTER) %{
4188     base($reg);
4189     index($ireg);
4190     scale(0x0);
4191     disp($off);
4192   %}
4193 %}
4194 
4195 // Indirect Memory Plus Index Register Plus Offset Operand
4196 operand indIndex(eRegP reg, rRegI ireg) %{
4197   match(AddP reg ireg);
4198 
4199   op_cost(10);
4200   format %{"[$reg + $ireg]" %}
4201   interface(MEMORY_INTER) %{
4202     base($reg);
4203     index($ireg);
4204     scale(0x0);
4205     disp(0x0);
4206   %}
4207 %}
4208 
4209 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
4211 // // -------------------------------------------------------------------------
4212 // // Scaled Memory Operands
4213 // // Indirect Memory Times Scale Plus Offset Operand
4214 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4215 //   match(AddP off (LShiftI ireg scale));
4216 //
4217 //   op_cost(10);
4218 //   format %{"[$off + $ireg << $scale]" %}
4219 //   interface(MEMORY_INTER) %{
4220 //     base(0x4);
4221 //     index($ireg);
4222 //     scale($scale);
4223 //     disp($off);
4224 //   %}
4225 // %}
4226 
4227 // Indirect Memory Times Scale Plus Index Register
4228 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4229   match(AddP reg (LShiftI ireg scale));
4230 
4231   op_cost(10);
4232   format %{"[$reg + $ireg << $scale]" %}
4233   interface(MEMORY_INTER) %{
4234     base($reg);
4235     index($ireg);
4236     scale($scale);
4237     disp(0x0);
4238   %}
4239 %}
4240 
4241 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4242 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4243   match(AddP (AddP reg (LShiftI ireg scale)) off);
4244 
4245   op_cost(10);
4246   format %{"[$reg + $off + $ireg << $scale]" %}
4247   interface(MEMORY_INTER) %{
4248     base($reg);
4249     index($ireg);
4250     scale($scale);
4251     disp($off);
4252   %}
4253 %}
4254 
4255 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
4257 // the first word of the long.  If the load-long destination overlaps with
4258 // registers used in the addressing expression, the 2nd half will be loaded
4259 // from a clobbered address.  Fix this by requiring that load-long use
4260 // address registers that do not overlap with the load-long target.
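// For example, if the low half of the destination were allocated to the same
// register as the base pointer, the first 4-byte load would clobber the base
// and the second load at [base+4] would use a stale address.  Restricting the
// base to ESI (the operands below) is how this file enforces the non-overlap.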
4261 
4262 // load-long support
4263 operand load_long_RegP() %{
4264   constraint(ALLOC_IN_RC(esi_reg));
4265   match(RegP);
4266   match(eSIRegP);
4267   op_cost(100);
4268   format %{  %}
4269   interface(REG_INTER);
4270 %}
4271 
4272 // Indirect Memory Operand Long
4273 operand load_long_indirect(load_long_RegP reg) %{
4274   constraint(ALLOC_IN_RC(esi_reg));
4275   match(reg);
4276 
4277   format %{ "[$reg]" %}
4278   interface(MEMORY_INTER) %{
4279     base($reg);
4280     index(0x4);
4281     scale(0x0);
4282     disp(0x0);
4283   %}
4284 %}
4285 
4286 // Indirect Memory Plus Long Offset Operand
4287 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4288   match(AddP reg off);
4289 
4290   format %{ "[$reg + $off]" %}
4291   interface(MEMORY_INTER) %{
4292     base($reg);
4293     index(0x4);
4294     scale(0x0);
4295     disp($off);
4296   %}
4297 %}
4298 
4299 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4300 
4301 
4302 //----------Special Memory Operands--------------------------------------------
4303 // Stack Slot Operand - This operand is used for loading and storing temporary
4304 //                      values on the stack where a match requires a value to
4305 //                      flow through memory.
4306 operand stackSlotP(sRegP reg) %{
4307   constraint(ALLOC_IN_RC(stack_slots));
4308   // No match rule because this operand is only generated in matching
4309   format %{ "[$reg]" %}
4310   interface(MEMORY_INTER) %{
4311     base(0x4);   // ESP
4312     index(0x4);  // No Index
4313     scale(0x0);  // No Scale
4314     disp($reg);  // Stack Offset
4315   %}
4316 %}
4317 
4318 operand stackSlotI(sRegI reg) %{
4319   constraint(ALLOC_IN_RC(stack_slots));
4320   // No match rule because this operand is only generated in matching
4321   format %{ "[$reg]" %}
4322   interface(MEMORY_INTER) %{
4323     base(0x4);   // ESP
4324     index(0x4);  // No Index
4325     scale(0x0);  // No Scale
4326     disp($reg);  // Stack Offset
4327   %}
4328 %}
4329 
4330 operand stackSlotF(sRegF reg) %{
4331   constraint(ALLOC_IN_RC(stack_slots));
4332   // No match rule because this operand is only generated in matching
4333   format %{ "[$reg]" %}
4334   interface(MEMORY_INTER) %{
4335     base(0x4);   // ESP
4336     index(0x4);  // No Index
4337     scale(0x0);  // No Scale
4338     disp($reg);  // Stack Offset
4339   %}
4340 %}
4341 
4342 operand stackSlotD(sRegD reg) %{
4343   constraint(ALLOC_IN_RC(stack_slots));
4344   // No match rule because this operand is only generated in matching
4345   format %{ "[$reg]" %}
4346   interface(MEMORY_INTER) %{
4347     base(0x4);   // ESP
4348     index(0x4);  // No Index
4349     scale(0x0);  // No Scale
4350     disp($reg);  // Stack Offset
4351   %}
4352 %}
4353 
4354 operand stackSlotL(sRegL reg) %{
4355   constraint(ALLOC_IN_RC(stack_slots));
4356   // No match rule because this operand is only generated in matching
4357   format %{ "[$reg]" %}
4358   interface(MEMORY_INTER) %{
4359     base(0x4);   // ESP
4360     index(0x4);  // No Index
4361     scale(0x0);  // No Scale
4362     disp($reg);  // Stack Offset
4363   %}
4364 %}
4365 
4366 //----------Memory Operands - Win95 Implicit Null Variants----------------
4367 // Indirect Memory Operand
4368 operand indirect_win95_safe(eRegP_no_EBP reg)
4369 %{
4370   constraint(ALLOC_IN_RC(int_reg));
4371   match(reg);
4372 
4373   op_cost(100);
4374   format %{ "[$reg]" %}
4375   interface(MEMORY_INTER) %{
4376     base($reg);
4377     index(0x4);
4378     scale(0x0);
4379     disp(0x0);
4380   %}
4381 %}
4382 
4383 // Indirect Memory Plus Short Offset Operand
4384 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4385 %{
4386   match(AddP reg off);
4387 
4388   op_cost(100);
4389   format %{ "[$reg + $off]" %}
4390   interface(MEMORY_INTER) %{
4391     base($reg);
4392     index(0x4);
4393     scale(0x0);
4394     disp($off);
4395   %}
4396 %}
4397 
4398 // Indirect Memory Plus Long Offset Operand
4399 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4400 %{
4401   match(AddP reg off);
4402 
4403   op_cost(100);
4404   format %{ "[$reg + $off]" %}
4405   interface(MEMORY_INTER) %{
4406     base($reg);
4407     index(0x4);
4408     scale(0x0);
4409     disp($off);
4410   %}
4411 %}
4412 
4413 // Indirect Memory Plus Index Register Plus Offset Operand
4414 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4415 %{
4416   match(AddP (AddP reg ireg) off);
4417 
4418   op_cost(100);
4419   format %{"[$reg + $off + $ireg]" %}
4420   interface(MEMORY_INTER) %{
4421     base($reg);
4422     index($ireg);
4423     scale(0x0);
4424     disp($off);
4425   %}
4426 %}
4427 
4428 // Indirect Memory Times Scale Plus Index Register
4429 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4430 %{
4431   match(AddP reg (LShiftI ireg scale));
4432 
4433   op_cost(100);
4434   format %{"[$reg + $ireg << $scale]" %}
4435   interface(MEMORY_INTER) %{
4436     base($reg);
4437     index($ireg);
4438     scale($scale);
4439     disp(0x0);
4440   %}
4441 %}
4442 
4443 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4444 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4445 %{
4446   match(AddP (AddP reg (LShiftI ireg scale)) off);
4447 
4448   op_cost(100);
4449   format %{"[$reg + $off + $ireg << $scale]" %}
4450   interface(MEMORY_INTER) %{
4451     base($reg);
4452     index($ireg);
4453     scale($scale);
4454     disp($off);
4455   %}
4456 %}
4457 
4458 //----------Conditional Branch Operands----------------------------------------
4459 // Comparison Op  - This is the operation of the comparison, and is limited to
4460 //                  the following set of codes:
4461 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4462 //
4463 // Other attributes of the comparison, such as unsignedness, are specified
4464 // by the comparison instruction that sets a condition code flags register.
4465 // That result is represented by a flags operand whose subtype is appropriate
4466 // to the unsignedness (etc.) of the comparison.
4467 //
4468 // Later, the instruction which matches both the Comparison Op (a Bool) and
4469 // the flags (produced by the Cmp) specifies the coding of the comparison op
4470 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4471 
// Comparison Code
4473 operand cmpOp() %{
4474   match(Bool);
4475 
4476   format %{ "" %}
4477   interface(COND_INTER) %{
4478     equal(0x4, "e");
4479     not_equal(0x5, "ne");
4480     less(0xC, "l");
4481     greater_equal(0xD, "ge");
4482     less_equal(0xE, "le");
4483     greater(0xF, "g");
4484     overflow(0x0, "o");
4485     no_overflow(0x1, "no");
4486   %}
4487 %}
4488 
4489 // Comparison Code, unsigned compare.  Used by FP also, with
4490 // C2 (unordered) turned into GT or LT already.  The other bits
4491 // C0 and C3 are turned into Carry & Zero flags.
4492 operand cmpOpU() %{
4493   match(Bool);
4494 
4495   format %{ "" %}
4496   interface(COND_INTER) %{
4497     equal(0x4, "e");
4498     not_equal(0x5, "ne");
4499     less(0x2, "b");
4500     greater_equal(0x3, "nb");
4501     less_equal(0x6, "be");
4502     greater(0x7, "nbe");
4503     overflow(0x0, "o");
4504     no_overflow(0x1, "no");
4505   %}
4506 %}
4507 
4508 // Floating comparisons that don't require any fixup for the unordered case
4509 operand cmpOpUCF() %{
4510   match(Bool);
4511   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4512             n->as_Bool()->_test._test == BoolTest::ge ||
4513             n->as_Bool()->_test._test == BoolTest::le ||
4514             n->as_Bool()->_test._test == BoolTest::gt);
4515   format %{ "" %}
4516   interface(COND_INTER) %{
4517     equal(0x4, "e");
4518     not_equal(0x5, "ne");
4519     less(0x2, "b");
4520     greater_equal(0x3, "nb");
4521     less_equal(0x6, "be");
4522     greater(0x7, "nbe");
4523     overflow(0x0, "o");
4524     no_overflow(0x1, "no");
4525   %}
4526 %}
4527 
4528 
4529 // Floating comparisons that can be fixed up with extra conditional jumps
4530 operand cmpOpUCF2() %{
4531   match(Bool);
4532   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4533             n->as_Bool()->_test._test == BoolTest::eq);
4534   format %{ "" %}
4535   interface(COND_INTER) %{
4536     equal(0x4, "e");
4537     not_equal(0x5, "ne");
4538     less(0x2, "b");
4539     greater_equal(0x3, "nb");
4540     less_equal(0x6, "be");
4541     greater(0x7, "nbe");
4542     overflow(0x0, "o");
4543     no_overflow(0x1, "no");
4544   %}
4545 %}
4546 
4547 // Comparison Code for FP conditional move
4548 operand cmpOp_fcmov() %{
4549   match(Bool);
4550 
4551   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4552             n->as_Bool()->_test._test != BoolTest::no_overflow);
4553   format %{ "" %}
4554   interface(COND_INTER) %{
4555     equal        (0x0C8);
4556     not_equal    (0x1C8);
4557     less         (0x0C0);
4558     greater_equal(0x1C0);
4559     less_equal   (0x0D0);
4560     greater      (0x1D0);
4561     overflow(0x0, "o"); // not really supported by the instruction
4562     no_overflow(0x1, "no"); // not really supported by the instruction
4563   %}
4564 %}
4565 
4566 // Comparison Code used in long compares where the operands have been swapped,
4566 // so the condition codes are commuted accordingly (l <-> g, le <-> ge).
4567 operand cmpOp_commute() %{
4568   match(Bool);
4569 
4570   format %{ "" %}
4571   interface(COND_INTER) %{
4572     equal(0x4, "e");
4573     not_equal(0x5, "ne");
4574     less(0xF, "g");
4575     greater_equal(0xE, "le");
4576     less_equal(0xD, "ge");
4577     greater(0xC, "l");
4578     overflow(0x0, "o");
4579     no_overflow(0x1, "no");
4580   %}
4581 %}
4582 
4583 // Comparison Code used in unsigned long compares where the operands have been
4583 // swapped, so the condition codes are commuted accordingly (b <-> nbe, be <-> nb).
4584 operand cmpOpU_commute() %{
4585   match(Bool);
4586 
4587   format %{ "" %}
4588   interface(COND_INTER) %{
4589     equal(0x4, "e");
4590     not_equal(0x5, "ne");
4591     less(0x7, "nbe");
4592     greater_equal(0x6, "be");
4593     less_equal(0x3, "nb");
4594     greater(0x2, "b");
4595     overflow(0x0, "o");
4596     no_overflow(0x1, "no");
4597   %}
4598 %}
4599 
4600 //----------OPERAND CLASSES----------------------------------------------------
4601 // Operand Classes are groups of operands that are used to simplify
4602 // instruction definitions by not requiring the AD writer to specify separate
4603 // instructions for every form of operand when the instruction accepts
4604 // multiple operand types with the same basic encoding and format.  The classic
4605 // case of this is memory operands.
4606 
4607 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4608                indIndex, indIndexScale, indIndexScaleOffset);
4609 
4610 // Long memory operations are encoded in 2 instructions and a +4 offset.
4611 // This means some kind of offset is always required and you cannot use
4612 // an oop as the offset (as is done when working on static globals).
4613 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4614                     indIndex, indIndexScale, indIndexScaleOffset);
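
// For example, a non-atomic long load through long_memory expands into two
// 32-bit moves, the second with the displacement bumped by 4:
//   MOV dst.lo,[mem]
//   MOV dst.hi,[mem+4]
// (see loadL/storeL below), which is why an oop cannot serve as the offset.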
4615 
4616 
4617 //----------PIPELINE-----------------------------------------------------------
4618 // Rules which define the behavior of the target architecture's pipeline.
4619 pipeline %{
4620 
4621 //----------ATTRIBUTES---------------------------------------------------------
4622 attributes %{
4623   variable_size_instructions;        // Variable size instructions
4624   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4625   instruction_unit_size = 1;         // An instruction is 1 byte long
4626   instruction_fetch_unit_size = 16;  // The processor fetches one line
4627   instruction_fetch_units = 1;       // of 16 bytes
4628 
4629   // List of nop instructions
4630   nops( MachNop );
4631 %}
4632 
4633 //----------RESOURCES----------------------------------------------------------
4634 // Resources are the functional units available to the machine
4635 
4636 // Generic P2/P3 pipeline
4637 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4638 // 3 instructions decoded per cycle.
4639 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4640 // 2 ALU ops; only ALU0 handles mul/div instructions.
4641 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4642            MS0, MS1, MEM = MS0 | MS1,
4643            BR, FPU,
4644            ALU0, ALU1, ALU = ALU0 | ALU1 );
4645 
4646 //----------PIPELINE DESCRIPTION-----------------------------------------------
4647 // Pipeline Description specifies the stages in the machine's pipeline
4648 
4649 // Generic P2/P3 pipeline
4650 pipe_desc(S0, S1, S2, S3, S4, S5);
4651 
4652 //----------PIPELINE CLASSES---------------------------------------------------
4653 // Pipeline Classes describe the stages in which input and output are
4654 // referenced by the hardware pipeline.
4655 
4656 // Naming convention: ialu or fpu
4657 // Then: _reg
4658 // Then: _reg if there is a 2nd register
4659 // Then: _long if it's a pair of instructions implementing a long
4660 // Then: _fat if it requires the big decoder
4661 //   Or: _mem if it requires the big decoder and a memory unit.
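//
// For example, ialu_reg_reg_long_fat below reads as: integer ALU, two register
// operands, a two-instruction long form, and restricted to the big decoder D0.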
4662 
4663 // Integer ALU reg operation
4664 pipe_class ialu_reg(rRegI dst) %{
4665     single_instruction;
4666     dst    : S4(write);
4667     dst    : S3(read);
4668     DECODE : S0;        // any decoder
4669     ALU    : S3;        // any alu
4670 %}
4671 
4672 // Long ALU reg operation
4673 pipe_class ialu_reg_long(eRegL dst) %{
4674     instruction_count(2);
4675     dst    : S4(write);
4676     dst    : S3(read);
4677     DECODE : S0(2);     // any 2 decoders
4678     ALU    : S3(2);     // both alus
4679 %}
4680 
4681 // Integer ALU reg operation using big decoder
4682 pipe_class ialu_reg_fat(rRegI dst) %{
4683     single_instruction;
4684     dst    : S4(write);
4685     dst    : S3(read);
4686     D0     : S0;        // big decoder only
4687     ALU    : S3;        // any alu
4688 %}
4689 
4690 // Long ALU reg operation using big decoder
4691 pipe_class ialu_reg_long_fat(eRegL dst) %{
4692     instruction_count(2);
4693     dst    : S4(write);
4694     dst    : S3(read);
4695     D0     : S0(2);     // big decoder only; twice
4696     ALU    : S3(2);     // any 2 alus
4697 %}
4698 
4699 // Integer ALU reg-reg operation
4700 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4701     single_instruction;
4702     dst    : S4(write);
4703     src    : S3(read);
4704     DECODE : S0;        // any decoder
4705     ALU    : S3;        // any alu
4706 %}
4707 
4708 // Long ALU reg-reg operation
4709 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4710     instruction_count(2);
4711     dst    : S4(write);
4712     src    : S3(read);
4713     DECODE : S0(2);     // any 2 decoders
4714     ALU    : S3(2);     // both alus
4715 %}
4716 
4717 // Integer ALU reg-reg operation
4718 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4719     single_instruction;
4720     dst    : S4(write);
4721     src    : S3(read);
4722     D0     : S0;        // big decoder only
4723     ALU    : S3;        // any alu
4724 %}
4725 
4726 // Long ALU reg-reg operation
4727 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4728     instruction_count(2);
4729     dst    : S4(write);
4730     src    : S3(read);
4731     D0     : S0(2);     // big decoder only; twice
4732     ALU    : S3(2);     // both alus
4733 %}
4734 
4735 // Integer ALU reg-mem operation
4736 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4737     single_instruction;
4738     dst    : S5(write);
4739     mem    : S3(read);
4740     D0     : S0;        // big decoder only
4741     ALU    : S4;        // any alu
4742     MEM    : S3;        // any mem
4743 %}
4744 
4745 // Long ALU reg-mem operation
4746 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4747     instruction_count(2);
4748     dst    : S5(write);
4749     mem    : S3(read);
4750     D0     : S0(2);     // big decoder only; twice
4751     ALU    : S4(2);     // any 2 alus
4752     MEM    : S3(2);     // both mems
4753 %}
4754 
4755 // Integer mem operation (prefetch)
4756 pipe_class ialu_mem(memory mem)
4757 %{
4758     single_instruction;
4759     mem    : S3(read);
4760     D0     : S0;        // big decoder only
4761     MEM    : S3;        // any mem
4762 %}
4763 
4764 // Integer Store to Memory
4765 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4766     single_instruction;
4767     mem    : S3(read);
4768     src    : S5(read);
4769     D0     : S0;        // big decoder only
4770     ALU    : S4;        // any alu
4771     MEM    : S3;
4772 %}
4773 
4774 // Long Store to Memory
4775 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4776     instruction_count(2);
4777     mem    : S3(read);
4778     src    : S5(read);
4779     D0     : S0(2);     // big decoder only; twice
4780     ALU    : S4(2);     // any 2 alus
4781     MEM    : S3(2);     // Both mems
4782 %}
4783 
4784 // Integer Store to Memory
4785 pipe_class ialu_mem_imm(memory mem) %{
4786     single_instruction;
4787     mem    : S3(read);
4788     D0     : S0;        // big decoder only
4789     ALU    : S4;        // any alu
4790     MEM    : S3;
4791 %}
4792 
4793 // Integer ALU0 reg-reg operation
4794 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4795     single_instruction;
4796     dst    : S4(write);
4797     src    : S3(read);
4798     D0     : S0;        // Big decoder only
4799     ALU0   : S3;        // only alu0
4800 %}
4801 
4802 // Integer ALU0 reg-mem operation
4803 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4804     single_instruction;
4805     dst    : S5(write);
4806     mem    : S3(read);
4807     D0     : S0;        // big decoder only
4808     ALU0   : S4;        // ALU0 only
4809     MEM    : S3;        // any mem
4810 %}
4811 
4812 // Integer ALU reg-reg operation
4813 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4814     single_instruction;
4815     cr     : S4(write);
4816     src1   : S3(read);
4817     src2   : S3(read);
4818     DECODE : S0;        // any decoder
4819     ALU    : S3;        // any alu
4820 %}
4821 
4822 // Integer ALU reg-imm operation
4823 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4824     single_instruction;
4825     cr     : S4(write);
4826     src1   : S3(read);
4827     DECODE : S0;        // any decoder
4828     ALU    : S3;        // any alu
4829 %}
4830 
4831 // Integer ALU reg-mem operation
4832 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4833     single_instruction;
4834     cr     : S4(write);
4835     src1   : S3(read);
4836     src2   : S3(read);
4837     D0     : S0;        // big decoder only
4838     ALU    : S4;        // any alu
4839     MEM    : S3;
4840 %}
4841 
4842 // Conditional move reg-reg
4843 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4844     instruction_count(4);
4845     y      : S4(read);
4846     q      : S3(read);
4847     p      : S3(read);
4848     DECODE : S0(4);     // any decoder
4849 %}
4850 
4851 // Conditional move reg-reg
4852 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4853     single_instruction;
4854     dst    : S4(write);
4855     src    : S3(read);
4856     cr     : S3(read);
4857     DECODE : S0;        // any decoder
4858 %}
4859 
4860 // Conditional move reg-mem
4861 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4862     single_instruction;
4863     dst    : S4(write);
4864     src    : S3(read);
4865     cr     : S3(read);
4866     DECODE : S0;        // any decoder
4867     MEM    : S3;
4868 %}
4869 
4870 // Conditional move reg-reg long
4871 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4872     single_instruction;
4873     dst    : S4(write);
4874     src    : S3(read);
4875     cr     : S3(read);
4876     DECODE : S0(2);     // any 2 decoders
4877 %}
4878 
4879 // Conditional move double reg-reg
4880 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4881     single_instruction;
4882     dst    : S4(write);
4883     src    : S3(read);
4884     cr     : S3(read);
4885     DECODE : S0;        // any decoder
4886 %}
4887 
4888 // Float reg-reg operation
4889 pipe_class fpu_reg(regDPR dst) %{
4890     instruction_count(2);
4891     dst    : S3(read);
4892     DECODE : S0(2);     // any 2 decoders
4893     FPU    : S3;
4894 %}
4895 
4896 // Float reg-reg operation
4897 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4898     instruction_count(2);
4899     dst    : S4(write);
4900     src    : S3(read);
4901     DECODE : S0(2);     // any 2 decoders
4902     FPU    : S3;
4903 %}
4904 
4905 // Float reg-reg operation
4906 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4907     instruction_count(3);
4908     dst    : S4(write);
4909     src1   : S3(read);
4910     src2   : S3(read);
4911     DECODE : S0(3);     // any 3 decoders
4912     FPU    : S3(2);
4913 %}
4914 
4915 // Float reg-reg operation
4916 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4917     instruction_count(4);
4918     dst    : S4(write);
4919     src1   : S3(read);
4920     src2   : S3(read);
4921     src3   : S3(read);
4922     DECODE : S0(4);     // any 4 decoders
4923     FPU    : S3(2);
4924 %}
4925 
4926 // Float reg-reg operation
4927 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4928     instruction_count(4);
4929     dst    : S4(write);
4930     src1   : S3(read);
4931     src2   : S3(read);
4932     src3   : S3(read);
4933     DECODE : S1(3);     // any 3 decoders
4934     D0     : S0;        // Big decoder only
4935     FPU    : S3(2);
4936     MEM    : S3;
4937 %}
4938 
4939 // Float reg-mem operation
4940 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4941     instruction_count(2);
4942     dst    : S5(write);
4943     mem    : S3(read);
4944     D0     : S0;        // big decoder only
4945     DECODE : S1;        // any decoder for FPU POP
4946     FPU    : S4;
4947     MEM    : S3;        // any mem
4948 %}
4949 
4950 // Float reg-mem operation
4951 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4952     instruction_count(3);
4953     dst    : S5(write);
4954     src1   : S3(read);
4955     mem    : S3(read);
4956     D0     : S0;        // big decoder only
4957     DECODE : S1(2);     // any 2 decoders for FPU POP
4958     FPU    : S4;
4959     MEM    : S3;        // any mem
4960 %}
4961 
4962 // Float mem-reg operation
4963 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4964     instruction_count(2);
4965     src    : S5(read);
4966     mem    : S3(read);
4967     DECODE : S0;        // any decoder for FPU PUSH
4968     D0     : S1;        // big decoder only
4969     FPU    : S4;
4970     MEM    : S3;        // any mem
4971 %}
4972 
4973 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4974     instruction_count(3);
4975     src1   : S3(read);
4976     src2   : S3(read);
4977     mem    : S3(read);
4978     DECODE : S0(2);     // any 2 decoders for FPU PUSH
4979     D0     : S1;        // big decoder only
4980     FPU    : S4;
4981     MEM    : S3;        // any mem
4982 %}
4983 
4984 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4985     instruction_count(3);
4986     src1   : S3(read);
4987     src2   : S3(read);
4988     mem    : S4(read);
4989     DECODE : S0;        // any decoder for FPU PUSH
4990     D0     : S0(2);     // big decoder only
4991     FPU    : S4;
4992     MEM    : S3(2);     // any mem
4993 %}
4994 
4995 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4996     instruction_count(2);
4997     src1   : S3(read);
4998     dst    : S4(read);
4999     D0     : S0(2);     // big decoder only
5000     MEM    : S3(2);     // any mem
5001 %}
5002 
5003 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5004     instruction_count(3);
5005     src1   : S3(read);
5006     src2   : S3(read);
5007     dst    : S4(read);
5008     D0     : S0(3);     // big decoder only
5009     FPU    : S4;
5010     MEM    : S3(3);     // any mem
5011 %}
5012 
5013 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5014     instruction_count(3);
5015     src1   : S4(read);
5016     mem    : S4(read);
5017     DECODE : S0;        // any decoder for FPU PUSH
5018     D0     : S0(2);     // big decoder only
5019     FPU    : S4;
5020     MEM    : S3(2);     // any mem
5021 %}
5022 
5023 // Float load constant
5024 pipe_class fpu_reg_con(regDPR dst) %{
5025     instruction_count(2);
5026     dst    : S5(write);
5027     D0     : S0;        // big decoder only for the load
5028     DECODE : S1;        // any decoder for FPU POP
5029     FPU    : S4;
5030     MEM    : S3;        // any mem
5031 %}
5032 
5033 // Float load constant
5034 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5035     instruction_count(3);
5036     dst    : S5(write);
5037     src    : S3(read);
5038     D0     : S0;        // big decoder only for the load
5039     DECODE : S1(2);     // any 2 decoders for FPU POP
5040     FPU    : S4;
5041     MEM    : S3;        // any mem
5042 %}
5043 
5044 // Unconditional branch
5045 pipe_class pipe_jmp( label labl ) %{
5046     single_instruction;
5047     BR   : S3;
5048 %}
5049 
5050 // Conditional branch
5051 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5052     single_instruction;
5053     cr    : S1(read);
5054     BR    : S3;
5055 %}
5056 
5057 // Allocation idiom
5058 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5059     instruction_count(1); force_serialization;
5060     fixed_latency(6);
5061     heap_ptr : S3(read);
5062     DECODE   : S0(3);
5063     D0       : S2;
5064     MEM      : S3;
5065     ALU      : S3(2);
5066     dst      : S5(write);
5067     BR       : S5;
5068 %}
5069 
5070 // Generic big/slow expanded idiom
5071 pipe_class pipe_slow(  ) %{
5072     instruction_count(10); multiple_bundles; force_serialization;
5073     fixed_latency(100);
5074     D0  : S0(2);
5075     MEM : S3(2);
5076 %}
5077 
5078 // The real do-nothing guy
5079 pipe_class empty( ) %{
5080     instruction_count(0);
5081 %}
5082 
5083 // Define the class for the Nop node
5084 define %{
5085    MachNop = empty;
5086 %}
5087 
5088 %}
5089 
5090 //----------INSTRUCTIONS-------------------------------------------------------
5091 //
5092 // match      -- States which machine-independent subtree may be replaced
5093 //               by this instruction.
5094 // ins_cost   -- The estimated cost of this instruction is used by instruction
5095 //               selection to identify a minimum cost tree of machine
5096 //               instructions that matches a tree of machine-independent
5097 //               instructions.
5098 // format     -- A string providing the disassembly for this instruction.
5099 //               The value of an instruction's operand may be inserted
5100 //               by referring to it with a '$' prefix.
5101 // opcode     -- Up to three instruction opcodes may be provided.  These are referred
5102 //               to within an encode class as $primary, $secondary, and $tertiary
5103 //               respectively.  The primary opcode is commonly used to
5104 //               indicate the type of machine instruction, while secondary
5105 //               and tertiary are often used for prefix options or addressing
5106 //               modes.
5107 // ins_encode -- A list of encode classes with parameters. The encode class
5108 //               name must have been defined in an 'enc_class' specification
5109 //               in the encode section of the architecture description.
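//
// For example, in bytes_reverse_int below, opcode(0x0F, 0xC8) makes $primary
// 0x0F and $secondary 0xC8; the OpcP and OpcSReg encode classes then emit
// them (OpcSReg presumably OR-ing the destination register number into the
// second byte, since BSWAP encodes its register in the low bits of 0xC8+rd).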
5110 
5111 //----------BSWAP-Instruction--------------------------------------------------
5112 instruct bytes_reverse_int(rRegI dst) %{
5113   match(Set dst (ReverseBytesI dst));
5114 
5115   format %{ "BSWAP  $dst" %}
5116   opcode(0x0F, 0xC8);
5117   ins_encode( OpcP, OpcSReg(dst) );
5118   ins_pipe( ialu_reg );
5119 %}
5120 
5121 instruct bytes_reverse_long(eRegL dst) %{
5122   match(Set dst (ReverseBytesL dst));
5123 
5124   format %{ "BSWAP  $dst.lo\n\t"
5125             "BSWAP  $dst.hi\n\t"
5126             "XCHG   $dst.lo $dst.hi" %}
5127 
5128   ins_cost(125);
5129   ins_encode( bswap_long_bytes(dst) );
5130   ins_pipe( ialu_reg_reg);
5131 %}
5132 
5133 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5134   match(Set dst (ReverseBytesUS dst));
5135   effect(KILL cr);
5136 
5137   format %{ "BSWAP  $dst\n\t"
5138             "SHR    $dst,16\n\t" %}
5139   ins_encode %{
5140     __ bswapl($dst$$Register);
5141     __ shrl($dst$$Register, 16);
5142   %}
5143   ins_pipe( ialu_reg );
5144 %}
5145 
5146 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5147   match(Set dst (ReverseBytesS dst));
5148   effect(KILL cr);
5149 
5150   format %{ "BSWAP  $dst\n\t"
5151             "SAR    $dst,16\n\t" %}
5152   ins_encode %{
5153     __ bswapl($dst$$Register);
5154     __ sarl($dst$$Register, 16);
5155   %}
5156   ins_pipe( ialu_reg );
5157 %}
5158 
5159 
5160 //---------- Zeros Count Instructions ------------------------------------------
5161 
5162 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5163   predicate(UseCountLeadingZerosInstruction);
5164   match(Set dst (CountLeadingZerosI src));
5165   effect(KILL cr);
5166 
5167   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5168   ins_encode %{
5169     __ lzcntl($dst$$Register, $src$$Register);
5170   %}
5171   ins_pipe(ialu_reg);
5172 %}
5173 
5174 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5175   predicate(!UseCountLeadingZerosInstruction);
5176   match(Set dst (CountLeadingZerosI src));
5177   effect(KILL cr);
5178 
5179   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5180             "JNZ    skip\n\t"
5181             "MOV    $dst, -1\n"
5182       "skip:\n\t"
5183             "NEG    $dst\n\t"
5184             "ADD    $dst, 31" %}
5185   ins_encode %{
5186     Register Rdst = $dst$$Register;
5187     Register Rsrc = $src$$Register;
5188     Label skip;
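    // CLZ(src) = 31 - BSR(src) for src != 0, rewritten as -BSR(src) + 31 below
    // so the src == 0 case folds in by forcing dst to -1: -(-1) + 31 = 32.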
5189     __ bsrl(Rdst, Rsrc);
5190     __ jccb(Assembler::notZero, skip);
5191     __ movl(Rdst, -1);
5192     __ bind(skip);
5193     __ negl(Rdst);
5194     __ addl(Rdst, BitsPerInt - 1);
5195   %}
5196   ins_pipe(ialu_reg);
5197 %}
5198 
5199 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5200   predicate(UseCountLeadingZerosInstruction);
5201   match(Set dst (CountLeadingZerosL src));
5202   effect(TEMP dst, KILL cr);
5203 
5204   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5205             "JNC    done\n\t"
5206             "LZCNT  $dst, $src.lo\n\t"
5207             "ADD    $dst, 32\n"
5208       "done:" %}
5209   ins_encode %{
5210     Register Rdst = $dst$$Register;
5211     Register Rsrc = $src$$Register;
5212     Label done;
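    // LZCNT sets CF when its source is all zeros, so CF clear means the high
    // word was non-zero and already supplied the full 64-bit count.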
5213     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5214     __ jccb(Assembler::carryClear, done);
5215     __ lzcntl(Rdst, Rsrc);
5216     __ addl(Rdst, BitsPerInt);
5217     __ bind(done);
5218   %}
5219   ins_pipe(ialu_reg);
5220 %}
5221 
5222 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5223   predicate(!UseCountLeadingZerosInstruction);
5224   match(Set dst (CountLeadingZerosL src));
5225   effect(TEMP dst, KILL cr);
5226 
5227   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5228             "JZ     msw_is_zero\n\t"
5229             "ADD    $dst, 32\n\t"
5230             "JMP    not_zero\n"
5231       "msw_is_zero:\n\t"
5232             "BSR    $dst, $src.lo\n\t"
5233             "JNZ    not_zero\n\t"
5234             "MOV    $dst, -1\n"
5235       "not_zero:\n\t"
5236             "NEG    $dst\n\t"
5237             "ADD    $dst, 63\n" %}
5238  ins_encode %{
5239     Register Rdst = $dst$$Register;
5240     Register Rsrc = $src$$Register;
5241     Label msw_is_zero;
5242     Label not_zero;
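    // As in the 32-bit case, CLZ = 63 - index of the highest set bit: a
    // non-zero high word gives BSR(hi) + 32, a zero high word gives BSR(lo),
    // and if both are zero dst is forced to -1 so that -(-1) + 63 = 64.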
5243     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5244     __ jccb(Assembler::zero, msw_is_zero);
5245     __ addl(Rdst, BitsPerInt);
5246     __ jmpb(not_zero);
5247     __ bind(msw_is_zero);
5248     __ bsrl(Rdst, Rsrc);
5249     __ jccb(Assembler::notZero, not_zero);
5250     __ movl(Rdst, -1);
5251     __ bind(not_zero);
5252     __ negl(Rdst);
5253     __ addl(Rdst, BitsPerLong - 1);
5254   %}
5255   ins_pipe(ialu_reg);
5256 %}
5257 
5258 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5259   predicate(UseCountTrailingZerosInstruction);
5260   match(Set dst (CountTrailingZerosI src));
5261   effect(KILL cr);
5262 
5263   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5264   ins_encode %{
5265     __ tzcntl($dst$$Register, $src$$Register);
5266   %}
5267   ins_pipe(ialu_reg);
5268 %}
5269 
5270 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5271   predicate(!UseCountTrailingZerosInstruction);
5272   match(Set dst (CountTrailingZerosI src));
5273   effect(KILL cr);
5274 
5275   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5276             "JNZ    done\n\t"
5277             "MOV    $dst, 32\n"
5278       "done:" %}
5279   ins_encode %{
5280     Register Rdst = $dst$$Register;
5281     Label done;
5282     __ bsfl(Rdst, $src$$Register);
5283     __ jccb(Assembler::notZero, done);
5284     __ movl(Rdst, BitsPerInt);
5285     __ bind(done);
5286   %}
5287   ins_pipe(ialu_reg);
5288 %}
5289 
5290 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5291   predicate(UseCountTrailingZerosInstruction);
5292   match(Set dst (CountTrailingZerosL src));
5293   effect(TEMP dst, KILL cr);
5294 
5295   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long)\n\t"
5296             "JNC    done\n\t"
5297             "TZCNT  $dst, $src.hi\n\t"
5298             "ADD    $dst, 32\n"
5299       "done:" %}
5300   ins_encode %{
5301     Register Rdst = $dst$$Register;
5302     Register Rsrc = $src$$Register;
5303     Label done;
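    // TZCNT sets CF when its source is all zeros, so CF clear means the low
    // word was non-zero and already supplied the full 64-bit count.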
5304     __ tzcntl(Rdst, Rsrc);
5305     __ jccb(Assembler::carryClear, done);
5306     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5307     __ addl(Rdst, BitsPerInt);
5308     __ bind(done);
5309   %}
5310   ins_pipe(ialu_reg);
5311 %}
5312 
5313 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5314   predicate(!UseCountTrailingZerosInstruction);
5315   match(Set dst (CountTrailingZerosL src));
5316   effect(TEMP dst, KILL cr);
5317 
5318   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5319             "JNZ    done\n\t"
5320             "BSF    $dst, $src.hi\n\t"
5321             "JNZ    msw_not_zero\n\t"
5322             "MOV    $dst, 32\n"
5323       "msw_not_zero:\n\t"
5324             "ADD    $dst, 32\n"
5325       "done:" %}
5326   ins_encode %{
5327     Register Rdst = $dst$$Register;
5328     Register Rsrc = $src$$Register;
5329     Label msw_not_zero;
5330     Label done;
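    // A non-zero low word gives BSF(lo) directly; otherwise the high word is
    // counted and 32 is added, and if both words are zero dst is first forced
    // to 32 so the final result becomes 32 + 32 = 64.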
5331     __ bsfl(Rdst, Rsrc);
5332     __ jccb(Assembler::notZero, done);
5333     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5334     __ jccb(Assembler::notZero, msw_not_zero);
5335     __ movl(Rdst, BitsPerInt);
5336     __ bind(msw_not_zero);
5337     __ addl(Rdst, BitsPerInt);
5338     __ bind(done);
5339   %}
5340   ins_pipe(ialu_reg);
5341 %}
5342 
5343 
5344 //---------- Population Count Instructions -------------------------------------
5345 
5346 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5347   predicate(UsePopCountInstruction);
5348   match(Set dst (PopCountI src));
5349   effect(KILL cr);
5350 
5351   format %{ "POPCNT $dst, $src" %}
5352   ins_encode %{
5353     __ popcntl($dst$$Register, $src$$Register);
5354   %}
5355   ins_pipe(ialu_reg);
5356 %}
5357 
5358 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5359   predicate(UsePopCountInstruction);
5360   match(Set dst (PopCountI (LoadI mem)));
5361   effect(KILL cr);
5362 
5363   format %{ "POPCNT $dst, $mem" %}
5364   ins_encode %{
5365     __ popcntl($dst$$Register, $mem$$Address);
5366   %}
5367   ins_pipe(ialu_reg);
5368 %}
5369 
5370 // Note: Long.bitCount(long) returns an int.
5371 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5372   predicate(UsePopCountInstruction);
5373   match(Set dst (PopCountL src));
5374   effect(KILL cr, TEMP tmp, TEMP dst);
5375 
5376   format %{ "POPCNT $dst, $src.lo\n\t"
5377             "POPCNT $tmp, $src.hi\n\t"
5378             "ADD    $dst, $tmp" %}
5379   ins_encode %{
5380     __ popcntl($dst$$Register, $src$$Register);
5381     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5382     __ addl($dst$$Register, $tmp$$Register);
5383   %}
5384   ins_pipe(ialu_reg);
5385 %}
5386 
5387 // Note: Long.bitCount(long) returns an int.
5388 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5389   predicate(UsePopCountInstruction);
5390   match(Set dst (PopCountL (LoadL mem)));
5391   effect(KILL cr, TEMP tmp, TEMP dst);
5392 
5393   format %{ "POPCNT $dst, $mem\n\t"
5394             "POPCNT $tmp, $mem+4\n\t"
5395             "ADD    $dst, $tmp" %}
5396   ins_encode %{
5397     //__ popcntl($dst$$Register, $mem$$Address$$first);
5398     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5399     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5400     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5401     __ addl($dst$$Register, $tmp$$Register);
5402   %}
5403   ins_pipe(ialu_reg);
5404 %}
5405 
5406 
5407 //----------Load/Store/Move Instructions---------------------------------------
5408 //----------Load Instructions--------------------------------------------------
5409 // Load Byte (8bit signed)
5410 instruct loadB(xRegI dst, memory mem) %{
5411   match(Set dst (LoadB mem));
5412 
5413   ins_cost(125);
5414   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5415 
5416   ins_encode %{
5417     __ movsbl($dst$$Register, $mem$$Address);
5418   %}
5419 
5420   ins_pipe(ialu_reg_mem);
5421 %}
5422 
5423 // Load Byte (8bit signed) into Long Register
5424 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5425   match(Set dst (ConvI2L (LoadB mem)));
5426   effect(KILL cr);
5427 
5428   ins_cost(375);
5429   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5430             "MOV    $dst.hi,$dst.lo\n\t"
5431             "SAR    $dst.hi,7" %}
5432 
5433   ins_encode %{
5434     __ movsbl($dst$$Register, $mem$$Address);
5435     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5436     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
5437   %}
5438 
5439   ins_pipe(ialu_reg_mem);
5440 %}
5441 
5442 // Load Unsigned Byte (8bit UNsigned)
5443 instruct loadUB(xRegI dst, memory mem) %{
5444   match(Set dst (LoadUB mem));
5445 
5446   ins_cost(125);
5447   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5448 
5449   ins_encode %{
5450     __ movzbl($dst$$Register, $mem$$Address);
5451   %}
5452 
5453   ins_pipe(ialu_reg_mem);
5454 %}
5455 
5456 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5457 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5458   match(Set dst (ConvI2L (LoadUB mem)));
5459   effect(KILL cr);
5460 
5461   ins_cost(250);
5462   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5463             "XOR    $dst.hi,$dst.hi" %}
5464 
5465   ins_encode %{
5466     Register Rdst = $dst$$Register;
5467     __ movzbl(Rdst, $mem$$Address);
5468     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5469   %}
5470 
5471   ins_pipe(ialu_reg_mem);
5472 %}
5473 
5474 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5475 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5476   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5477   effect(KILL cr);
5478 
5479   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5480             "XOR    $dst.hi,$dst.hi\n\t"
5481             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5482   ins_encode %{
5483     Register Rdst = $dst$$Register;
5484     __ movzbl(Rdst, $mem$$Address);
5485     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5486     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5487   %}
5488   ins_pipe(ialu_reg_mem);
5489 %}
5490 
5491 // Load Short (16bit signed)
5492 instruct loadS(rRegI dst, memory mem) %{
5493   match(Set dst (LoadS mem));
5494 
5495   ins_cost(125);
5496   format %{ "MOVSX  $dst,$mem\t# short" %}
5497 
5498   ins_encode %{
5499     __ movswl($dst$$Register, $mem$$Address);
5500   %}
5501 
5502   ins_pipe(ialu_reg_mem);
5503 %}
5504 
5505 // Load Short (16 bit signed) to Byte (8 bit signed)
5506 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5507   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5508 
5509   ins_cost(125);
5510   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5511   ins_encode %{
5512     __ movsbl($dst$$Register, $mem$$Address);
5513   %}
5514   ins_pipe(ialu_reg_mem);
5515 %}
5516 
5517 // Load Short (16bit signed) into Long Register
5518 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5519   match(Set dst (ConvI2L (LoadS mem)));
5520   effect(KILL cr);
5521 
5522   ins_cost(375);
5523   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5524             "MOV    $dst.hi,$dst.lo\n\t"
5525             "SAR    $dst.hi,15" %}
5526 
5527   ins_encode %{
5528     __ movswl($dst$$Register, $mem$$Address);
5529     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5530     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5531   %}
5532 
5533   ins_pipe(ialu_reg_mem);
5534 %}
5535 
5536 // Load Unsigned Short/Char (16bit unsigned)
5537 instruct loadUS(rRegI dst, memory mem) %{
5538   match(Set dst (LoadUS mem));
5539 
5540   ins_cost(125);
5541   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5542 
5543   ins_encode %{
5544     __ movzwl($dst$$Register, $mem$$Address);
5545   %}
5546 
5547   ins_pipe(ialu_reg_mem);
5548 %}
5549 
5550 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5551 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5552   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5553 
5554   ins_cost(125);
5555   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5556   ins_encode %{
5557     __ movsbl($dst$$Register, $mem$$Address);
5558   %}
5559   ins_pipe(ialu_reg_mem);
5560 %}
5561 
5562 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5563 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5564   match(Set dst (ConvI2L (LoadUS mem)));
5565   effect(KILL cr);
5566 
5567   ins_cost(250);
5568   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5569             "XOR    $dst.hi,$dst.hi" %}
5570 
5571   ins_encode %{
5572     __ movzwl($dst$$Register, $mem$$Address);
5573     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5574   %}
5575 
5576   ins_pipe(ialu_reg_mem);
5577 %}
5578 
5579 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5580 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5581   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5582   effect(KILL cr);
5583 
5584   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5585             "XOR    $dst.hi,$dst.hi" %}
5586   ins_encode %{
5587     Register Rdst = $dst$$Register;
5588     __ movzbl(Rdst, $mem$$Address);
5589     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5590   %}
5591   ins_pipe(ialu_reg_mem);
5592 %}
5593 
5594 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5595 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5596   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5597   effect(KILL cr);
5598 
5599   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5600             "XOR    $dst.hi,$dst.hi\n\t"
5601             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5602   ins_encode %{
5603     Register Rdst = $dst$$Register;
5604     __ movzwl(Rdst, $mem$$Address);
5605     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5606     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5607   %}
5608   ins_pipe(ialu_reg_mem);
5609 %}
5610 
5611 // Load Integer
5612 instruct loadI(rRegI dst, memory mem) %{
5613   match(Set dst (LoadI mem));
5614 
5615   ins_cost(125);
5616   format %{ "MOV    $dst,$mem\t# int" %}
5617 
5618   ins_encode %{
5619     __ movl($dst$$Register, $mem$$Address);
5620   %}
5621 
5622   ins_pipe(ialu_reg_mem);
5623 %}
5624 
5625 // Load Integer (32 bit signed) to Byte (8 bit signed)
5626 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5627   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5628 
5629   ins_cost(125);
5630   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5631   ins_encode %{
5632     __ movsbl($dst$$Register, $mem$$Address);
5633   %}
5634   ins_pipe(ialu_reg_mem);
5635 %}
5636 
5637 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5638 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5639   match(Set dst (AndI (LoadI mem) mask));
5640 
5641   ins_cost(125);
5642   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5643   ins_encode %{
5644     __ movzbl($dst$$Register, $mem$$Address);
5645   %}
5646   ins_pipe(ialu_reg_mem);
5647 %}
5648 
5649 // Load Integer (32 bit signed) to Short (16 bit signed)
5650 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5651   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5652 
5653   ins_cost(125);
5654   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5655   ins_encode %{
5656     __ movswl($dst$$Register, $mem$$Address);
5657   %}
5658   ins_pipe(ialu_reg_mem);
5659 %}
5660 
5661 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5662 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5663   match(Set dst (AndI (LoadI mem) mask));
5664 
5665   ins_cost(125);
5666   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5667   ins_encode %{
5668     __ movzwl($dst$$Register, $mem$$Address);
5669   %}
5670   ins_pipe(ialu_reg_mem);
5671 %}
5672 
5673 // Load Integer into Long Register
5674 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5675   match(Set dst (ConvI2L (LoadI mem)));
5676   effect(KILL cr);
5677 
5678   ins_cost(375);
5679   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5680             "MOV    $dst.hi,$dst.lo\n\t"
5681             "SAR    $dst.hi,31" %}
5682 
5683   ins_encode %{
5684     __ movl($dst$$Register, $mem$$Address);
5685     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5686     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5687   %}
5688 
5689   ins_pipe(ialu_reg_mem);
5690 %}
5691 
5692 // Load Integer with mask 0xFF into Long Register
5693 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5694   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5695   effect(KILL cr);
5696 
5697   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5698             "XOR    $dst.hi,$dst.hi" %}
5699   ins_encode %{
5700     Register Rdst = $dst$$Register;
5701     __ movzbl(Rdst, $mem$$Address);
5702     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5703   %}
5704   ins_pipe(ialu_reg_mem);
5705 %}
5706 
5707 // Load Integer with mask 0xFFFF into Long Register
5708 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5709   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5710   effect(KILL cr);
5711 
5712   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5713             "XOR    $dst.hi,$dst.hi" %}
5714   ins_encode %{
5715     Register Rdst = $dst$$Register;
5716     __ movzwl(Rdst, $mem$$Address);
5717     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5718   %}
5719   ins_pipe(ialu_reg_mem);
5720 %}
5721 
5722 // Load Integer with 31-bit mask into Long Register
5723 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5724   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5725   effect(KILL cr);
5726 
5727   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5728             "XOR    $dst.hi,$dst.hi\n\t"
5729             "AND    $dst.lo,$mask" %}
5730   ins_encode %{
5731     Register Rdst = $dst$$Register;
5732     __ movl(Rdst, $mem$$Address);
5733     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5734     __ andl(Rdst, $mask$$constant);
5735   %}
5736   ins_pipe(ialu_reg_mem);
5737 %}
5738 
5739 // Load Unsigned Integer into Long Register
5740 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5741   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5742   effect(KILL cr);
5743 
5744   ins_cost(250);
5745   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5746             "XOR    $dst.hi,$dst.hi" %}
5747 
5748   ins_encode %{
5749     __ movl($dst$$Register, $mem$$Address);
5750     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5751   %}
5752 
5753   ins_pipe(ialu_reg_mem);
5754 %}
5755 
5756 // Load Long.  Cannot clobber address while loading, so restrict address
5757 // register to ESI
5758 instruct loadL(eRegL dst, load_long_memory mem) %{
5759   predicate(!((LoadLNode*)n)->require_atomic_access());
5760   match(Set dst (LoadL mem));
5761 
5762   ins_cost(250);
5763   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5764             "MOV    $dst.hi,$mem+4" %}
5765 
5766   ins_encode %{
5767     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5768     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5769     __ movl($dst$$Register, Amemlo);
5770     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5771   %}
5772 
5773   ins_pipe(ialu_reg_long_mem);
5774 %}
5775 
5776 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5777 // then store it down to the stack and reload on the int
5778 // side.
5779 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5780   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5781   match(Set dst (LoadL mem));
5782 
5783   ins_cost(200);
5784   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5785             "FISTp  $dst" %}
5786   ins_encode(enc_loadL_volatile(mem,dst));
5787   ins_pipe( fpu_reg_mem );
5788 %}
5789 
5790 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5791   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5792   match(Set dst (LoadL mem));
5793   effect(TEMP tmp);
5794   ins_cost(180);
5795   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5796             "MOVSD  $dst,$tmp" %}
5797   ins_encode %{
5798     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5799     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5800   %}
5801   ins_pipe( pipe_slow );
5802 %}
5803 
5804 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5805   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5806   match(Set dst (LoadL mem));
5807   effect(TEMP tmp);
5808   ins_cost(160);
5809   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5810             "MOVD   $dst.lo,$tmp\n\t"
5811             "PSRLQ  $tmp,32\n\t"
5812             "MOVD   $dst.hi,$tmp" %}
5813   ins_encode %{
5814     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5815     __ movdl($dst$$Register, $tmp$$XMMRegister);
5816     __ psrlq($tmp$$XMMRegister, 32);
5817     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5818   %}
5819   ins_pipe( pipe_slow );
5820 %}
5821 
5822 // Load Range
5823 instruct loadRange(rRegI dst, memory mem) %{
5824   match(Set dst (LoadRange mem));
5825 
5826   ins_cost(125);
5827   format %{ "MOV    $dst,$mem" %}
5828   opcode(0x8B);
5829   ins_encode( OpcP, RegMem(dst,mem));
5830   ins_pipe( ialu_reg_mem );
5831 %}
5832 
5833 
5834 // Load Pointer
5835 instruct loadP(eRegP dst, memory mem) %{
5836   match(Set dst (LoadP mem));
5837 
5838   ins_cost(125);
5839   format %{ "MOV    $dst,$mem" %}
5840   opcode(0x8B);
5841   ins_encode( OpcP, RegMem(dst,mem));
5842   ins_pipe( ialu_reg_mem );
5843 %}
5844 
5845 // Load Klass Pointer
5846 instruct loadKlass(eRegP dst, memory mem) %{
5847   match(Set dst (LoadKlass mem));
5848 
5849   ins_cost(125);
5850   format %{ "MOV    $dst,$mem" %}
5851   opcode(0x8B);
5852   ins_encode( OpcP, RegMem(dst,mem));
5853   ins_pipe( ialu_reg_mem );
5854 %}
5855 
5856 // Load Double
5857 instruct loadDPR(regDPR dst, memory mem) %{
5858   predicate(UseSSE<=1);
5859   match(Set dst (LoadD mem));
5860 
5861   ins_cost(150);
5862   format %{ "FLD_D  ST,$mem\n\t"
5863             "FSTP   $dst" %}
5864   opcode(0xDD);               /* DD /0 */
5865   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5866               Pop_Reg_DPR(dst) );
5867   ins_pipe( fpu_reg_mem );
5868 %}
5869 
5870 // Load Double to XMM
5871 instruct loadD(regD dst, memory mem) %{
5872   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5873   match(Set dst (LoadD mem));
5874   ins_cost(145);
5875   format %{ "MOVSD  $dst,$mem" %}
5876   ins_encode %{
5877     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5878   %}
5879   ins_pipe( pipe_slow );
5880 %}
5881 
5882 instruct loadD_partial(regD dst, memory mem) %{
5883   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5884   match(Set dst (LoadD mem));
5885   ins_cost(145);
5886   format %{ "MOVLPD $dst,$mem" %}
5887   ins_encode %{
5888     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5889   %}
5890   ins_pipe( pipe_slow );
5891 %}
5892 
5893 // Load to XMM register (single-precision floating point)
5894 // MOVSS instruction
5895 instruct loadF(regF dst, memory mem) %{
5896   predicate(UseSSE>=1);
5897   match(Set dst (LoadF mem));
5898   ins_cost(145);
5899   format %{ "MOVSS  $dst,$mem" %}
5900   ins_encode %{
5901     __ movflt ($dst$$XMMRegister, $mem$$Address);
5902   %}
5903   ins_pipe( pipe_slow );
5904 %}
5905 
5906 // Load Float
5907 instruct loadFPR(regFPR dst, memory mem) %{
5908   predicate(UseSSE==0);
5909   match(Set dst (LoadF mem));
5910 
5911   ins_cost(150);
5912   format %{ "FLD_S  ST,$mem\n\t"
5913             "FSTP   $dst" %}
5914   opcode(0xD9);               /* D9 /0 */
5915   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5916               Pop_Reg_FPR(dst) );
5917   ins_pipe( fpu_reg_mem );
5918 %}
5919 
5920 // Load Effective Address
5921 instruct leaP8(eRegP dst, indOffset8 mem) %{
5922   match(Set dst mem);
5923 
5924   ins_cost(110);
5925   format %{ "LEA    $dst,$mem" %}
5926   opcode(0x8D);
5927   ins_encode( OpcP, RegMem(dst,mem));
5928   ins_pipe( ialu_reg_reg_fat );
5929 %}
5930 
5931 instruct leaP32(eRegP dst, indOffset32 mem) %{
5932   match(Set dst mem);
5933 
5934   ins_cost(110);
5935   format %{ "LEA    $dst,$mem" %}
5936   opcode(0x8D);
5937   ins_encode( OpcP, RegMem(dst,mem));
5938   ins_pipe( ialu_reg_reg_fat );
5939 %}
5940 
5941 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5942   match(Set dst mem);
5943 
5944   ins_cost(110);
5945   format %{ "LEA    $dst,$mem" %}
5946   opcode(0x8D);
5947   ins_encode( OpcP, RegMem(dst,mem));
5948   ins_pipe( ialu_reg_reg_fat );
5949 %}
5950 
5951 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5952   match(Set dst mem);
5953 
5954   ins_cost(110);
5955   format %{ "LEA    $dst,$mem" %}
5956   opcode(0x8D);
5957   ins_encode( OpcP, RegMem(dst,mem));
5958   ins_pipe( ialu_reg_reg_fat );
5959 %}
5960 
5961 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5962   match(Set dst mem);
5963 
5964   ins_cost(110);
5965   format %{ "LEA    $dst,$mem" %}
5966   opcode(0x8D);
5967   ins_encode( OpcP, RegMem(dst,mem));
5968   ins_pipe( ialu_reg_reg_fat );
5969 %}
5970 
5971 // Load Constant
5972 instruct loadConI(rRegI dst, immI src) %{
5973   match(Set dst src);
5974 
5975   format %{ "MOV    $dst,$src" %}
5976   ins_encode( LdImmI(dst, src) );
5977   ins_pipe( ialu_reg_fat );
5978 %}
5979 
5980 // Load Constant zero
5981 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5982   match(Set dst src);
5983   effect(KILL cr);
5984 
5985   ins_cost(50);
5986   format %{ "XOR    $dst,$dst" %}
5987   opcode(0x33);  /* + rd */
5988   ins_encode( OpcP, RegReg( dst, dst ) );
5989   ins_pipe( ialu_reg );
5990 %}
5991 
5992 instruct loadConP(eRegP dst, immP src) %{
5993   match(Set dst src);
5994 
5995   format %{ "MOV    $dst,$src" %}
5996   opcode(0xB8);  /* + rd */
5997   ins_encode( LdImmP(dst, src) );
5998   ins_pipe( ialu_reg_fat );
5999 %}
6000 
6001 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
6002   match(Set dst src);
6003   effect(KILL cr);
6004   ins_cost(200);
6005   format %{ "MOV    $dst.lo,$src.lo\n\t"
6006             "MOV    $dst.hi,$src.hi" %}
6007   opcode(0xB8);
6008   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6009   ins_pipe( ialu_reg_long_fat );
6010 %}
6011 
6012 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6013   match(Set dst src);
6014   effect(KILL cr);
6015   ins_cost(150);
6016   format %{ "XOR    $dst.lo,$dst.lo\n\t"
6017             "XOR    $dst.hi,$dst.hi" %}
6018   opcode(0x33,0x33);
6019   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6020   ins_pipe( ialu_reg_long );
6021 %}
6022 
6023 // The instruction usage is guarded by predicate in operand immFPR().
6024 instruct loadConFPR(regFPR dst, immFPR con) %{
6025   match(Set dst con);
6026   ins_cost(125);
6027   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6028             "FSTP   $dst" %}
6029   ins_encode %{
6030     __ fld_s($constantaddress($con));
6031     __ fstp_d($dst$$reg);
6032   %}
6033   ins_pipe(fpu_reg_con);
6034 %}
6035 
6036 // The instruction usage is guarded by predicate in operand immFPR0().
6037 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6038   match(Set dst con);
6039   ins_cost(125);
6040   format %{ "FLDZ   ST\n\t"
6041             "FSTP   $dst" %}
6042   ins_encode %{
6043     __ fldz();
6044     __ fstp_d($dst$$reg);
6045   %}
6046   ins_pipe(fpu_reg_con);
6047 %}
6048 
6049 // The instruction usage is guarded by predicate in operand immFPR1().
6050 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6051   match(Set dst con);
6052   ins_cost(125);
6053   format %{ "FLD1   ST\n\t"
6054             "FSTP   $dst" %}
6055   ins_encode %{
6056     __ fld1();
6057     __ fstp_d($dst$$reg);
6058   %}
6059   ins_pipe(fpu_reg_con);
6060 %}
6061 
6062 // The instruction usage is guarded by predicate in operand immF().
6063 instruct loadConF(regF dst, immF con) %{
6064   match(Set dst con);
6065   ins_cost(125);
6066   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6067   ins_encode %{
6068     __ movflt($dst$$XMMRegister, $constantaddress($con));
6069   %}
6070   ins_pipe(pipe_slow);
6071 %}
6072 
6073 // The instruction usage is guarded by predicate in operand immF0().
6074 instruct loadConF0(regF dst, immF0 src) %{
6075   match(Set dst src);
6076   ins_cost(100);
6077   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6078   ins_encode %{
6079     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6080   %}
6081   ins_pipe(pipe_slow);
6082 %}
6083 
6084 // The instruction usage is guarded by predicate in operand immDPR().
6085 instruct loadConDPR(regDPR dst, immDPR con) %{
6086   match(Set dst con);
6087   ins_cost(125);
6088 
6089   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6090             "FSTP   $dst" %}
6091   ins_encode %{
6092     __ fld_d($constantaddress($con));
6093     __ fstp_d($dst$$reg);
6094   %}
6095   ins_pipe(fpu_reg_con);
6096 %}
6097 
6098 // The instruction usage is guarded by predicate in operand immDPR0().
6099 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6100   match(Set dst con);
6101   ins_cost(125);
6102 
6103   format %{ "FLDZ   ST\n\t"
6104             "FSTP   $dst" %}
6105   ins_encode %{
6106     __ fldz();
6107     __ fstp_d($dst$$reg);
6108   %}
6109   ins_pipe(fpu_reg_con);
6110 %}
6111 
6112 // The instruction usage is guarded by predicate in operand immDPR1().
6113 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6114   match(Set dst con);
6115   ins_cost(125);
6116 
6117   format %{ "FLD1   ST\n\t"
6118             "FSTP   $dst" %}
6119   ins_encode %{
6120     __ fld1();
6121     __ fstp_d($dst$$reg);
6122   %}
6123   ins_pipe(fpu_reg_con);
6124 %}
6125 
6126 // The instruction usage is guarded by predicate in operand immD().
6127 instruct loadConD(regD dst, immD con) %{
6128   match(Set dst con);
6129   ins_cost(125);
6130   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6131   ins_encode %{
6132     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6133   %}
6134   ins_pipe(pipe_slow);
6135 %}
6136 
6137 // The instruction usage is guarded by predicate in operand immD0().
6138 instruct loadConD0(regD dst, immD0 src) %{
6139   match(Set dst src);
6140   ins_cost(100);
6141   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6142   ins_encode %{
6143     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6144   %}
6145   ins_pipe( pipe_slow );
6146 %}
6147 
6148 // Load Stack Slot
6149 instruct loadSSI(rRegI dst, stackSlotI src) %{
6150   match(Set dst src);
6151   ins_cost(125);
6152 
6153   format %{ "MOV    $dst,$src" %}
6154   opcode(0x8B);
6155   ins_encode( OpcP, RegMem(dst,src));
6156   ins_pipe( ialu_reg_mem );
6157 %}
6158 
6159 instruct loadSSL(eRegL dst, stackSlotL src) %{
6160   match(Set dst src);
6161 
6162   ins_cost(200);
6163   format %{ "MOV    $dst,$src.lo\n\t"
6164             "MOV    $dst+4,$src.hi" %}
6165   opcode(0x8B, 0x8B);
6166   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6167   ins_pipe( ialu_mem_long_reg );
6168 %}
6169 
6170 // Load Stack Slot
6171 instruct loadSSP(eRegP dst, stackSlotP src) %{
6172   match(Set dst src);
6173   ins_cost(125);
6174 
6175   format %{ "MOV    $dst,$src" %}
6176   opcode(0x8B);
6177   ins_encode( OpcP, RegMem(dst,src));
6178   ins_pipe( ialu_reg_mem );
6179 %}
6180 
6181 // Load Stack Slot
6182 instruct loadSSF(regFPR dst, stackSlotF src) %{
6183   match(Set dst src);
6184   ins_cost(125);
6185 
6186   format %{ "FLD_S  $src\n\t"
6187             "FSTP   $dst" %}
6188   opcode(0xD9);               /* D9 /0, FLD m32real */
6189   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6190               Pop_Reg_FPR(dst) );
6191   ins_pipe( fpu_reg_mem );
6192 %}
6193 
6194 // Load Stack Slot
6195 instruct loadSSD(regDPR dst, stackSlotD src) %{
6196   match(Set dst src);
6197   ins_cost(125);
6198 
6199   format %{ "FLD_D  $src\n\t"
6200             "FSTP   $dst" %}
6201   opcode(0xDD);               /* DD /0, FLD m64real */
6202   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6203               Pop_Reg_DPR(dst) );
6204   ins_pipe( fpu_reg_mem );
6205 %}
6206 
6207 // Prefetch instructions for allocation.
6208 // Must be safe to execute with invalid address (cannot fault).
6209 
6210 instruct prefetchAlloc0( memory mem ) %{
6211   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6212   match(PrefetchAllocation mem);
6213   ins_cost(0);
6214   size(0);
6215   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6216   ins_encode();
6217   ins_pipe(empty);
6218 %}
6219 
6220 instruct prefetchAlloc( memory mem ) %{
6221   predicate(AllocatePrefetchInstr==3);
6222   match( PrefetchAllocation mem );
6223   ins_cost(100);
6224 
6225   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6226   ins_encode %{
6227     __ prefetchw($mem$$Address);
6228   %}
6229   ins_pipe(ialu_mem);
6230 %}
6231 
6232 instruct prefetchAllocNTA( memory mem ) %{
6233   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6234   match(PrefetchAllocation mem);
6235   ins_cost(100);
6236 
6237   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6238   ins_encode %{
6239     __ prefetchnta($mem$$Address);
6240   %}
6241   ins_pipe(ialu_mem);
6242 %}
6243 
6244 instruct prefetchAllocT0( memory mem ) %{
6245   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6246   match(PrefetchAllocation mem);
6247   ins_cost(100);
6248 
6249   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6250   ins_encode %{
6251     __ prefetcht0($mem$$Address);
6252   %}
6253   ins_pipe(ialu_mem);
6254 %}
6255 
6256 instruct prefetchAllocT2( memory mem ) %{
6257   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6258   match(PrefetchAllocation mem);
6259   ins_cost(100);
6260 
6261   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6262   ins_encode %{
6263     __ prefetcht2($mem$$Address);
6264   %}
6265   ins_pipe(ialu_mem);
6266 %}
6267 
6268 //----------Store Instructions-------------------------------------------------
6269 
6270 // Store Byte
6271 instruct storeB(memory mem, xRegI src) %{
6272   match(Set mem (StoreB mem src));
6273 
6274   ins_cost(125);
6275   format %{ "MOV8   $mem,$src" %}
6276   opcode(0x88);
6277   ins_encode( OpcP, RegMem( src, mem ) );
6278   ins_pipe( ialu_mem_reg );
6279 %}
6280 
6281 // Store Char/Short
6282 instruct storeC(memory mem, rRegI src) %{
6283   match(Set mem (StoreC mem src));
6284 
6285   ins_cost(125);
6286   format %{ "MOV16  $mem,$src" %}
6287   opcode(0x89, 0x66);
6288   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6289   ins_pipe( ialu_mem_reg );
6290 %}
6291 
6292 // Store Integer
6293 instruct storeI(memory mem, rRegI src) %{
6294   match(Set mem (StoreI mem src));
6295 
6296   ins_cost(125);
6297   format %{ "MOV    $mem,$src" %}
6298   opcode(0x89);
6299   ins_encode( OpcP, RegMem( src, mem ) );
6300   ins_pipe( ialu_mem_reg );
6301 %}
6302 
6303 // Store Long
6304 instruct storeL(long_memory mem, eRegL src) %{
6305   predicate(!((StoreLNode*)n)->require_atomic_access());
6306   match(Set mem (StoreL mem src));
6307 
6308   ins_cost(200);
6309   format %{ "MOV    $mem,$src.lo\n\t"
6310             "MOV    $mem+4,$src.hi" %}
6311   opcode(0x89, 0x89);
6312   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6313   ins_pipe( ialu_mem_long_reg );
6314 %}
6315 
6316 // Store Long to Integer
6317 instruct storeL2I(memory mem, eRegL src) %{
6318   match(Set mem (StoreI mem (ConvL2I src)));
6319 
6320   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6321   ins_encode %{
6322     __ movl($mem$$Address, $src$$Register);
6323   %}
6324   ins_pipe(ialu_mem_reg);
6325 %}
6326 
6327 // Volatile Store Long.  Must be atomic, so move it into
6328 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6329 // target address before the store (for null-ptr checks)
6330 // so the memory operand is used twice in the encoding.
6331 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6332   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6333   match(Set mem (StoreL mem src));
6334   effect( KILL cr );
6335   ins_cost(400);
6336   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6337             "FILD   $src\n\t"
6338             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6339   opcode(0x3B);
6340   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6341   ins_pipe( fpu_reg_mem );
6342 %}
6343 
6344 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6345   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6346   match(Set mem (StoreL mem src));
6347   effect( TEMP tmp, KILL cr );
6348   ins_cost(380);
6349   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6350             "MOVSD  $tmp,$src\n\t"
6351             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6352   ins_encode %{
6353     __ cmpl(rax, $mem$$Address);
6354     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6355     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6356   %}
6357   ins_pipe( pipe_slow );
6358 %}
6359 
6360 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6361   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6362   match(Set mem (StoreL mem src));
6363   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6364   ins_cost(360);
6365   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6366             "MOVD   $tmp,$src.lo\n\t"
6367             "MOVD   $tmp2,$src.hi\n\t"
6368             "PUNPCKLDQ $tmp,$tmp2\n\t"
6369             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6370   ins_encode %{
6371     __ cmpl(rax, $mem$$Address);
6372     __ movdl($tmp$$XMMRegister, $src$$Register);
6373     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6374     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6375     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6376   %}
6377   ins_pipe( pipe_slow );
6378 %}
6379 
6380 // Store Pointer; for storing unknown oops and raw pointers
6381 instruct storeP(memory mem, anyRegP src) %{
6382   match(Set mem (StoreP mem src));
6383 
6384   ins_cost(125);
6385   format %{ "MOV    $mem,$src" %}
6386   opcode(0x89);
6387   ins_encode( OpcP, RegMem( src, mem ) );
6388   ins_pipe( ialu_mem_reg );
6389 %}
6390 
6391 // Store Integer Immediate
6392 instruct storeImmI(memory mem, immI src) %{
6393   match(Set mem (StoreI mem src));
6394 
6395   ins_cost(150);
6396   format %{ "MOV    $mem,$src" %}
6397   opcode(0xC7);               /* C7 /0 */
6398   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6399   ins_pipe( ialu_mem_imm );
6400 %}
6401 
6402 // Store Short/Char Immediate
6403 instruct storeImmI16(memory mem, immI16 src) %{
6404   predicate(UseStoreImmI16);
6405   match(Set mem (StoreC mem src));
6406 
6407   ins_cost(150);
6408   format %{ "MOV16  $mem,$src" %}
6409   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6410   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6411   ins_pipe( ialu_mem_imm );
6412 %}
6413 
6414 // Store Pointer Immediate; null pointers or constant oops that do not
6415 // need card-mark barriers.
6416 instruct storeImmP(memory mem, immP src) %{
6417   match(Set mem (StoreP mem src));
6418 
6419   ins_cost(150);
6420   format %{ "MOV    $mem,$src" %}
6421   opcode(0xC7);               /* C7 /0 */
6422   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6423   ins_pipe( ialu_mem_imm );
6424 %}
6425 
6426 // Store Byte Immediate
6427 instruct storeImmB(memory mem, immI8 src) %{
6428   match(Set mem (StoreB mem src));
6429 
6430   ins_cost(150);
6431   format %{ "MOV8   $mem,$src" %}
6432   opcode(0xC6);               /* C6 /0 */
6433   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6434   ins_pipe( ialu_mem_imm );
6435 %}
6436 
6437 // Store CMS card-mark Immediate
6438 instruct storeImmCM(memory mem, immI8 src) %{
6439   match(Set mem (StoreCM mem src));
6440 
6441   ins_cost(150);
6442   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6443   opcode(0xC6);               /* C6 /0 */
6444   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6445   ins_pipe( ialu_mem_imm );
6446 %}
6447 
6448 // Store Double
6449 instruct storeDPR( memory mem, regDPR1 src) %{
6450   predicate(UseSSE<=1);
6451   match(Set mem (StoreD mem src));
6452 
6453   ins_cost(100);
6454   format %{ "FST_D  $mem,$src" %}
6455   opcode(0xDD);       /* DD /2 */
6456   ins_encode( enc_FPR_store(mem,src) );
6457   ins_pipe( fpu_mem_reg );
6458 %}
6459 
6460 // Store double does rounding on x86
6461 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6462   predicate(UseSSE<=1);
6463   match(Set mem (StoreD mem (RoundDouble src)));
6464 
6465   ins_cost(100);
6466   format %{ "FST_D  $mem,$src\t# round" %}
6467   opcode(0xDD);       /* DD /2 */
6468   ins_encode( enc_FPR_store(mem,src) );
6469   ins_pipe( fpu_mem_reg );
6470 %}
6471 
6472 // Store XMM register to memory (double-precision floating point)
6473 // MOVSD instruction
6474 instruct storeD(memory mem, regD src) %{
6475   predicate(UseSSE>=2);
6476   match(Set mem (StoreD mem src));
6477   ins_cost(95);
6478   format %{ "MOVSD  $mem,$src" %}
6479   ins_encode %{
6480     __ movdbl($mem$$Address, $src$$XMMRegister);
6481   %}
6482   ins_pipe( pipe_slow );
6483 %}
6484 
6485 // Load Double
6486 instruct MoveD2VL(vlRegD dst, regD src) %{
6487   match(Set dst src);
6488   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6489   ins_encode %{
6490     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6491   %}
6492   ins_pipe( fpu_reg_reg );
6493 %}
6494 
6495 // Load Double
6496 instruct MoveVL2D(regD dst, vlRegD src) %{
6497   match(Set dst src);
6498   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6499   ins_encode %{
6500     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6501   %}
6502   ins_pipe( fpu_reg_reg );
6503 %}
6504 
6505 // Store XMM register to memory (single-precision floating point)
6506 // MOVSS instruction
6507 instruct storeF(memory mem, regF src) %{
6508   predicate(UseSSE>=1);
6509   match(Set mem (StoreF mem src));
6510   ins_cost(95);
6511   format %{ "MOVSS  $mem,$src" %}
6512   ins_encode %{
6513     __ movflt($mem$$Address, $src$$XMMRegister);
6514   %}
6515   ins_pipe( pipe_slow );
6516 %}
6517 
6518 // Load Float
6519 instruct MoveF2VL(vlRegF dst, regF src) %{
6520   match(Set dst src);
6521   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6522   ins_encode %{
6523     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6524   %}
6525   ins_pipe( fpu_reg_reg );
6526 %}
6527 
6528 // Load Float
6529 instruct MoveVL2F(regF dst, vlRegF src) %{
6530   match(Set dst src);
6531   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6532   ins_encode %{
6533     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6534   %}
6535   ins_pipe( fpu_reg_reg );
6536 %}
6537 
6538 // Store Float
6539 instruct storeFPR( memory mem, regFPR1 src) %{
6540   predicate(UseSSE==0);
6541   match(Set mem (StoreF mem src));
6542 
6543   ins_cost(100);
6544   format %{ "FST_S  $mem,$src" %}
6545   opcode(0xD9);       /* D9 /2 */
6546   ins_encode( enc_FPR_store(mem,src) );
6547   ins_pipe( fpu_mem_reg );
6548 %}
6549 
6550 // Store Float does rounding on x86
6551 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6552   predicate(UseSSE==0);
6553   match(Set mem (StoreF mem (RoundFloat src)));
6554 
6555   ins_cost(100);
6556   format %{ "FST_S  $mem,$src\t# round" %}
6557   opcode(0xD9);       /* D9 /2 */
6558   ins_encode( enc_FPR_store(mem,src) );
6559   ins_pipe( fpu_mem_reg );
6560 %}
6561 
6562 // Store Float (converted from double) does rounding on x86
6563 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6564   predicate(UseSSE<=1);
6565   match(Set mem (StoreF mem (ConvD2F src)));
6566 
6567   ins_cost(100);
6568   format %{ "FST_S  $mem,$src\t# D-round" %}
6569   opcode(0xD9);       /* D9 /2 */
6570   ins_encode( enc_FPR_store(mem,src) );
6571   ins_pipe( fpu_mem_reg );
6572 %}
6573 
6574 // Store immediate Float value (it is faster than store from FPU register)
6575 // The instruction usage is guarded by predicate in operand immFPR().
6576 instruct storeFPR_imm( memory mem, immFPR src) %{
6577   match(Set mem (StoreF mem src));
6578 
6579   ins_cost(50);
6580   format %{ "MOV    $mem,$src\t# store float" %}
6581   opcode(0xC7);               /* C7 /0 */
6582   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6583   ins_pipe( ialu_mem_imm );
6584 %}
6585 
6586 // Store immediate Float value (it is faster than store from XMM register)
6587 // The instruction usage is guarded by predicate in operand immF().
6588 instruct storeF_imm( memory mem, immF src) %{
6589   match(Set mem (StoreF mem src));
6590 
6591   ins_cost(50);
6592   format %{ "MOV    $mem,$src\t# store float" %}
6593   opcode(0xC7);               /* C7 /0 */
6594   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6595   ins_pipe( ialu_mem_imm );
6596 %}
6597 
6598 // Store Integer to stack slot
6599 instruct storeSSI(stackSlotI dst, rRegI src) %{
6600   match(Set dst src);
6601 
6602   ins_cost(100);
6603   format %{ "MOV    $dst,$src" %}
6604   opcode(0x89);
6605   ins_encode( OpcPRegSS( dst, src ) );
6606   ins_pipe( ialu_mem_reg );
6607 %}
6608 
6609 // Store Pointer to stack slot
6610 instruct storeSSP(stackSlotP dst, eRegP src) %{
6611   match(Set dst src);
6612 
6613   ins_cost(100);
6614   format %{ "MOV    $dst,$src" %}
6615   opcode(0x89);
6616   ins_encode( OpcPRegSS( dst, src ) );
6617   ins_pipe( ialu_mem_reg );
6618 %}
6619 
6620 // Store Long to stack slot
6621 instruct storeSSL(stackSlotL dst, eRegL src) %{
6622   match(Set dst src);
6623 
6624   ins_cost(200);
6625   format %{ "MOV    $dst,$src.lo\n\t"
6626             "MOV    $dst+4,$src.hi" %}
6627   opcode(0x89, 0x89);
6628   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6629   ins_pipe( ialu_mem_long_reg );
6630 %}
6631 
6632 //----------MemBar Instructions-----------------------------------------------
6633 // Memory barrier flavors
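// x86 only reorders a load with an older store to a different location,
// so acquire, release, and store-store barriers need no instruction and
// are matched to empty encodings below.  Only the store-load case
// (MemBarVolatile) must emit something; it is implemented as a locked
// read-modify-write of the top-of-stack word (LOCK ADDL [ESP], 0), which
// acts as a full fence and is typically cheaper than MFENCE.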
6634 
6635 instruct membar_acquire() %{
6636   match(MemBarAcquire);
6637   match(LoadFence);
6638   ins_cost(400);
6639 
6640   size(0);
6641   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6642   ins_encode();
6643   ins_pipe(empty);
6644 %}
6645 
6646 instruct membar_acquire_lock() %{
6647   match(MemBarAcquireLock);
6648   ins_cost(0);
6649 
6650   size(0);
6651   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6652   ins_encode( );
6653   ins_pipe(empty);
6654 %}
6655 
6656 instruct membar_release() %{
6657   match(MemBarRelease);
6658   match(StoreFence);
6659   ins_cost(400);
6660 
6661   size(0);
6662   format %{ "MEMBAR-release ! (empty encoding)" %}
6663   ins_encode( );
6664   ins_pipe(empty);
6665 %}
6666 
6667 instruct membar_release_lock() %{
6668   match(MemBarReleaseLock);
6669   ins_cost(0);
6670 
6671   size(0);
6672   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6673   ins_encode( );
6674   ins_pipe(empty);
6675 %}
6676 
6677 instruct membar_volatile(eFlagsReg cr) %{
6678   match(MemBarVolatile);
6679   effect(KILL cr);
6680   ins_cost(400);
6681 
6682   format %{
6683     $$template
6684     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6685   %}
6686   ins_encode %{
6687     __ membar(Assembler::StoreLoad);
6688   %}
6689   ins_pipe(pipe_slow);
6690 %}
6691 
6692 instruct unnecessary_membar_volatile() %{
6693   match(MemBarVolatile);
6694   predicate(Matcher::post_store_load_barrier(n));
6695   ins_cost(0);
6696 
6697   size(0);
6698   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6699   ins_encode( );
6700   ins_pipe(empty);
6701 %}
6702 
6703 instruct membar_storestore() %{
6704   match(MemBarStoreStore);
6705   ins_cost(0);
6706 
6707   size(0);
6708   format %{ "MEMBAR-storestore (empty encoding)" %}
6709   ins_encode( );
6710   ins_pipe(empty);
6711 %}
6712 
6713 //----------Move Instructions--------------------------------------------------
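// CastX2P is free here: the operands are constrained to the same register
// (EAX), so reinterpreting the integer as a pointer emits no code.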
6714 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6715   match(Set dst (CastX2P src));
6716   format %{ "# X2P  $dst, $src" %}
6717   ins_encode( /*empty encoding*/ );
6718   ins_cost(0);
6719   ins_pipe(empty);
6720 %}
6721 
6722 instruct castP2X(rRegI dst, eRegP src ) %{
6723   match(Set dst (CastP2X src));
6724   ins_cost(50);
6725   format %{ "MOV    $dst, $src\t# CastP2X" %}
6726   ins_encode( enc_Copy( dst, src) );
6727   ins_pipe( ialu_reg_reg );
6728 %}
6729 
6730 //----------Conditional Move---------------------------------------------------
6731 // Conditional move
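// Two strategies are used below: processors without CMOV (pre-P6) get the
// jmov* forms, which invert the condition and branch around a plain MOV,
// while processors reporting supports_cmov() use the real CMOVcc
// instruction (opcode 0F 40+cc).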
6732 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6733   predicate(!VM_Version::supports_cmov() );
6734   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6735   ins_cost(200);
6736   format %{ "J$cop,us skip\t# signed cmove\n\t"
6737             "MOV    $dst,$src\n"
6738       "skip:" %}
6739   ins_encode %{
6740     Label Lskip;
6741     // Invert sense of branch from sense of CMOV
6742     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6743     __ movl($dst$$Register, $src$$Register);
6744     __ bind(Lskip);
6745   %}
6746   ins_pipe( pipe_cmov_reg );
6747 %}
6748 
6749 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6750   predicate(!VM_Version::supports_cmov() );
6751   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6752   ins_cost(200);
6753   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6754             "MOV    $dst,$src\n"
6755       "skip:" %}
6756   ins_encode %{
6757     Label Lskip;
6758     // Invert sense of branch from sense of CMOV
6759     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6760     __ movl($dst$$Register, $src$$Register);
6761     __ bind(Lskip);
6762   %}
6763   ins_pipe( pipe_cmov_reg );
6764 %}
6765 
6766 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6767   predicate(VM_Version::supports_cmov() );
6768   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6769   ins_cost(200);
6770   format %{ "CMOV$cop $dst,$src" %}
6771   opcode(0x0F,0x40);
6772   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6773   ins_pipe( pipe_cmov_reg );
6774 %}
6775 
6776 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6777   predicate(VM_Version::supports_cmov() );
6778   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6779   ins_cost(200);
6780   format %{ "CMOV$cop $dst,$src" %}
6781   opcode(0x0F,0x40);
6782   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6783   ins_pipe( pipe_cmov_reg );
6784 %}
6785 
6786 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6787   predicate(VM_Version::supports_cmov() );
6788   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6789   ins_cost(200);
6790   expand %{
6791     cmovI_regU(cop, cr, dst, src);
6792   %}
6793 %}
6794 
6795 // Conditional move
6796 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6797   predicate(VM_Version::supports_cmov() );
6798   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6799   ins_cost(250);
6800   format %{ "CMOV$cop $dst,$src" %}
6801   opcode(0x0F,0x40);
6802   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6803   ins_pipe( pipe_cmov_mem );
6804 %}
6805 
6806 // Conditional move
6807 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6808   predicate(VM_Version::supports_cmov() );
6809   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6810   ins_cost(250);
6811   format %{ "CMOV$cop $dst,$src" %}
6812   opcode(0x0F,0x40);
6813   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6814   ins_pipe( pipe_cmov_mem );
6815 %}
6816 
6817 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6818   predicate(VM_Version::supports_cmov() );
6819   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6820   ins_cost(250);
6821   expand %{
6822     cmovI_memU(cop, cr, dst, src);
6823   %}
6824 %}
6825 
6826 // Conditional move
6827 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6828   predicate(VM_Version::supports_cmov() );
6829   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6830   ins_cost(200);
6831   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6832   opcode(0x0F,0x40);
6833   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6834   ins_pipe( pipe_cmov_reg );
6835 %}
6836 
6837 // Conditional move (non-P6 version)
6838 // Note:  a CMoveP is generated for  stubs and native wrappers
6839 //        regardless of whether we are on a P6, so we
6840 //        emulate a cmov here
6841 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6842   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6843   ins_cost(300);
6844   format %{ "Jn$cop   skip\n\t"
6845           "MOV    $dst,$src\t# pointer\n"
6846       "skip:" %}
6847   opcode(0x8b);
6848   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6849   ins_pipe( pipe_cmov_reg );
6850 %}
6851 
6852 // Conditional move
6853 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6854   predicate(VM_Version::supports_cmov() );
6855   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6856   ins_cost(200);
6857   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6858   opcode(0x0F,0x40);
6859   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6860   ins_pipe( pipe_cmov_reg );
6861 %}
6862 
6863 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6864   predicate(VM_Version::supports_cmov() );
6865   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6866   ins_cost(200);
6867   expand %{
6868     cmovP_regU(cop, cr, dst, src);
6869   %}
6870 %}
6871 
6872 // DISABLED: Requires the ADLC to emit a bottom_type call that
6873 // correctly meets the two pointer arguments; one is an incoming
6874 // register but the other is a memory operand.  ALSO appears to
6875 // be buggy with implicit null checks.
6876 //
6877 //// Conditional move
6878 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6879 //  predicate(VM_Version::supports_cmov() );
6880 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6881 //  ins_cost(250);
6882 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6883 //  opcode(0x0F,0x40);
6884 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6885 //  ins_pipe( pipe_cmov_mem );
6886 //%}
6887 //
6888 //// Conditional move
6889 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6890 //  predicate(VM_Version::supports_cmov() );
6891 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6892 //  ins_cost(250);
6893 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6894 //  opcode(0x0F,0x40);
6895 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6896 //  ins_pipe( pipe_cmov_mem );
6897 //%}
6898 
6899 // Conditional move
6900 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6901   predicate(UseSSE<=1);
6902   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6903   ins_cost(200);
6904   format %{ "FCMOV$cop $dst,$src\t# double" %}
6905   opcode(0xDA);
6906   ins_encode( enc_cmov_dpr(cop,src) );
6907   ins_pipe( pipe_cmovDPR_reg );
6908 %}
6909 
6910 // Conditional move
6911 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6912   predicate(UseSSE==0);
6913   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6914   ins_cost(200);
6915   format %{ "FCMOV$cop $dst,$src\t# float" %}
6916   opcode(0xDA);
6917   ins_encode( enc_cmov_dpr(cop,src) );
6918   ins_pipe( pipe_cmovDPR_reg );
6919 %}
6920 
6921 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6922 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6923   predicate(UseSSE<=1);
6924   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6925   ins_cost(200);
6926   format %{ "Jn$cop   skip\n\t"
6927             "MOV    $dst,$src\t# double\n"
6928       "skip:" %}
6929   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6930   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6931   ins_pipe( pipe_cmovDPR_reg );
6932 %}
6933 
6934 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6935 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6936   predicate(UseSSE==0);
6937   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6938   ins_cost(200);
6939   format %{ "Jn$cop    skip\n\t"
6940             "MOV    $dst,$src\t# float\n"
6941       "skip:" %}
6942   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6943   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6944   ins_pipe( pipe_cmovDPR_reg );
6945 %}
6946 
6947 // No CMOVE with SSE/SSE2
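// (XMM registers have no conditional-move instruction, so branch around
//  the MOVSS/MOVSD instead, as in the pre-P6 integer case.)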
6948 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6949   predicate (UseSSE>=1);
6950   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6951   ins_cost(200);
6952   format %{ "Jn$cop   skip\n\t"
6953             "MOVSS  $dst,$src\t# float\n"
6954       "skip:" %}
6955   ins_encode %{
6956     Label skip;
6957     // Invert sense of branch from sense of CMOV
6958     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6959     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6960     __ bind(skip);
6961   %}
6962   ins_pipe( pipe_slow );
6963 %}
6964 
6965 // No CMOVE with SSE/SSE2
6966 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6967   predicate (UseSSE>=2);
6968   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6969   ins_cost(200);
6970   format %{ "Jn$cop   skip\n\t"
6971             "MOVSD  $dst,$src\t# double\n"
6972       "skip:" %}
6973   ins_encode %{
6974     Label skip;
6975     // Invert sense of branch from sense of CMOV
6976     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6977     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6978     __ bind(skip);
6979   %}
6980   ins_pipe( pipe_slow );
6981 %}
6982 
6983 // unsigned version
6984 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6985   predicate (UseSSE>=1);
6986   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6987   ins_cost(200);
6988   format %{ "Jn$cop   skip\n\t"
6989             "MOVSS  $dst,$src\t# float\n"
6990       "skip:" %}
6991   ins_encode %{
6992     Label skip;
6993     // Invert sense of branch from sense of CMOV
6994     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6995     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6996     __ bind(skip);
6997   %}
6998   ins_pipe( pipe_slow );
6999 %}
7000 
7001 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
7002   predicate (UseSSE>=1);
7003   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7004   ins_cost(200);
7005   expand %{
7006     fcmovF_regU(cop, cr, dst, src);
7007   %}
7008 %}
7009 
7010 // unsigned version
7011 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7012   predicate (UseSSE>=2);
7013   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7014   ins_cost(200);
7015   format %{ "Jn$cop   skip\n\t"
7016             "MOVSD  $dst,$src\t# double\n"
7017       "skip:" %}
7018   ins_encode %{
7019     Label skip;
7020     // Invert sense of branch from sense of CMOV
7021     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7022     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7023     __ bind(skip);
7024   %}
7025   ins_pipe( pipe_slow );
7026 %}
7027 
7028 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7029   predicate (UseSSE>=2);
7030   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7031   ins_cost(200);
7032   expand %{
7033     fcmovD_regU(cop, cr, dst, src);
7034   %}
7035 %}
7036 
7037 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7038   predicate(VM_Version::supports_cmov() );
7039   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7040   ins_cost(200);
7041   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7042             "CMOV$cop $dst.hi,$src.hi" %}
7043   opcode(0x0F,0x40);
7044   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7045   ins_pipe( pipe_cmov_reg_long );
7046 %}
7047 
7048 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7049   predicate(VM_Version::supports_cmov() );
7050   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7051   ins_cost(200);
7052   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7053             "CMOV$cop $dst.hi,$src.hi" %}
7054   opcode(0x0F,0x40);
7055   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7056   ins_pipe( pipe_cmov_reg_long );
7057 %}
7058 
7059 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7060   predicate(VM_Version::supports_cmov() );
7061   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7062   ins_cost(200);
7063   expand %{
7064     cmovL_regU(cop, cr, dst, src);
7065   %}
7066 %}
7067 
7068 //----------Arithmetic Instructions--------------------------------------------
7069 //----------Addition Instructions----------------------------------------------
7070 
7071 // Integer Addition Instructions
7072 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7073   match(Set dst (AddI dst src));
7074   effect(KILL cr);
7075 
7076   size(2);
7077   format %{ "ADD    $dst,$src" %}
7078   opcode(0x03);
7079   ins_encode( OpcP, RegReg( dst, src) );
7080   ins_pipe( ialu_reg_reg );
7081 %}
7082 
7083 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7084   match(Set dst (AddI dst src));
7085   effect(KILL cr);
7086 
7087   format %{ "ADD    $dst,$src" %}
7088   opcode(0x81, 0x00); /* /0 id */
7089   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7090   ins_pipe( ialu_reg );
7091 %}
7092 
7093 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7094   predicate(UseIncDec);
7095   match(Set dst (AddI dst src));
7096   effect(KILL cr);
7097 
7098   size(1);
7099   format %{ "INC    $dst" %}
7100   opcode(0x40); /*  */
7101   ins_encode( Opc_plus( primary, dst ) );
7102   ins_pipe( ialu_reg );
7103 %}
7104 
7105 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7106   match(Set dst (AddI src0 src1));
7107   ins_cost(110);
7108 
7109   format %{ "LEA    $dst,[$src0 + $src1]" %}
7110   opcode(0x8D); /* 0x8D /r */
7111   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7112   ins_pipe( ialu_reg_reg );
7113 %}
7114 
7115 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7116   match(Set dst (AddP src0 src1));
7117   ins_cost(110);
7118 
7119   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7120   opcode(0x8D); /* 0x8D /r */
7121   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7122   ins_pipe( ialu_reg_reg );
7123 %}
7124 
7125 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7126   predicate(UseIncDec);
7127   match(Set dst (AddI dst src));
7128   effect(KILL cr);
7129 
7130   size(1);
7131   format %{ "DEC    $dst" %}
7132   opcode(0x48); /*  */
7133   ins_encode( Opc_plus( primary, dst ) );
7134   ins_pipe( ialu_reg );
7135 %}
7136 
7137 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7138   match(Set dst (AddP dst src));
7139   effect(KILL cr);
7140 
7141   size(2);
7142   format %{ "ADD    $dst,$src" %}
7143   opcode(0x03);
7144   ins_encode( OpcP, RegReg( dst, src) );
7145   ins_pipe( ialu_reg_reg );
7146 %}
7147 
7148 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7149   match(Set dst (AddP dst src));
7150   effect(KILL cr);
7151 
7152   format %{ "ADD    $dst,$src" %}
7153   opcode(0x81,0x00); /* Opcode 81 /0 id */
7154   // ins_encode( RegImm( dst, src) );
7155   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7156   ins_pipe( ialu_reg );
7157 %}
7158 
7159 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7160   match(Set dst (AddI dst (LoadI src)));
7161   effect(KILL cr);
7162 
7163   ins_cost(125);
7164   format %{ "ADD    $dst,$src" %}
7165   opcode(0x03);
7166   ins_encode( OpcP, RegMem( dst, src) );
7167   ins_pipe( ialu_reg_mem );
7168 %}
7169 
7170 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7171   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7172   effect(KILL cr);
7173 
7174   ins_cost(150);
7175   format %{ "ADD    $dst,$src" %}
7176   opcode(0x01);  /* Opcode 01 /r */
7177   ins_encode( OpcP, RegMem( src, dst ) );
7178   ins_pipe( ialu_mem_reg );
7179 %}
7180 
7181 // Add Memory with Immediate
7182 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7183   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7184   effect(KILL cr);
7185 
7186   ins_cost(125);
7187   format %{ "ADD    $dst,$src" %}
7188   opcode(0x81);               /* Opcode 81 /0 id */
7189   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7190   ins_pipe( ialu_mem_imm );
7191 %}
7192 
7193 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7194   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7195   effect(KILL cr);
7196 
7197   ins_cost(125);
7198   format %{ "INC    $dst" %}
7199   opcode(0xFF);               /* Opcode FF /0 */
7200   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7201   ins_pipe( ialu_mem_imm );
7202 %}
7203 
7204 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7205   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7206   effect(KILL cr);
7207 
7208   ins_cost(125);
7209   format %{ "DEC    $dst" %}
7210   opcode(0xFF);               /* Opcode FF /1 */
7211   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7212   ins_pipe( ialu_mem_imm );
7213 %}
7214 
7215 
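// The checkCastPP/castPP/castII nodes below exist only to carry type
// information for the optimizer; they are matched to empty encodings and
// emit no machine code.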
7216 instruct checkCastPP( eRegP dst ) %{
7217   match(Set dst (CheckCastPP dst));
7218 
7219   size(0);
7220   format %{ "#checkcastPP of $dst" %}
7221   ins_encode( /*empty encoding*/ );
7222   ins_pipe( empty );
7223 %}
7224 
7225 instruct castPP( eRegP dst ) %{
7226   match(Set dst (CastPP dst));
7227   format %{ "#castPP of $dst" %}
7228   ins_encode( /*empty encoding*/ );
7229   ins_pipe( empty );
7230 %}
7231 
7232 instruct castII( rRegI dst ) %{
7233   match(Set dst (CastII dst));
7234   format %{ "#castII of $dst" %}
7235   ins_encode( /*empty encoding*/ );
7236   ins_cost(0);
7237   ins_pipe( empty );
7238 %}
7239 
7240 // Load-locked - same as a regular pointer load when used with compare-swap
7241 instruct loadPLocked(eRegP dst, memory mem) %{
7242   match(Set dst (LoadPLocked mem));
7243 
7244   ins_cost(125);
7245   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7246   opcode(0x8B);
7247   ins_encode( OpcP, RegMem(dst,mem));
7248   ins_pipe( ialu_reg_mem );
7249 %}
7250 
7251 // Conditional-store of the updated heap-top.
7252 // Used during allocation of the shared heap.
7253 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7254 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7255   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7256   // EAX is killed if there is contention, but then it's also unused.
7257   // In the common case of no contention, EAX holds the new oop address.
7258   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7259   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7260   ins_pipe( pipe_cmpxchg );
7261 %}
7262 
7263 // Conditional-store of an int value.
7264 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7265 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7266   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7267   effect(KILL oldval);
7268   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7269   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7270   ins_pipe( pipe_cmpxchg );
7271 %}
7272 
7273 // Conditional-store of a long value.
7274 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7275 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7276   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7277   effect(KILL oldval);
7278   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7279             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7280             "XCHG   EBX,ECX"
7281   %}
7282   ins_encode %{
7283     // Note: we need to swap rbx and rcx before and after the
7284     //       cmpxchg8 instruction because the instruction uses
7285     //       rcx as the high order word of the new value to store but
7286     //       our register encoding uses rbx.
7287     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7288     __ lock();
7289     __ cmpxchg8($mem$$Address);
7290     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7291   %}
7292   ins_pipe( pipe_cmpxchg );
7293 %}
7294 
7295 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7296 
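// Common CMPXCHG pattern: the operand classes force the expected value
// into EAX (EDX:EAX for the 8-byte form), the LOCK prefix makes the
// exchange atomic, and ZF is set only if the swap happened.  EAX is killed
// because CMPXCHG writes the observed memory value back into it on
// failure; enc_flags_ne_to_boolean then turns the flag into the 0/1
// result the ideal CompareAndSwap node expects.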
7297 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7298   predicate(VM_Version::supports_cx8());
7299   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7300   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7301   effect(KILL cr, KILL oldval);
7302   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7303             "MOV    $res,0\n\t"
7304             "JNE,s  fail\n\t"
7305             "MOV    $res,1\n"
7306           "fail:" %}
7307   ins_encode( enc_cmpxchg8(mem_ptr),
7308               enc_flags_ne_to_boolean(res) );
7309   ins_pipe( pipe_cmpxchg );
7310 %}
7311 
7312 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7313   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7314   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7315   effect(KILL cr, KILL oldval);
7316   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7317             "MOV    $res,0\n\t"
7318             "JNE,s  fail\n\t"
7319             "MOV    $res,1\n"
7320           "fail:" %}
7321   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7322   ins_pipe( pipe_cmpxchg );
7323 %}
7324 
7325 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7326   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7327   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7328   effect(KILL cr, KILL oldval);
7329   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7330             "MOV    $res,0\n\t"
7331             "JNE,s  fail\n\t"
7332             "MOV    $res,1\n"
7333           "fail:" %}
7334   ins_encode( enc_cmpxchgb(mem_ptr),
7335               enc_flags_ne_to_boolean(res) );
7336   ins_pipe( pipe_cmpxchg );
7337 %}
7338 
7339 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7340   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7341   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7342   effect(KILL cr, KILL oldval);
7343   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7344             "MOV    $res,0\n\t"
7345             "JNE,s  fail\n\t"
7346             "MOV    $res,1\n"
7347           "fail:" %}
7348   ins_encode( enc_cmpxchgw(mem_ptr),
7349               enc_flags_ne_to_boolean(res) );
7350   ins_pipe( pipe_cmpxchg );
7351 %}
7352 
7353 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7354   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7355   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7356   effect(KILL cr, KILL oldval);
7357   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7358             "MOV    $res,0\n\t"
7359             "JNE,s  fail\n\t"
7360             "MOV    $res,1\n"
7361           "fail:" %}
7362   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7363   ins_pipe( pipe_cmpxchg );
7364 %}
7365 
7366 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7367   predicate(VM_Version::supports_cx8());
7368   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7369   effect(KILL cr);
7370   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7371   ins_encode( enc_cmpxchg8(mem_ptr) );
7372   ins_pipe( pipe_cmpxchg );
7373 %}
7374 
7375 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7376   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7377   effect(KILL cr);
7378   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7379   ins_encode( enc_cmpxchg(mem_ptr) );
7380   ins_pipe( pipe_cmpxchg );
7381 %}
7382 
7383 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7384   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7385   effect(KILL cr);
7386   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7387   ins_encode( enc_cmpxchgb(mem_ptr) );
7388   ins_pipe( pipe_cmpxchg );
7389 %}
7390 
7391 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7392   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7393   effect(KILL cr);
7394   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7395   ins_encode( enc_cmpxchgw(mem_ptr) );
7396   ins_pipe( pipe_cmpxchg );
7397 %}
7398 
7399 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7400   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7401   effect(KILL cr);
7402   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7403   ins_encode( enc_cmpxchg(mem_ptr) );
7404   ins_pipe( pipe_cmpxchg );
7405 %}
7406 
7407 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7408   predicate(n->as_LoadStore()->result_not_used());
7409   match(Set dummy (GetAndAddB mem add));
7410   effect(KILL cr);
7411   format %{ "ADDB  [$mem],$add" %}
7412   ins_encode %{
7413     __ lock();
7414     __ addb($mem$$Address, $add$$constant);
7415   %}
7416   ins_pipe( pipe_cmpxchg );
7417 %}
7418 
7419 // Important to match to xRegI: only 8-bit regs.
7420 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7421   match(Set newval (GetAndAddB mem newval));
7422   effect(KILL cr);
7423   format %{ "XADDB  [$mem],$newval" %}
7424   ins_encode %{
7425     __ lock();
7426     __ xaddb($mem$$Address, $newval$$Register);
7427   %}
7428   ins_pipe( pipe_cmpxchg );
7429 %}
7430 
7431 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7432   predicate(n->as_LoadStore()->result_not_used());
7433   match(Set dummy (GetAndAddS mem add));
7434   effect(KILL cr);
7435   format %{ "ADDS  [$mem],$add" %}
7436   ins_encode %{
7437     __ lock();
7438     __ addw($mem$$Address, $add$$constant);
7439   %}
7440   ins_pipe( pipe_cmpxchg );
7441 %}
7442 
7443 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7444   match(Set newval (GetAndAddS mem newval));
7445   effect(KILL cr);
7446   format %{ "XADDS  [$mem],$newval" %}
7447   ins_encode %{
7448     __ lock();
7449     __ xaddw($mem$$Address, $newval$$Register);
7450   %}
7451   ins_pipe( pipe_cmpxchg );
7452 %}
7453 
7454 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7455   predicate(n->as_LoadStore()->result_not_used());
7456   match(Set dummy (GetAndAddI mem add));
7457   effect(KILL cr);
7458   format %{ "ADDL  [$mem],$add" %}
7459   ins_encode %{
7460     __ lock();
7461     __ addl($mem$$Address, $add$$constant);
7462   %}
7463   ins_pipe( pipe_cmpxchg );
7464 %}
7465 
7466 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7467   match(Set newval (GetAndAddI mem newval));
7468   effect(KILL cr);
7469   format %{ "XADDL  [$mem],$newval" %}
7470   ins_encode %{
7471     __ lock();
7472     __ xaddl($mem$$Address, $newval$$Register);
7473   %}
7474   ins_pipe( pipe_cmpxchg );
7475 %}
7476 
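// GetAndSet is implemented with XCHG.  With a memory operand XCHG asserts
// the LOCK signal implicitly, so no explicit lock prefix is needed and the
// flags are left untouched (hence no KILL cr below).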
7477 // Important to match to xRegI: only 8-bit regs.
7478 instruct xchgB( memory mem, xRegI newval) %{
7479   match(Set newval (GetAndSetB mem newval));
7480   format %{ "XCHGB  $newval,[$mem]" %}
7481   ins_encode %{
7482     __ xchgb($newval$$Register, $mem$$Address);
7483   %}
7484   ins_pipe( pipe_cmpxchg );
7485 %}
7486 
7487 instruct xchgS( memory mem, rRegI newval) %{
7488   match(Set newval (GetAndSetS mem newval));
7489   format %{ "XCHGW  $newval,[$mem]" %}
7490   ins_encode %{
7491     __ xchgw($newval$$Register, $mem$$Address);
7492   %}
7493   ins_pipe( pipe_cmpxchg );
7494 %}
7495 
7496 instruct xchgI( memory mem, rRegI newval) %{
7497   match(Set newval (GetAndSetI mem newval));
7498   format %{ "XCHGL  $newval,[$mem]" %}
7499   ins_encode %{
7500     __ xchgl($newval$$Register, $mem$$Address);
7501   %}
7502   ins_pipe( pipe_cmpxchg );
7503 %}
7504 
7505 instruct xchgP( memory mem, pRegP newval) %{
7506   match(Set newval (GetAndSetP mem newval));
7507   format %{ "XCHGL  $newval,[$mem]" %}
7508   ins_encode %{
7509     __ xchgl($newval$$Register, $mem$$Address);
7510   %}
7511   ins_pipe( pipe_cmpxchg );
7512 %}
7513 
7514 //----------Subtraction Instructions-------------------------------------------
7515 
7516 // Integer Subtraction Instructions
7517 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7518   match(Set dst (SubI dst src));
7519   effect(KILL cr);
7520 
7521   size(2);
7522   format %{ "SUB    $dst,$src" %}
7523   opcode(0x2B);
7524   ins_encode( OpcP, RegReg( dst, src) );
7525   ins_pipe( ialu_reg_reg );
7526 %}
7527 
7528 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7529   match(Set dst (SubI dst src));
7530   effect(KILL cr);
7531 
7532   format %{ "SUB    $dst,$src" %}
7533   opcode(0x81,0x05);  /* Opcode 81 /5 */
7534   // ins_encode( RegImm( dst, src) );
7535   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7536   ins_pipe( ialu_reg );
7537 %}
7538 
7539 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7540   match(Set dst (SubI dst (LoadI src)));
7541   effect(KILL cr);
7542 
7543   ins_cost(125);
7544   format %{ "SUB    $dst,$src" %}
7545   opcode(0x2B);
7546   ins_encode( OpcP, RegMem( dst, src) );
7547   ins_pipe( ialu_reg_mem );
7548 %}
7549 
7550 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7551   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7552   effect(KILL cr);
7553 
7554   ins_cost(150);
7555   format %{ "SUB    $dst,$src" %}
7556   opcode(0x29);  /* Opcode 29 /r */
7557   ins_encode( OpcP, RegMem( src, dst ) );
7558   ins_pipe( ialu_mem_reg );
7559 %}
7560 
7561 // Subtract from a pointer
7562 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7563   match(Set dst (AddP dst (SubI zero src)));
7564   effect(KILL cr);
7565 
7566   size(2);
7567   format %{ "SUB    $dst,$src" %}
7568   opcode(0x2B);
7569   ins_encode( OpcP, RegReg( dst, src) );
7570   ins_pipe( ialu_reg_reg );
7571 %}
7572 
7573 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7574   match(Set dst (SubI zero dst));
7575   effect(KILL cr);
7576 
7577   size(2);
7578   format %{ "NEG    $dst" %}
7579   opcode(0xF7,0x03);  // Opcode F7 /3
7580   ins_encode( OpcP, RegOpc( dst ) );
7581   ins_pipe( ialu_reg );
7582 %}
7583 
7584 //----------Multiplication/Division Instructions-------------------------------
7585 // Integer Multiplication Instructions
7586 // Multiply Register
7587 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7588   match(Set dst (MulI dst src));
7589   effect(KILL cr);
7590 
7591   size(3);
7592   ins_cost(300);
7593   format %{ "IMUL   $dst,$src" %}
7594   opcode(0xAF, 0x0F);
7595   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7596   ins_pipe( ialu_reg_reg_alu0 );
7597 %}
7598 
7599 // Multiply 32-bit Immediate
7600 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7601   match(Set dst (MulI src imm));
7602   effect(KILL cr);
7603 
7604   ins_cost(300);
7605   format %{ "IMUL   $dst,$src,$imm" %}
7606   opcode(0x69);  /* 69 /r id */
7607   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7608   ins_pipe( ialu_reg_reg_alu0 );
7609 %}
7610 
7611 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7612   match(Set dst src);
7613   effect(KILL cr);
7614 
7615   // Note that this is artificially increased to make it more expensive than loadConL
7616   ins_cost(250);
7617   format %{ "MOV    EAX,$src\t// low word only" %}
7618   opcode(0xB8);
7619   ins_encode( LdImmL_Lo(dst, src) );
7620   ins_pipe( ialu_reg_fat );
7621 %}
7622 
7623 // Multiply by 32-bit Immediate, taking the shifted high order results
7624 //  (special case for shift by 32)
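// The predicate only accepts a constant multiplicand in the signed 32-bit
// range, so it can be supplied by loadConL_low_only; the ins_cost values
// below subtract 150 to compensate for that operand's deliberately
// inflated cost.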
7625 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7626   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7627   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7628              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7629              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7630   effect(USE src1, KILL cr);
7631 
7632   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7633   ins_cost(0*100 + 1*400 - 150);
7634   format %{ "IMUL   EDX:EAX,$src1" %}
7635   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7636   ins_pipe( pipe_slow );
7637 %}
7638 
7639 // Multiply by 32-bit Immediate, taking the shifted high order results
7640 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7641   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7642   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7643              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7644              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7645   effect(USE src1, KILL cr);
7646 
7647   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7648   ins_cost(1*100 + 1*400 - 150);
7649   format %{ "IMUL   EDX:EAX,$src1\n\t"
7650             "SAR    EDX,$cnt-32" %}
7651   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7652   ins_pipe( pipe_slow );
7653 %}
7654 
7655 // Multiply Memory 32-bit Immediate
7656 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7657   match(Set dst (MulI (LoadI src) imm));
7658   effect(KILL cr);
7659 
7660   ins_cost(300);
7661   format %{ "IMUL   $dst,$src,$imm" %}
7662   opcode(0x69);  /* 69 /r id */
7663   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7664   ins_pipe( ialu_reg_mem_alu0 );
7665 %}
7666 
7667 // Multiply Memory
7668 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7669   match(Set dst (MulI dst (LoadI src)));
7670   effect(KILL cr);
7671 
7672   ins_cost(350);
7673   format %{ "IMUL   $dst,$src" %}
7674   opcode(0xAF, 0x0F);
7675   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7676   ins_pipe( ialu_reg_mem_alu0 );
7677 %}
7678 
7679 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
7680 %{
7681   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
7682   effect(KILL cr, KILL src2);
7683 
7684   expand %{ mulI_eReg(dst, src1, cr);
7685            mulI_eReg(src2, src3, cr);
7686            addI_eReg(dst, src2, cr); %}
7687 %}
7688 
7689 // Multiply Register Int to Long
7690 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7691   // Basic Idea: long = (long)int * (long)int
7692   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7693   effect(DEF dst, USE src, USE src1, KILL flags);
7694 
7695   ins_cost(300);
7696   format %{ "IMUL   $dst,$src1" %}
7697 
7698   ins_encode( long_int_multiply( dst, src1 ) );
7699   ins_pipe( ialu_reg_reg_alu0 );
7700 %}
7701 
7702 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7703   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7704   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7705   effect(KILL flags);
7706 
7707   ins_cost(300);
7708   format %{ "MUL    $dst,$src1" %}
7709 
7710   ins_encode( long_uint_multiply(dst, src1) );
7711   ins_pipe( ialu_reg_reg_alu0 );
7712 %}
7713 
7714 // Multiply Register Long
7715 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7716   match(Set dst (MulL dst src));
7717   effect(KILL cr, TEMP tmp);
7718   ins_cost(4*100+3*400);
7719 // Basic idea: lo(result) = lo(x_lo * y_lo)
7720 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
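//             The hi(x_hi * y_lo) and hi(x_lo * y_hi) terms, and all of
//             x_hi * y_hi, fall into bits 64..127 of the full product and
//             are discarded, which is why three 32-bit multiplies suffice.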
7721   format %{ "MOV    $tmp,$src.lo\n\t"
7722             "IMUL   $tmp,EDX\n\t"
7723             "MOV    EDX,$src.hi\n\t"
7724             "IMUL   EDX,EAX\n\t"
7725             "ADD    $tmp,EDX\n\t"
7726             "MUL    EDX:EAX,$src.lo\n\t"
7727             "ADD    EDX,$tmp" %}
7728   ins_encode( long_multiply( dst, src, tmp ) );
7729   ins_pipe( pipe_slow );
7730 %}
7731 
7732 // Multiply Register Long where the left operand's high 32 bits are zero
7733 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7734   predicate(is_operand_hi32_zero(n->in(1)));
7735   match(Set dst (MulL dst src));
7736   effect(KILL cr, TEMP tmp);
7737   ins_cost(2*100+2*400);
7738 // Basic idea: lo(result) = lo(x_lo * y_lo)
7739 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7740   format %{ "MOV    $tmp,$src.hi\n\t"
7741             "IMUL   $tmp,EAX\n\t"
7742             "MUL    EDX:EAX,$src.lo\n\t"
7743             "ADD    EDX,$tmp" %}
7744   ins_encode %{
7745     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7746     __ imull($tmp$$Register, rax);
7747     __ mull($src$$Register);
7748     __ addl(rdx, $tmp$$Register);
7749   %}
7750   ins_pipe( pipe_slow );
7751 %}
7752 
7753 // Multiply Register Long where the right operand's high 32 bits are zero
7754 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7755   predicate(is_operand_hi32_zero(n->in(2)));
7756   match(Set dst (MulL dst src));
7757   effect(KILL cr, TEMP tmp);
7758   ins_cost(2*100+2*400);
7759 // Basic idea: lo(result) = lo(x_lo * y_lo)
7760 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7761   format %{ "MOV    $tmp,$src.lo\n\t"
7762             "IMUL   $tmp,EDX\n\t"
7763             "MUL    EDX:EAX,$src.lo\n\t"
7764             "ADD    EDX,$tmp" %}
7765   ins_encode %{
7766     __ movl($tmp$$Register, $src$$Register);
7767     __ imull($tmp$$Register, rdx);
7768     __ mull($src$$Register);
7769     __ addl(rdx, $tmp$$Register);
7770   %}
7771   ins_pipe( pipe_slow );
7772 %}
7773 
7774 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7775 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7776   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7777   match(Set dst (MulL dst src));
7778   effect(KILL cr);
7779   ins_cost(1*400);
7780 // Basic idea: lo(result) = lo(x_lo * y_lo)
7781 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7782   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7783   ins_encode %{
7784     __ mull($src$$Register);
7785   %}
7786   ins_pipe( pipe_slow );
7787 %}
7788 
7789 // Multiply Register Long by small constant
7790 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7791   match(Set dst (MulL dst src));
7792   effect(KILL cr, TEMP tmp);
7793   ins_cost(2*100+2*400);
7794   size(12);
7795 // Basic idea: lo(result) = lo(src * EAX)
7796 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7797   format %{ "IMUL   $tmp,EDX,$src\n\t"
7798             "MOV    EDX,$src\n\t"
7799             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7800             "ADD    EDX,$tmp" %}
7801   ins_encode( long_multiply_con( dst, src, tmp ) );
7802   ins_pipe( pipe_slow );
7803 %}
7804 
7805 // Integer DIV with Register
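// IDIV raises #DE not only for a zero divisor but also for the one
// overflowing case, min_jint / -1.  The sequence below detects
// EAX == 0x80000000 with a divisor of -1 and skips the IDIV, leaving
// EAX = min_jint and EDX = 0, the result the JVM specification requires.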
7806 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7807   match(Set rax (DivI rax div));
7808   effect(KILL rdx, KILL cr);
7809   size(26);
7810   ins_cost(30*100+10*100);
7811   format %{ "CMP    EAX,0x80000000\n\t"
7812             "JNE,s  normal\n\t"
7813             "XOR    EDX,EDX\n\t"
7814             "CMP    ECX,-1\n\t"
7815             "JE,s   done\n"
7816     "normal: CDQ\n\t"
7817             "IDIV   $div\n\t"
7818     "done:"        %}
7819   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7820   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7821   ins_pipe( ialu_reg_reg_alu0 );
7822 %}
7823 
7824 // Divide Register Long
7825 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7826   match(Set dst (DivL src1 src2));
7827   effect( KILL cr, KILL cx, KILL bx );
7828   ins_cost(10000);
7829   format %{ "PUSH   $src1.hi\n\t"
7830             "PUSH   $src1.lo\n\t"
7831             "PUSH   $src2.hi\n\t"
7832             "PUSH   $src2.lo\n\t"
7833             "CALL   SharedRuntime::ldiv\n\t"
7834             "ADD    ESP,16" %}
7835   ins_encode( long_div(src1,src2) );
7836   ins_pipe( pipe_slow );
7837 %}
7838 
7839 // Integer DIVMOD with Register, both quotient and mod results
7840 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7841   match(DivModI rax div);
7842   effect(KILL cr);
7843   size(26);
7844   ins_cost(30*100+10*100);
7845   format %{ "CMP    EAX,0x80000000\n\t"
7846             "JNE,s  normal\n\t"
7847             "XOR    EDX,EDX\n\t"
7848             "CMP    ECX,-1\n\t"
7849             "JE,s   done\n"
7850     "normal: CDQ\n\t"
7851             "IDIV   $div\n\t"
7852     "done:"        %}
7853   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7854   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7855   ins_pipe( pipe_slow );
7856 %}
7857 
7858 // Integer MOD with Register
7859 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7860   match(Set rdx (ModI rax div));
7861   effect(KILL rax, KILL cr);
7862 
7863   size(26);
7864   ins_cost(300);
7865   format %{ "CDQ\n\t"
7866             "IDIV   $div" %}
7867   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7868   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7869   ins_pipe( ialu_reg_reg_alu0 );
7870 %}
7871 
7872 // Remainder Register Long
7873 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7874   match(Set dst (ModL src1 src2));
7875   effect( KILL cr, KILL cx, KILL bx );
7876   ins_cost(10000);
7877   format %{ "PUSH   $src1.hi\n\t"
7878             "PUSH   $src1.lo\n\t"
7879             "PUSH   $src2.hi\n\t"
7880             "PUSH   $src2.lo\n\t"
7881             "CALL   SharedRuntime::lrem\n\t"
7882             "ADD    ESP,16" %}
7883   ins_encode( long_mod(src1,src2) );
7884   ins_pipe( pipe_slow );
7885 %}
7886 
7887 // Divide Register Long (no special case since divisor != -1)
7888 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7889   match(Set dst (DivL dst imm));
7890   effect( TEMP tmp, TEMP tmp2, KILL cr );
7891   ins_cost(1000);
7892   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7893             "XOR    $tmp2,$tmp2\n\t"
7894             "CMP    $tmp,EDX\n\t"
7895             "JA,s   fast\n\t"
7896             "MOV    $tmp2,EAX\n\t"
7897             "MOV    EAX,EDX\n\t"
7898             "MOV    EDX,0\n\t"
7899             "JLE,s  pos\n\t"
7900             "LNEG   EAX : $tmp2\n\t"
7901             "DIV    $tmp # unsigned division\n\t"
7902             "XCHG   EAX,$tmp2\n\t"
7903             "DIV    $tmp\n\t"
7904             "LNEG   $tmp2 : EAX\n\t"
7905             "JMP,s  done\n"
7906     "pos:\n\t"
7907             "DIV    $tmp\n\t"
7908             "XCHG   EAX,$tmp2\n"
7909     "fast:\n\t"
7910             "DIV    $tmp\n"
7911     "done:\n\t"
7912             "MOV    EDX,$tmp2\n\t"
7913             "NEG    EDX:EAX # if $imm < 0" %}
7914   ins_encode %{
7915     int con = (int)$imm$$constant;
7916     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7917     int pcon = (con > 0) ? con : -con;
7918     Label Lfast, Lpos, Ldone;
7919 
7920     __ movl($tmp$$Register, pcon);
7921     __ xorl($tmp2$$Register,$tmp2$$Register);
7922     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7923     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7924 
7925     __ movl($tmp2$$Register, $dst$$Register); // save
7926     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7927     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7928     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7929 
7930     // Negative dividend.
7931     // convert value to positive to use unsigned division
7932     __ lneg($dst$$Register, $tmp2$$Register);
7933     __ divl($tmp$$Register);
7934     __ xchgl($dst$$Register, $tmp2$$Register);
7935     __ divl($tmp$$Register);
7936     // revert result back to negative
7937     __ lneg($tmp2$$Register, $dst$$Register);
7938     __ jmpb(Ldone);
7939 
7940     __ bind(Lpos);
7941     __ divl($tmp$$Register); // Use unsigned division
7942     __ xchgl($dst$$Register, $tmp2$$Register);
7943     // Fall through to the final divide; tmp2 has the 32-bit hi result
7944 
7945     __ bind(Lfast);
7946     // fast path: src is positive
7947     __ divl($tmp$$Register); // Use unsigned division
7948 
7949     __ bind(Ldone);
7950     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7951     if (con < 0) {
7952       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7953     }
7954   %}
7955   ins_pipe( pipe_slow );
7956 %}
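
// Note (illustrative only): the imm32 long-division rule above is the classic two-step
// schoolbook division by a 32-bit divisor.  The sketch below shows only the unsigned
// core (negative dividends are negated first and the result negated afterwards, as in
// the rule); the helper name is ours and the divisor d is assumed positive, as
// guaranteed by the abs($imm) step.
//
//   #include <stdint.h>
//   static uint64_t udiv64_by_32(uint32_t lo, uint32_t hi, uint32_t d) {
//     uint32_t q_hi = hi / d;                                       // first DIV
//     uint32_t r    = hi % d;                                       // remainder chains into
//     uint32_t q_lo = (uint32_t)((((uint64_t)r << 32) | lo) / d);   // the second DIV
//     return ((uint64_t)q_hi << 32) | q_lo;                         // EDX:EAX
//   }
//
// Because r < d, the second quotient always fits in 32 bits, so DIV cannot fault; the
// JA,s fast path skips the first divide when the high word is already (unsigned) below d.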
7957 
7958 // Remainder Register Long (remainder fits into 32 bits)
7959 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7960   match(Set dst (ModL dst imm));
7961   effect( TEMP tmp, TEMP tmp2, KILL cr );
7962   ins_cost(1000);
7963   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7964             "CMP    $tmp,EDX\n\t"
7965             "JA,s   fast\n\t"
7966             "MOV    $tmp2,EAX\n\t"
7967             "MOV    EAX,EDX\n\t"
7968             "MOV    EDX,0\n\t"
7969             "JLE,s  pos\n\t"
7970             "LNEG   EAX : $tmp2\n\t"
7971             "DIV    $tmp # unsigned division\n\t"
7972             "MOV    EAX,$tmp2\n\t"
7973             "DIV    $tmp\n\t"
7974             "NEG    EDX\n\t"
7975             "JMP,s  done\n"
7976     "pos:\n\t"
7977             "DIV    $tmp\n\t"
7978             "MOV    EAX,$tmp2\n"
7979     "fast:\n\t"
7980             "DIV    $tmp\n"
7981     "done:\n\t"
7982             "MOV    EAX,EDX\n\t"
7983             "SAR    EDX,31\n\t" %}
7984   ins_encode %{
7985     int con = (int)$imm$$constant;
7986     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7987     int pcon = (con > 0) ? con : -con;
7988     Label  Lfast, Lpos, Ldone;
7989 
7990     __ movl($tmp$$Register, pcon);
7991     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7992     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7993 
7994     __ movl($tmp2$$Register, $dst$$Register); // save
7995     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7996     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7997     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7998 
7999     // Negative dividend.
8000     // convert value to positive to use unsigned division
8001     __ lneg($dst$$Register, $tmp2$$Register);
8002     __ divl($tmp$$Register);
8003     __ movl($dst$$Register, $tmp2$$Register);
8004     __ divl($tmp$$Register);
8005     // revert remainder back to negative
8006     __ negl(HIGH_FROM_LOW($dst$$Register));
8007     __ jmpb(Ldone);
8008 
8009     __ bind(Lpos);
8010     __ divl($tmp$$Register);
8011     __ movl($dst$$Register, $tmp2$$Register);
8012 
8013     __ bind(Lfast);
8014     // fast path: src is positive
8015     __ divl($tmp$$Register);
8016 
8017     __ bind(Ldone);
8018     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8019     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8020 
8021   %}
8022   ins_pipe( pipe_slow );
8023 %}
8024 
8025 // Integer Shift Instructions
8026 // Shift Left by one
8027 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8028   match(Set dst (LShiftI dst shift));
8029   effect(KILL cr);
8030 
8031   size(2);
8032   format %{ "SHL    $dst,$shift" %}
8033   opcode(0xD1, 0x4);  /* D1 /4 */
8034   ins_encode( OpcP, RegOpc( dst ) );
8035   ins_pipe( ialu_reg );
8036 %}
8037 
8038 // Shift Left by 8-bit immediate
8039 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8040   match(Set dst (LShiftI dst shift));
8041   effect(KILL cr);
8042 
8043   size(3);
8044   format %{ "SHL    $dst,$shift" %}
8045   opcode(0xC1, 0x4);  /* C1 /4 ib */
8046   ins_encode( RegOpcImm( dst, shift) );
8047   ins_pipe( ialu_reg );
8048 %}
8049 
8050 // Shift Left by variable
8051 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8052   match(Set dst (LShiftI dst shift));
8053   effect(KILL cr);
8054 
8055   size(2);
8056   format %{ "SHL    $dst,$shift" %}
8057   opcode(0xD3, 0x4);  /* D3 /4 */
8058   ins_encode( OpcP, RegOpc( dst ) );
8059   ins_pipe( ialu_reg_reg );
8060 %}
8061 
8062 // Arithmetic shift right by one
8063 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8064   match(Set dst (RShiftI dst shift));
8065   effect(KILL cr);
8066 
8067   size(2);
8068   format %{ "SAR    $dst,$shift" %}
8069   opcode(0xD1, 0x7);  /* D1 /7 */
8070   ins_encode( OpcP, RegOpc( dst ) );
8071   ins_pipe( ialu_reg );
8072 %}
8073 
8074 // Arithmetic shift right by one
8075 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8076   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8077   effect(KILL cr);
8078   format %{ "SAR    $dst,$shift" %}
8079   opcode(0xD1, 0x7);  /* D1 /7 */
8080   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8081   ins_pipe( ialu_mem_imm );
8082 %}
8083 
8084 // Arithmetic Shift Right by 8-bit immediate
8085 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8086   match(Set dst (RShiftI dst shift));
8087   effect(KILL cr);
8088 
8089   size(3);
8090   format %{ "SAR    $dst,$shift" %}
8091   opcode(0xC1, 0x7);  /* C1 /7 ib */
8092   ins_encode( RegOpcImm( dst, shift ) );
8093   ins_pipe( ialu_mem_imm );
8094 %}
8095 
8096 // Arithmetic Shift Right by 8-bit immediate
8097 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8098   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8099   effect(KILL cr);
8100 
8101   format %{ "SAR    $dst,$shift" %}
8102   opcode(0xC1, 0x7);  /* C1 /7 ib */
8103   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8104   ins_pipe( ialu_mem_imm );
8105 %}
8106 
8107 // Arithmetic Shift Right by variable
8108 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8109   match(Set dst (RShiftI dst shift));
8110   effect(KILL cr);
8111 
8112   size(2);
8113   format %{ "SAR    $dst,$shift" %}
8114   opcode(0xD3, 0x7);  /* D3 /7 */
8115   ins_encode( OpcP, RegOpc( dst ) );
8116   ins_pipe( ialu_reg_reg );
8117 %}
8118 
8119 // Logical shift right by one
8120 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8121   match(Set dst (URShiftI dst shift));
8122   effect(KILL cr);
8123 
8124   size(2);
8125   format %{ "SHR    $dst,$shift" %}
8126   opcode(0xD1, 0x5);  /* D1 /5 */
8127   ins_encode( OpcP, RegOpc( dst ) );
8128   ins_pipe( ialu_reg );
8129 %}
8130 
8131 // Logical Shift Right by 8-bit immediate
8132 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8133   match(Set dst (URShiftI dst shift));
8134   effect(KILL cr);
8135 
8136   size(3);
8137   format %{ "SHR    $dst,$shift" %}
8138   opcode(0xC1, 0x5);  /* C1 /5 ib */
8139   ins_encode( RegOpcImm( dst, shift) );
8140   ins_pipe( ialu_reg );
8141 %}
8142 
8143 
8144 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
8145 // This idiom is used by the compiler for the i2b bytecode (sign-extend byte to int).
8146 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8147   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8148 
8149   size(3);
8150   format %{ "MOVSX  $dst,$src :8" %}
8151   ins_encode %{
8152     __ movsbl($dst$$Register, $src$$Register);
8153   %}
8154   ins_pipe(ialu_reg_reg);
8155 %}
8156 
8157 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
8158 // This idiom is used by the compiler for the i2s bytecode (sign-extend short to int).
8159 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8160   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8161 
8162   size(3);
8163   format %{ "MOVSX  $dst,$src :16" %}
8164   ins_encode %{
8165     __ movswl($dst$$Register, $src$$Register);
8166   %}
8167   ins_pipe(ialu_reg_reg);
8168 %}
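
// Note (illustrative only): both idioms above are sign extension in disguise, which is
// why a single MOVSX suffices.  In C terms (assuming arithmetic right shift of signed
// values, which is what the Java >> operator guarantees):
//
//   #include <stdint.h>
//   static int32_t i2b(int32_t x) { return (int32_t)((uint32_t)x << 24) >> 24; }  // == (int8_t)x
//   static int32_t i2s(int32_t x) { return (int32_t)((uint32_t)x << 16) >> 16; }  // == (int16_t)x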
8169 
8170 
8171 // Logical Shift Right by variable
8172 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8173   match(Set dst (URShiftI dst shift));
8174   effect(KILL cr);
8175 
8176   size(2);
8177   format %{ "SHR    $dst,$shift" %}
8178   opcode(0xD3, 0x5);  /* D3 /5 */
8179   ins_encode( OpcP, RegOpc( dst ) );
8180   ins_pipe( ialu_reg_reg );
8181 %}
8182 
8183 
8184 //----------Logical Instructions-----------------------------------------------
8185 //----------Integer Logical Instructions---------------------------------------
8186 // And Instructions
8187 // And Register with Register
8188 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8189   match(Set dst (AndI dst src));
8190   effect(KILL cr);
8191 
8192   size(2);
8193   format %{ "AND    $dst,$src" %}
8194   opcode(0x23);
8195   ins_encode( OpcP, RegReg( dst, src) );
8196   ins_pipe( ialu_reg_reg );
8197 %}
8198 
8199 // And Register with Immediate
8200 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8201   match(Set dst (AndI dst src));
8202   effect(KILL cr);
8203 
8204   format %{ "AND    $dst,$src" %}
8205   opcode(0x81,0x04);  /* Opcode 81 /4 */
8206   // ins_encode( RegImm( dst, src) );
8207   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8208   ins_pipe( ialu_reg );
8209 %}
8210 
8211 // And Register with Memory
8212 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8213   match(Set dst (AndI dst (LoadI src)));
8214   effect(KILL cr);
8215 
8216   ins_cost(125);
8217   format %{ "AND    $dst,$src" %}
8218   opcode(0x23);
8219   ins_encode( OpcP, RegMem( dst, src) );
8220   ins_pipe( ialu_reg_mem );
8221 %}
8222 
8223 // And Memory with Register
8224 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8225   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8226   effect(KILL cr);
8227 
8228   ins_cost(150);
8229   format %{ "AND    $dst,$src" %}
8230   opcode(0x21);  /* Opcode 21 /r */
8231   ins_encode( OpcP, RegMem( src, dst ) );
8232   ins_pipe( ialu_mem_reg );
8233 %}
8234 
8235 // And Memory with Immediate
8236 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8237   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8238   effect(KILL cr);
8239 
8240   ins_cost(125);
8241   format %{ "AND    $dst,$src" %}
8242   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8243   // ins_encode( MemImm( dst, src) );
8244   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8245   ins_pipe( ialu_mem_imm );
8246 %}
8247 
8248 // BMI1 instructions
8249 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8250   match(Set dst (AndI (XorI src1 minus_1) src2));
8251   predicate(UseBMI1Instructions);
8252   effect(KILL cr);
8253 
8254   format %{ "ANDNL  $dst, $src1, $src2" %}
8255 
8256   ins_encode %{
8257     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8258   %}
8259   ins_pipe(ialu_reg);
8260 %}
8261 
8262 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8263   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8264   predicate(UseBMI1Instructions);
8265   effect(KILL cr);
8266 
8267   ins_cost(125);
8268   format %{ "ANDNL  $dst, $src1, $src2" %}
8269 
8270   ins_encode %{
8271     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8272   %}
8273   ins_pipe(ialu_reg_mem);
8274 %}
8275 
8276 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8277   match(Set dst (AndI (SubI imm_zero src) src));
8278   predicate(UseBMI1Instructions);
8279   effect(KILL cr);
8280 
8281   format %{ "BLSIL  $dst, $src" %}
8282 
8283   ins_encode %{
8284     __ blsil($dst$$Register, $src$$Register);
8285   %}
8286   ins_pipe(ialu_reg);
8287 %}
8288 
8289 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8290   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8291   predicate(UseBMI1Instructions);
8292   effect(KILL cr);
8293 
8294   ins_cost(125);
8295   format %{ "BLSIL  $dst, $src" %}
8296 
8297   ins_encode %{
8298     __ blsil($dst$$Register, $src$$Address);
8299   %}
8300   ins_pipe(ialu_reg_mem);
8301 %}
8302 
8303 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8304 %{
8305   match(Set dst (XorI (AddI src minus_1) src));
8306   predicate(UseBMI1Instructions);
8307   effect(KILL cr);
8308 
8309   format %{ "BLSMSKL $dst, $src" %}
8310 
8311   ins_encode %{
8312     __ blsmskl($dst$$Register, $src$$Register);
8313   %}
8314 
8315   ins_pipe(ialu_reg);
8316 %}
8317 
8318 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8319 %{
8320   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8321   predicate(UseBMI1Instructions);
8322   effect(KILL cr);
8323 
8324   ins_cost(125);
8325   format %{ "BLSMSKL $dst, $src" %}
8326 
8327   ins_encode %{
8328     __ blsmskl($dst$$Register, $src$$Address);
8329   %}
8330 
8331   ins_pipe(ialu_reg_mem);
8332 %}
8333 
8334 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8335 %{
8336   match(Set dst (AndI (AddI src minus_1) src) );
8337   predicate(UseBMI1Instructions);
8338   effect(KILL cr);
8339 
8340   format %{ "BLSRL  $dst, $src" %}
8341 
8342   ins_encode %{
8343     __ blsrl($dst$$Register, $src$$Register);
8344   %}
8345 
8346   ins_pipe(ialu_reg);
8347 %}
8348 
8349 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8350 %{
8351   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8352   predicate(UseBMI1Instructions);
8353   effect(KILL cr);
8354 
8355   ins_cost(125);
8356   format %{ "BLSRL  $dst, $src" %}
8357 
8358   ins_encode %{
8359     __ blsrl($dst$$Register, $src$$Address);
8360   %}
8361 
8362   ins_pipe(ialu_reg_mem);
8363 %}
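
// Note (illustrative only): the BMI1 matches above are the standard bit-trick identities;
// the rules simply recognize the sub/add/xor/and forms the ideal graph produces and emit
// the single BMI1 instruction instead.  Helper names are ours:
//
//   #include <stdint.h>
//   static uint32_t andn  (uint32_t a, uint32_t b) { return ~a & b;      }  // ANDN  : (a ^ -1) & b
//   static uint32_t blsi  (uint32_t x)             { return (0 - x) & x; }  // BLSI  : isolate lowest set bit
//   static uint32_t blsmsk(uint32_t x)             { return (x - 1) ^ x; }  // BLSMSK: mask up to lowest set bit
//   static uint32_t blsr  (uint32_t x)             { return (x - 1) & x; }  // BLSR  : clear lowest set bit
//
//   // e.g. x = 0b0110100: blsi = 0b0000100, blsmsk = 0b0000111, blsr = 0b0110000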
8364 
8365 // Or Instructions
8366 // Or Register with Register
8367 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8368   match(Set dst (OrI dst src));
8369   effect(KILL cr);
8370 
8371   size(2);
8372   format %{ "OR     $dst,$src" %}
8373   opcode(0x0B);
8374   ins_encode( OpcP, RegReg( dst, src) );
8375   ins_pipe( ialu_reg_reg );
8376 %}
8377 
8378 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8379   match(Set dst (OrI dst (CastP2X src)));
8380   effect(KILL cr);
8381 
8382   size(2);
8383   format %{ "OR     $dst,$src" %}
8384   opcode(0x0B);
8385   ins_encode( OpcP, RegReg( dst, src) );
8386   ins_pipe( ialu_reg_reg );
8387 %}
8388 
8389 
8390 // Or Register with Immediate
8391 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8392   match(Set dst (OrI dst src));
8393   effect(KILL cr);
8394 
8395   format %{ "OR     $dst,$src" %}
8396   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8397   // ins_encode( RegImm( dst, src) );
8398   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8399   ins_pipe( ialu_reg );
8400 %}
8401 
8402 // Or Register with Memory
8403 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8404   match(Set dst (OrI dst (LoadI src)));
8405   effect(KILL cr);
8406 
8407   ins_cost(125);
8408   format %{ "OR     $dst,$src" %}
8409   opcode(0x0B);
8410   ins_encode( OpcP, RegMem( dst, src) );
8411   ins_pipe( ialu_reg_mem );
8412 %}
8413 
8414 // Or Memory with Register
8415 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8416   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8417   effect(KILL cr);
8418 
8419   ins_cost(150);
8420   format %{ "OR     $dst,$src" %}
8421   opcode(0x09);  /* Opcode 09 /r */
8422   ins_encode( OpcP, RegMem( src, dst ) );
8423   ins_pipe( ialu_mem_reg );
8424 %}
8425 
8426 // Or Memory with Immediate
8427 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8428   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8429   effect(KILL cr);
8430 
8431   ins_cost(125);
8432   format %{ "OR     $dst,$src" %}
8433   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8434   // ins_encode( MemImm( dst, src) );
8435   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8436   ins_pipe( ialu_mem_imm );
8437 %}
8438 
8439 // ROL/ROR
8440 // ROL expand
8441 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8442   effect(USE_DEF dst, USE shift, KILL cr);
8443 
8444   format %{ "ROL    $dst, $shift" %}
8445   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8446   ins_encode( OpcP, RegOpc( dst ));
8447   ins_pipe( ialu_reg );
8448 %}
8449 
8450 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8451   effect(USE_DEF dst, USE shift, KILL cr);
8452 
8453   format %{ "ROL    $dst, $shift" %}
8454   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8455   ins_encode( RegOpcImm(dst, shift) );
8456   ins_pipe(ialu_reg);
8457 %}
8458 
8459 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8460   effect(USE_DEF dst, USE shift, KILL cr);
8461 
8462   format %{ "ROL    $dst, $shift" %}
8463   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8464   ins_encode(OpcP, RegOpc(dst));
8465   ins_pipe( ialu_reg_reg );
8466 %}
8467 // end of ROL expand
8468 
8469 // ROL 32bit by one once
8470 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8471   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8472 
8473   expand %{
8474     rolI_eReg_imm1(dst, lshift, cr);
8475   %}
8476 %}
8477 
8478 // ROL 32bit var by imm8 once
8479 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8480   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8481   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8482 
8483   expand %{
8484     rolI_eReg_imm8(dst, lshift, cr);
8485   %}
8486 %}
8487 
8488 // ROL 32bit var by var once
8489 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8490   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8491 
8492   expand %{
8493     rolI_eReg_CL(dst, shift, cr);
8494   %}
8495 %}
8496 
8497 // ROL 32bit var by var once
8498 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8499   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8500 
8501   expand %{
8502     rolI_eReg_CL(dst, shift, cr);
8503   %}
8504 %}
8505 
8506 // ROR expand
8507 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8508   effect(USE_DEF dst, USE shift, KILL cr);
8509 
8510   format %{ "ROR    $dst, $shift" %}
8511   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8512   ins_encode( OpcP, RegOpc( dst ) );
8513   ins_pipe( ialu_reg );
8514 %}
8515 
8516 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8517   effect (USE_DEF dst, USE shift, KILL cr);
8518 
8519   format %{ "ROR    $dst, $shift" %}
8520   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8521   ins_encode( RegOpcImm(dst, shift) );
8522   ins_pipe( ialu_reg );
8523 %}
8524 
8525 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8526   effect(USE_DEF dst, USE shift, KILL cr);
8527 
8528   format %{ "ROR    $dst, $shift" %}
8529   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8530   ins_encode(OpcP, RegOpc(dst));
8531   ins_pipe( ialu_reg_reg );
8532 %}
8533 // end of ROR expand
8534 
8535 // ROR 32bit by one once
8536 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8537   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8538 
8539   expand %{
8540     rorI_eReg_imm1(dst, rshift, cr);
8541   %}
8542 %}
8543 
8544 // ROR 32bit by immI8 once
8545 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8546   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8547   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8548 
8549   expand %{
8550     rorI_eReg_imm8(dst, rshift, cr);
8551   %}
8552 %}
8553 
8554 // ROR 32bit var by var once
8555 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8556   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8557 
8558   expand %{
8559     rorI_eReg_CL(dst, shift, cr);
8560   %}
8561 %}
8562 
8563 // ROR 32bit var by var once
8564 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8565   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8566 
8567   expand %{
8568     rorI_eReg_CL(dst, shift, cr);
8569   %}
8570 %}
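
// Note (illustrative only): the ROL/ROR matches above recognize the usual rotate-from-
// shifts idiom; for a 32-bit rotate the two shift counts must sum to 0 mod 32, which is
// what the predicates on the imm8 forms check.  The Var_C0 forms rely on shift counts
// being masked to five bits (both by Java shifts and by the hardware), so a right-shift
// count of (0 - n) behaves like (32 - n).  Helper name is ours:
//
//   #include <stdint.h>
//   static uint32_t rol32(uint32_t x, unsigned n) {
//     n &= 31;
//     return (x << n) | (x >> ((32 - n) & 31));   // (OrI (LShiftI x n) (URShiftI x 32-n))
//   }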
8571 
8572 // Xor Instructions
8573 // Xor Register with Register
8574 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8575   match(Set dst (XorI dst src));
8576   effect(KILL cr);
8577 
8578   size(2);
8579   format %{ "XOR    $dst,$src" %}
8580   opcode(0x33);
8581   ins_encode( OpcP, RegReg( dst, src) );
8582   ins_pipe( ialu_reg_reg );
8583 %}
8584 
8585 // Xor Register with Immediate -1
8586 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8587   match(Set dst (XorI dst imm));
8588 
8589   size(2);
8590   format %{ "NOT    $dst" %}
8591   ins_encode %{
8592      __ notl($dst$$Register);
8593   %}
8594   ins_pipe( ialu_reg );
8595 %}
8596 
8597 // Xor Register with Immediate
8598 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8599   match(Set dst (XorI dst src));
8600   effect(KILL cr);
8601 
8602   format %{ "XOR    $dst,$src" %}
8603   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8604   // ins_encode( RegImm( dst, src) );
8605   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8606   ins_pipe( ialu_reg );
8607 %}
8608 
8609 // Xor Register with Memory
8610 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8611   match(Set dst (XorI dst (LoadI src)));
8612   effect(KILL cr);
8613 
8614   ins_cost(125);
8615   format %{ "XOR    $dst,$src" %}
8616   opcode(0x33);
8617   ins_encode( OpcP, RegMem(dst, src) );
8618   ins_pipe( ialu_reg_mem );
8619 %}
8620 
8621 // Xor Memory with Register
8622 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8623   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8624   effect(KILL cr);
8625 
8626   ins_cost(150);
8627   format %{ "XOR    $dst,$src" %}
8628   opcode(0x31);  /* Opcode 31 /r */
8629   ins_encode( OpcP, RegMem( src, dst ) );
8630   ins_pipe( ialu_mem_reg );
8631 %}
8632 
8633 // Xor Memory with Immediate
8634 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8635   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8636   effect(KILL cr);
8637 
8638   ins_cost(125);
8639   format %{ "XOR    $dst,$src" %}
8640   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8641   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8642   ins_pipe( ialu_mem_imm );
8643 %}
8644 
8645 //----------Convert Int to Boolean---------------------------------------------
8646 
8647 instruct movI_nocopy(rRegI dst, rRegI src) %{
8648   effect( DEF dst, USE src );
8649   format %{ "MOV    $dst,$src" %}
8650   ins_encode( enc_Copy( dst, src) );
8651   ins_pipe( ialu_reg_reg );
8652 %}
8653 
8654 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8655   effect( USE_DEF dst, USE src, KILL cr );
8656 
8657   size(4);
8658   format %{ "NEG    $dst\n\t"
8659             "ADC    $dst,$src" %}
8660   ins_encode( neg_reg(dst),
8661               OpcRegReg(0x13,dst,src) );
8662   ins_pipe( ialu_reg_reg_long );
8663 %}
8664 
8665 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8666   match(Set dst (Conv2B src));
8667 
8668   expand %{
8669     movI_nocopy(dst,src);
8670     ci2b(dst,src,cr);
8671   %}
8672 %}
8673 
8674 instruct movP_nocopy(rRegI dst, eRegP src) %{
8675   effect( DEF dst, USE src );
8676   format %{ "MOV    $dst,$src" %}
8677   ins_encode( enc_Copy( dst, src) );
8678   ins_pipe( ialu_reg_reg );
8679 %}
8680 
8681 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8682   effect( USE_DEF dst, USE src, KILL cr );
8683   format %{ "NEG    $dst\n\t"
8684             "ADC    $dst,$src" %}
8685   ins_encode( neg_reg(dst),
8686               OpcRegReg(0x13,dst,src) );
8687   ins_pipe( ialu_reg_reg_long );
8688 %}
8689 
8690 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8691   match(Set dst (Conv2B src));
8692 
8693   expand %{
8694     movP_nocopy(dst,src);
8695     cp2b(dst,src,cr);
8696   %}
8697 %}
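
// Note (illustrative only): the Conv2B expansions above use a small branch-free trick:
// after copying src into dst, NEG sets CF exactly when the value was non-zero, and ADC
// then adds src and that carry back in, leaving 0 or 1.  Helper name is ours:
//
//   #include <stdint.h>
//   static uint32_t conv2b(uint32_t src) {
//     uint32_t dst   = src;                // MOV dst,src
//     uint32_t carry = (dst != 0);         // NEG dst: CF = (dst != 0)
//     dst = (uint32_t)(0 - dst);           //          dst = -src
//     return dst + src + carry;            // ADC dst,src: -src + src + CF == CF
//   }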
8698 
8699 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8700   match(Set dst (CmpLTMask p q));
8701   effect(KILL cr);
8702   ins_cost(400);
8703 
8704   // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as its destination
8705   format %{ "XOR    $dst,$dst\n\t"
8706             "CMP    $p,$q\n\t"
8707             "SETlt  $dst\n\t"
8708             "NEG    $dst" %}
8709   ins_encode %{
8710     Register Rp = $p$$Register;
8711     Register Rq = $q$$Register;
8712     Register Rd = $dst$$Register;
8713     Label done;
8714     __ xorl(Rd, Rd);
8715     __ cmpl(Rp, Rq);
8716     __ setb(Assembler::less, Rd);
8717     __ negl(Rd);
8718   %}
8719 
8720   ins_pipe(pipe_slow);
8721 %}
8722 
8723 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8724   match(Set dst (CmpLTMask dst zero));
8725   effect(DEF dst, KILL cr);
8726   ins_cost(100);
8727 
8728   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8729   ins_encode %{
8730   __ sarl($dst$$Register, 31);
8731   %}
8732   ins_pipe(ialu_reg);
8733 %}
8734 
8735 /* better to save a register than avoid a branch */
8736 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8737   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8738   effect(KILL cr);
8739   ins_cost(400);
8740   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8741             "JGE    done\n\t"
8742             "ADD    $p,$y\n"
8743             "done:  " %}
8744   ins_encode %{
8745     Register Rp = $p$$Register;
8746     Register Rq = $q$$Register;
8747     Register Ry = $y$$Register;
8748     Label done;
8749     __ subl(Rp, Rq);
8750     __ jccb(Assembler::greaterEqual, done);
8751     __ addl(Rp, Ry);
8752     __ bind(done);
8753   %}
8754 
8755   ins_pipe(pipe_cmplt);
8756 %}
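
// Note (illustrative only): cadd_cmpLTMask collapses the branch-free "mask and add" tree
//   p = (AndI (CmpLTMask p q) y) + (SubI p q)
// back into a compare-and-branch, trading the mask register for a short forward branch.
// What it computes, as a C sketch (helper name is ours):
//
//   static int cadd_cmpLTMask(int p, int q, int y) {
//     int r = p - q;            // SUB p,q
//     if (p < q) r += y;        // JGE done ; ADD p,y   (add y only when p < q)
//     return r;                 // done:
//   }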
8757 
8758 /* better to save a register than avoid a branch */
8759 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8760   match(Set y (AndI (CmpLTMask p q) y));
8761   effect(KILL cr);
8762 
8763   ins_cost(300);
8764 
8765   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8766             "JLT      done\n\t"
8767             "XORL     $y, $y\n"
8768             "done:  " %}
8769   ins_encode %{
8770     Register Rp = $p$$Register;
8771     Register Rq = $q$$Register;
8772     Register Ry = $y$$Register;
8773     Label done;
8774     __ cmpl(Rp, Rq);
8775     __ jccb(Assembler::less, done);
8776     __ xorl(Ry, Ry);
8777     __ bind(done);
8778   %}
8779 
8780   ins_pipe(pipe_cmplt);
8781 %}
8782 
8783 /* If I enable this, I encourage spilling in the inner loop of compress.
8784 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8785   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8786 */
8787 //----------Overflow Math Instructions-----------------------------------------
8788 
8789 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8790 %{
8791   match(Set cr (OverflowAddI op1 op2));
8792   effect(DEF cr, USE_KILL op1, USE op2);
8793 
8794   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8795 
8796   ins_encode %{
8797     __ addl($op1$$Register, $op2$$Register);
8798   %}
8799   ins_pipe(ialu_reg_reg);
8800 %}
8801 
8802 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8803 %{
8804   match(Set cr (OverflowAddI op1 op2));
8805   effect(DEF cr, USE_KILL op1, USE op2);
8806 
8807   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8808 
8809   ins_encode %{
8810     __ addl($op1$$Register, $op2$$constant);
8811   %}
8812   ins_pipe(ialu_reg_reg);
8813 %}
8814 
8815 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8816 %{
8817   match(Set cr (OverflowSubI op1 op2));
8818 
8819   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8820   ins_encode %{
8821     __ cmpl($op1$$Register, $op2$$Register);
8822   %}
8823   ins_pipe(ialu_reg_reg);
8824 %}
8825 
8826 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8827 %{
8828   match(Set cr (OverflowSubI op1 op2));
8829 
8830   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8831   ins_encode %{
8832     __ cmpl($op1$$Register, $op2$$constant);
8833   %}
8834   ins_pipe(ialu_reg_reg);
8835 %}
8836 
8837 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8838 %{
8839   match(Set cr (OverflowSubI zero op2));
8840   effect(DEF cr, USE_KILL op2);
8841 
8842   format %{ "NEG    $op2\t# overflow check int" %}
8843   ins_encode %{
8844     __ negl($op2$$Register);
8845   %}
8846   ins_pipe(ialu_reg_reg);
8847 %}
8848 
8849 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8850 %{
8851   match(Set cr (OverflowMulI op1 op2));
8852   effect(DEF cr, USE_KILL op1, USE op2);
8853 
8854   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8855   ins_encode %{
8856     __ imull($op1$$Register, $op2$$Register);
8857   %}
8858   ins_pipe(ialu_reg_reg_alu0);
8859 %}
8860 
8861 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8862 %{
8863   match(Set cr (OverflowMulI op1 op2));
8864   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8865 
8866   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8867   ins_encode %{
8868     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8869   %}
8870   ins_pipe(ialu_reg_reg_alu0);
8871 %}
8872 
8873 // Integer Absolute Instructions
8874 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
8875 %{
8876   match(Set dst (AbsI src));
8877   effect(TEMP dst, TEMP tmp, KILL cr);
8878   format %{ "movl $tmp, $src\n\t"
8879             "sarl $tmp, 31\n\t"
8880             "movl $dst, $src\n\t"
8881             "xorl $dst, $tmp\n\t"
8882             "subl $dst, $tmp\n"
8883           %}
8884   ins_encode %{
8885     __ movl($tmp$$Register, $src$$Register);
8886     __ sarl($tmp$$Register, 31);
8887     __ movl($dst$$Register, $src$$Register);
8888     __ xorl($dst$$Register, $tmp$$Register);
8889     __ subl($dst$$Register, $tmp$$Register);
8890   %}
8891 
8892   ins_pipe(ialu_reg_reg);
8893 %}
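
// Note (illustrative only): the AbsI rule above is the classic sign-mask trick: tmp is 0
// for a non-negative src and -1 for a negative one, and (src ^ tmp) - tmp is then either
// the identity or a two's-complement negation.  As in Java's Math.abs, abs(min_jint)
// stays min_jint.  C sketch (assumes arithmetic right shift of signed values):
//
//   #include <stdint.h>
//   static int32_t abs_branchless(int32_t src) {
//     int32_t tmp = src >> 31;       // SARL tmp,31: 0 or -1
//     return (src ^ tmp) - tmp;      // XORL ; SUBL
//   }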
8894 
8895 //----------Long Instructions------------------------------------------------
8896 // Add Long Register with Register
8897 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8898   match(Set dst (AddL dst src));
8899   effect(KILL cr);
8900   ins_cost(200);
8901   format %{ "ADD    $dst.lo,$src.lo\n\t"
8902             "ADC    $dst.hi,$src.hi" %}
8903   opcode(0x03, 0x13);
8904   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8905   ins_pipe( ialu_reg_reg_long );
8906 %}
8907 
8908 // Add Long Register with Immediate
8909 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8910   match(Set dst (AddL dst src));
8911   effect(KILL cr);
8912   format %{ "ADD    $dst.lo,$src.lo\n\t"
8913             "ADC    $dst.hi,$src.hi" %}
8914   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8915   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8916   ins_pipe( ialu_reg_long );
8917 %}
8918 
8919 // Add Long Register with Memory
8920 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8921   match(Set dst (AddL dst (LoadL mem)));
8922   effect(KILL cr);
8923   ins_cost(125);
8924   format %{ "ADD    $dst.lo,$mem\n\t"
8925             "ADC    $dst.hi,$mem+4" %}
8926   opcode(0x03, 0x13);
8927   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8928   ins_pipe( ialu_reg_long_mem );
8929 %}
8930 
8931 // Subtract Long Register with Register.
8932 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8933   match(Set dst (SubL dst src));
8934   effect(KILL cr);
8935   ins_cost(200);
8936   format %{ "SUB    $dst.lo,$src.lo\n\t"
8937             "SBB    $dst.hi,$src.hi" %}
8938   opcode(0x2B, 0x1B);
8939   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8940   ins_pipe( ialu_reg_reg_long );
8941 %}
8942 
8943 // Subtract Long Register with Immediate
8944 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8945   match(Set dst (SubL dst src));
8946   effect(KILL cr);
8947   format %{ "SUB    $dst.lo,$src.lo\n\t"
8948             "SBB    $dst.hi,$src.hi" %}
8949   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8950   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8951   ins_pipe( ialu_reg_long );
8952 %}
8953 
8954 // Subtract Long Register with Memory
8955 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8956   match(Set dst (SubL dst (LoadL mem)));
8957   effect(KILL cr);
8958   ins_cost(125);
8959   format %{ "SUB    $dst.lo,$mem\n\t"
8960             "SBB    $dst.hi,$mem+4" %}
8961   opcode(0x2B, 0x1B);
8962   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8963   ins_pipe( ialu_reg_long_mem );
8964 %}
8965 
8966 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8967   match(Set dst (SubL zero dst));
8968   effect(KILL cr);
8969   ins_cost(300);
8970   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8971   ins_encode( neg_long(dst) );
8972   ins_pipe( ialu_reg_reg_long );
8973 %}
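
// Note (illustrative only): negL negates the two-register long the usual way: negate both
// halves, then subtract the borrow the low half produced.  Helper name is ours:
//
//   #include <stdint.h>
//   static void neg64(uint32_t* lo, uint32_t* hi) {
//     uint32_t borrow = (*lo != 0);   // NEG dst.lo will set CF iff the low half is non-zero
//     *hi = (uint32_t)(0 - *hi);      // NEG dst.hi
//     *lo = (uint32_t)(0 - *lo);      // NEG dst.lo
//     *hi -= borrow;                  // SBB dst.hi,0
//   }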
8974 
8975 // And Long Register with Register
8976 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8977   match(Set dst (AndL dst src));
8978   effect(KILL cr);
8979   format %{ "AND    $dst.lo,$src.lo\n\t"
8980             "AND    $dst.hi,$src.hi" %}
8981   opcode(0x23,0x23);
8982   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8983   ins_pipe( ialu_reg_reg_long );
8984 %}
8985 
8986 // And Long Register with Immediate
8987 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8988   match(Set dst (AndL dst src));
8989   effect(KILL cr);
8990   format %{ "AND    $dst.lo,$src.lo\n\t"
8991             "AND    $dst.hi,$src.hi" %}
8992   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8993   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8994   ins_pipe( ialu_reg_long );
8995 %}
8996 
8997 // And Long Register with Memory
8998 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8999   match(Set dst (AndL dst (LoadL mem)));
9000   effect(KILL cr);
9001   ins_cost(125);
9002   format %{ "AND    $dst.lo,$mem\n\t"
9003             "AND    $dst.hi,$mem+4" %}
9004   opcode(0x23, 0x23);
9005   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9006   ins_pipe( ialu_reg_long_mem );
9007 %}
9008 
9009 // BMI1 instructions
9010 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
9011   match(Set dst (AndL (XorL src1 minus_1) src2));
9012   predicate(UseBMI1Instructions);
9013   effect(KILL cr, TEMP dst);
9014 
9015   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
9016             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
9017          %}
9018 
9019   ins_encode %{
9020     Register Rdst = $dst$$Register;
9021     Register Rsrc1 = $src1$$Register;
9022     Register Rsrc2 = $src2$$Register;
9023     __ andnl(Rdst, Rsrc1, Rsrc2);
9024     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
9025   %}
9026   ins_pipe(ialu_reg_reg_long);
9027 %}
9028 
9029 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
9030   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
9031   predicate(UseBMI1Instructions);
9032   effect(KILL cr, TEMP dst);
9033 
9034   ins_cost(125);
9035   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
9036             "ANDNL  $dst.hi, $src1.hi, $src2+4"
9037          %}
9038 
9039   ins_encode %{
9040     Register Rdst = $dst$$Register;
9041     Register Rsrc1 = $src1$$Register;
9042     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
9043 
9044     __ andnl(Rdst, Rsrc1, $src2$$Address);
9045     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
9046   %}
9047   ins_pipe(ialu_reg_mem);
9048 %}
9049 
9050 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
9051   match(Set dst (AndL (SubL imm_zero src) src));
9052   predicate(UseBMI1Instructions);
9053   effect(KILL cr, TEMP dst);
9054 
9055   format %{ "MOVL   $dst.hi, 0\n\t"
9056             "BLSIL  $dst.lo, $src.lo\n\t"
9057             "JNZ    done\n\t"
9058             "BLSIL  $dst.hi, $src.hi\n"
9059             "done:"
9060          %}
9061 
9062   ins_encode %{
9063     Label done;
9064     Register Rdst = $dst$$Register;
9065     Register Rsrc = $src$$Register;
9066     __ movl(HIGH_FROM_LOW(Rdst), 0);
9067     __ blsil(Rdst, Rsrc);
9068     __ jccb(Assembler::notZero, done);
9069     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9070     __ bind(done);
9071   %}
9072   ins_pipe(ialu_reg);
9073 %}
9074 
9075 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
9076   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9077   predicate(UseBMI1Instructions);
9078   effect(KILL cr, TEMP dst);
9079 
9080   ins_cost(125);
9081   format %{ "MOVL   $dst.hi, 0\n\t"
9082             "BLSIL  $dst.lo, $src\n\t"
9083             "JNZ    done\n\t"
9084             "BLSIL  $dst.hi, $src+4\n"
9085             "done:"
9086          %}
9087 
9088   ins_encode %{
9089     Label done;
9090     Register Rdst = $dst$$Register;
9091     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9092 
9093     __ movl(HIGH_FROM_LOW(Rdst), 0);
9094     __ blsil(Rdst, $src$$Address);
9095     __ jccb(Assembler::notZero, done);
9096     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9097     __ bind(done);
9098   %}
9099   ins_pipe(ialu_reg_mem);
9100 %}
9101 
9102 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9103 %{
9104   match(Set dst (XorL (AddL src minus_1) src));
9105   predicate(UseBMI1Instructions);
9106   effect(KILL cr, TEMP dst);
9107 
9108   format %{ "MOVL    $dst.hi, 0\n\t"
9109             "BLSMSKL $dst.lo, $src.lo\n\t"
9110             "JNC     done\n\t"
9111             "BLSMSKL $dst.hi, $src.hi\n"
9112             "done:"
9113          %}
9114 
9115   ins_encode %{
9116     Label done;
9117     Register Rdst = $dst$$Register;
9118     Register Rsrc = $src$$Register;
9119     __ movl(HIGH_FROM_LOW(Rdst), 0);
9120     __ blsmskl(Rdst, Rsrc);
9121     __ jccb(Assembler::carryClear, done);
9122     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9123     __ bind(done);
9124   %}
9125 
9126   ins_pipe(ialu_reg);
9127 %}
9128 
9129 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9130 %{
9131   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9132   predicate(UseBMI1Instructions);
9133   effect(KILL cr, TEMP dst);
9134 
9135   ins_cost(125);
9136   format %{ "MOVL    $dst.hi, 0\n\t"
9137             "BLSMSKL $dst.lo, $src\n\t"
9138             "JNC     done\n\t"
9139             "BLSMSKL $dst.hi, $src+4\n"
9140             "done:"
9141          %}
9142 
9143   ins_encode %{
9144     Label done;
9145     Register Rdst = $dst$$Register;
9146     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9147 
9148     __ movl(HIGH_FROM_LOW(Rdst), 0);
9149     __ blsmskl(Rdst, $src$$Address);
9150     __ jccb(Assembler::carryClear, done);
9151     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9152     __ bind(done);
9153   %}
9154 
9155   ins_pipe(ialu_reg_mem);
9156 %}
9157 
9158 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9159 %{
9160   match(Set dst (AndL (AddL src minus_1) src) );
9161   predicate(UseBMI1Instructions);
9162   effect(KILL cr, TEMP dst);
9163 
9164   format %{ "MOVL   $dst.hi, $src.hi\n\t"
9165             "BLSRL  $dst.lo, $src.lo\n\t"
9166             "JNC    done\n\t"
9167             "BLSRL  $dst.hi, $src.hi\n"
9168             "done:"
9169   %}
9170 
9171   ins_encode %{
9172     Label done;
9173     Register Rdst = $dst$$Register;
9174     Register Rsrc = $src$$Register;
9175     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9176     __ blsrl(Rdst, Rsrc);
9177     __ jccb(Assembler::carryClear, done);
9178     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9179     __ bind(done);
9180   %}
9181 
9182   ins_pipe(ialu_reg);
9183 %}
9184 
9185 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9186 %{
9187   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9188   predicate(UseBMI1Instructions);
9189   effect(KILL cr, TEMP dst);
9190 
9191   ins_cost(125);
9192   format %{ "MOVL   $dst.hi, $src+4\n\t"
9193             "BLSRL  $dst.lo, $src\n\t"
9194             "JNC    done\n\t"
9195             "BLSRL  $dst.hi, $src+4\n"
9196             "done:"
9197   %}
9198 
9199   ins_encode %{
9200     Label done;
9201     Register Rdst = $dst$$Register;
9202     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9203     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9204     __ blsrl(Rdst, $src$$Address);
9205     __ jccb(Assembler::carryClear, done);
9206     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9207     __ bind(done);
9208   %}
9209 
9210   ins_pipe(ialu_reg_mem);
9211 %}
9212 
9213 // Or Long Register with Register
9214 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9215   match(Set dst (OrL dst src));
9216   effect(KILL cr);
9217   format %{ "OR     $dst.lo,$src.lo\n\t"
9218             "OR     $dst.hi,$src.hi" %}
9219   opcode(0x0B,0x0B);
9220   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9221   ins_pipe( ialu_reg_reg_long );
9222 %}
9223 
9224 // Or Long Register with Immediate
9225 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9226   match(Set dst (OrL dst src));
9227   effect(KILL cr);
9228   format %{ "OR     $dst.lo,$src.lo\n\t"
9229             "OR     $dst.hi,$src.hi" %}
9230   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9231   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9232   ins_pipe( ialu_reg_long );
9233 %}
9234 
9235 // Or Long Register with Memory
9236 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9237   match(Set dst (OrL dst (LoadL mem)));
9238   effect(KILL cr);
9239   ins_cost(125);
9240   format %{ "OR     $dst.lo,$mem\n\t"
9241             "OR     $dst.hi,$mem+4" %}
9242   opcode(0x0B,0x0B);
9243   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9244   ins_pipe( ialu_reg_long_mem );
9245 %}
9246 
9247 // Xor Long Register with Register
9248 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9249   match(Set dst (XorL dst src));
9250   effect(KILL cr);
9251   format %{ "XOR    $dst.lo,$src.lo\n\t"
9252             "XOR    $dst.hi,$src.hi" %}
9253   opcode(0x33,0x33);
9254   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9255   ins_pipe( ialu_reg_reg_long );
9256 %}
9257 
9258 // Xor Long Register with Immediate -1
9259 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9260   match(Set dst (XorL dst imm));
9261   format %{ "NOT    $dst.lo\n\t"
9262             "NOT    $dst.hi" %}
9263   ins_encode %{
9264      __ notl($dst$$Register);
9265      __ notl(HIGH_FROM_LOW($dst$$Register));
9266   %}
9267   ins_pipe( ialu_reg_long );
9268 %}
9269 
9270 // Xor Long Register with Immediate
9271 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9272   match(Set dst (XorL dst src));
9273   effect(KILL cr);
9274   format %{ "XOR    $dst.lo,$src.lo\n\t"
9275             "XOR    $dst.hi,$src.hi" %}
9276   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9277   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9278   ins_pipe( ialu_reg_long );
9279 %}
9280 
9281 // Xor Long Register with Memory
9282 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9283   match(Set dst (XorL dst (LoadL mem)));
9284   effect(KILL cr);
9285   ins_cost(125);
9286   format %{ "XOR    $dst.lo,$mem\n\t"
9287             "XOR    $dst.hi,$mem+4" %}
9288   opcode(0x33,0x33);
9289   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9290   ins_pipe( ialu_reg_long_mem );
9291 %}
9292 
9293 // Shift Left Long by 1
9294 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9295   predicate(UseNewLongLShift);
9296   match(Set dst (LShiftL dst cnt));
9297   effect(KILL cr);
9298   ins_cost(100);
9299   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9300             "ADC    $dst.hi,$dst.hi" %}
9301   ins_encode %{
9302     __ addl($dst$$Register,$dst$$Register);
9303     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9304   %}
9305   ins_pipe( ialu_reg_long );
9306 %}
9307 
9308 // Shift Left Long by 2
9309 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9310   predicate(UseNewLongLShift);
9311   match(Set dst (LShiftL dst cnt));
9312   effect(KILL cr);
9313   ins_cost(100);
9314   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9315             "ADC    $dst.hi,$dst.hi\n\t"
9316             "ADD    $dst.lo,$dst.lo\n\t"
9317             "ADC    $dst.hi,$dst.hi" %}
9318   ins_encode %{
9319     __ addl($dst$$Register,$dst$$Register);
9320     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9321     __ addl($dst$$Register,$dst$$Register);
9322     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9323   %}
9324   ins_pipe( ialu_reg_long );
9325 %}
9326 
9327 // Shift Left Long by 3
9328 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9329   predicate(UseNewLongLShift);
9330   match(Set dst (LShiftL dst cnt));
9331   effect(KILL cr);
9332   ins_cost(100);
9333   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9334             "ADC    $dst.hi,$dst.hi\n\t"
9335             "ADD    $dst.lo,$dst.lo\n\t"
9336             "ADC    $dst.hi,$dst.hi\n\t"
9337             "ADD    $dst.lo,$dst.lo\n\t"
9338             "ADC    $dst.hi,$dst.hi" %}
9339   ins_encode %{
9340     __ addl($dst$$Register,$dst$$Register);
9341     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9342     __ addl($dst$$Register,$dst$$Register);
9343     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9344     __ addl($dst$$Register,$dst$$Register);
9345     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9346   %}
9347   ins_pipe( ialu_reg_long );
9348 %}
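
// Note (illustrative only): the 1/2/3-bit cases above shift the 64-bit pair left by
// repeated doubling: ADD lo,lo doubles the low half and leaves the bit that crosses the
// word boundary in CF, and ADC hi,hi doubles the high half while pulling that bit in.
// One step in C (helper name is ours):
//
//   #include <stdint.h>
//   static void shl1_64(uint32_t* lo, uint32_t* hi) {
//     uint32_t carry = *lo >> 31;     // bit 31 of the low half crosses into the high half
//     *lo += *lo;                     // ADD dst.lo,dst.lo
//     *hi = *hi + *hi + carry;        // ADC dst.hi,dst.hi
//   }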
9349 
9350 // Shift Left Long by 1-31
9351 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9352   match(Set dst (LShiftL dst cnt));
9353   effect(KILL cr);
9354   ins_cost(200);
9355   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9356             "SHL    $dst.lo,$cnt" %}
9357   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9358   ins_encode( move_long_small_shift(dst,cnt) );
9359   ins_pipe( ialu_reg_long );
9360 %}
9361 
9362 // Shift Left Long by 32-63
9363 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9364   match(Set dst (LShiftL dst cnt));
9365   effect(KILL cr);
9366   ins_cost(300);
9367   format %{ "MOV    $dst.hi,$dst.lo\n"
9368           "\tSHL    $dst.hi,$cnt-32\n"
9369           "\tXOR    $dst.lo,$dst.lo" %}
9370   opcode(0xC1, 0x4);  /* C1 /4 ib */
9371   ins_encode( move_long_big_shift_clr(dst,cnt) );
9372   ins_pipe( ialu_reg_long );
9373 %}
9374 
9375 // Shift Left Long by variable
9376 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9377   match(Set dst (LShiftL dst shift));
9378   effect(KILL cr);
9379   ins_cost(500+200);
9380   size(17);
9381   format %{ "TEST   $shift,32\n\t"
9382             "JEQ,s  small\n\t"
9383             "MOV    $dst.hi,$dst.lo\n\t"
9384             "XOR    $dst.lo,$dst.lo\n"
9385     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9386             "SHL    $dst.lo,$shift" %}
9387   ins_encode( shift_left_long( dst, shift ) );
9388   ins_pipe( pipe_slow );
9389 %}
9390 
9391 // Shift Right Long by 1-31
9392 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9393   match(Set dst (URShiftL dst cnt));
9394   effect(KILL cr);
9395   ins_cost(200);
9396   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9397             "SHR    $dst.hi,$cnt" %}
9398   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9399   ins_encode( move_long_small_shift(dst,cnt) );
9400   ins_pipe( ialu_reg_long );
9401 %}
9402 
9403 // Shift Right Long by 32-63
9404 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9405   match(Set dst (URShiftL dst cnt));
9406   effect(KILL cr);
9407   ins_cost(300);
9408   format %{ "MOV    $dst.lo,$dst.hi\n"
9409           "\tSHR    $dst.lo,$cnt-32\n"
9410           "\tXOR    $dst.hi,$dst.hi" %}
9411   opcode(0xC1, 0x5);  /* C1 /5 ib */
9412   ins_encode( move_long_big_shift_clr(dst,cnt) );
9413   ins_pipe( ialu_reg_long );
9414 %}
9415 
9416 // Shift Right Long by variable
9417 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9418   match(Set dst (URShiftL dst shift));
9419   effect(KILL cr);
9420   ins_cost(600);
9421   size(17);
9422   format %{ "TEST   $shift,32\n\t"
9423             "JEQ,s  small\n\t"
9424             "MOV    $dst.lo,$dst.hi\n\t"
9425             "XOR    $dst.hi,$dst.hi\n"
9426     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9427             "SHR    $dst.hi,$shift" %}
9428   ins_encode( shift_right_long( dst, shift ) );
9429   ins_pipe( pipe_slow );
9430 %}
9431 
9432 // Shift Right Long by 1-31
9433 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9434   match(Set dst (RShiftL dst cnt));
9435   effect(KILL cr);
9436   ins_cost(200);
9437   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9438             "SAR    $dst.hi,$cnt" %}
9439   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9440   ins_encode( move_long_small_shift(dst,cnt) );
9441   ins_pipe( ialu_reg_long );
9442 %}
9443 
9444 // Shift Right Long by 32-63
9445 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9446   match(Set dst (RShiftL dst cnt));
9447   effect(KILL cr);
9448   ins_cost(300);
9449   format %{ "MOV    $dst.lo,$dst.hi\n"
9450           "\tSAR    $dst.lo,$cnt-32\n"
9451           "\tSAR    $dst.hi,31" %}
9452   opcode(0xC1, 0x7);  /* C1 /7 ib */
9453   ins_encode( move_long_big_shift_sign(dst,cnt) );
9454   ins_pipe( ialu_reg_long );
9455 %}
9456 
9457 // Shift Right arithmetic Long by variable
9458 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9459   match(Set dst (RShiftL dst shift));
9460   effect(KILL cr);
9461   ins_cost(600);
9462   size(18);
9463   format %{ "TEST   $shift,32\n\t"
9464             "JEQ,s  small\n\t"
9465             "MOV    $dst.lo,$dst.hi\n\t"
9466             "SAR    $dst.hi,31\n"
9467     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9468             "SAR    $dst.hi,$shift" %}
9469   ins_encode( shift_right_arith_long( dst, shift ) );
9470   ins_pipe( pipe_slow );
9471 %}
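
// Note (illustrative only): the long-shift rules above all follow the same double-word
// pattern: counts below 32 use SHLD/SHRD to funnel bits between the halves, while counts
// of 32-63 first move one half into the other and then shift the remaining 0-31 bits.
// Left-shift sketch in C (helper name is ours; only the low six bits of the count are
// assumed to matter, as for a Java long shift):
//
//   #include <stdint.h>
//   static void shl64(uint32_t* lo, uint32_t* hi, unsigned cnt) {
//     if (cnt & 32) {                 // TEST shift,32 ; JEQ,s small
//       *hi = *lo;                    // MOV dst.hi,dst.lo
//       *lo = 0;                      // XOR dst.lo,dst.lo
//     }
//     cnt &= 31;
//     *hi = (*hi << cnt) | (cnt ? (*lo >> (32 - cnt)) : 0);   // SHLD dst.hi,dst.lo,shift
//     *lo <<= cnt;                                            // SHL  dst.lo,shift
//   }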
9472 
9473 
9474 //----------Double Instructions------------------------------------------------
9475 // Double Math
9476 
9477 // Compare & branch
9478 
9479 // P6 version of double compare, sets condition codes in EFLAGS
9480 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9481   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9482   match(Set cr (CmpD src1 src2));
9483   effect(KILL rax);
9484   ins_cost(150);
9485   format %{ "FLD    $src1\n\t"
9486             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9487             "JNP    exit\n\t"
9488             "MOV    ah,1       // saw a NaN, set CF\n\t"
9489             "SAHF\n"
9490      "exit:\tNOP               // avoid branch to branch" %}
9491   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9492   ins_encode( Push_Reg_DPR(src1),
9493               OpcP, RegOpc(src2),
9494               cmpF_P6_fixup );
9495   ins_pipe( pipe_slow );
9496 %}
9497 
9498 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9499   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9500   match(Set cr (CmpD src1 src2));
9501   ins_cost(150);
9502   format %{ "FLD    $src1\n\t"
9503             "FUCOMIP ST,$src2  // P6 instruction" %}
9504   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9505   ins_encode( Push_Reg_DPR(src1),
9506               OpcP, RegOpc(src2));
9507   ins_pipe( pipe_slow );
9508 %}
9509 
9510 // Compare & branch
9511 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9512   predicate(UseSSE<=1);
9513   match(Set cr (CmpD src1 src2));
9514   effect(KILL rax);
9515   ins_cost(200);
9516   format %{ "FLD    $src1\n\t"
9517             "FCOMp  $src2\n\t"
9518             "FNSTSW AX\n\t"
9519             "TEST   AX,0x400\n\t"
9520             "JZ,s   flags\n\t"
9521             "MOV    AH,1\t# unordered treat as LT\n"
9522     "flags:\tSAHF" %}
9523   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9524   ins_encode( Push_Reg_DPR(src1),
9525               OpcP, RegOpc(src2),
9526               fpu_flags);
9527   ins_pipe( pipe_slow );
9528 %}
9529 
9530 // Compare vs zero into -1,0,1
9531 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9532   predicate(UseSSE<=1);
9533   match(Set dst (CmpD3 src1 zero));
9534   effect(KILL cr, KILL rax);
9535   ins_cost(280);
9536   format %{ "FTSTD  $dst,$src1" %}
9537   opcode(0xE4, 0xD9);
9538   ins_encode( Push_Reg_DPR(src1),
9539               OpcS, OpcP, PopFPU,
9540               CmpF_Result(dst));
9541   ins_pipe( pipe_slow );
9542 %}
9543 
9544 // Compare into -1,0,1
9545 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9546   predicate(UseSSE<=1);
9547   match(Set dst (CmpD3 src1 src2));
9548   effect(KILL cr, KILL rax);
9549   ins_cost(300);
9550   format %{ "FCMPD  $dst,$src1,$src2" %}
9551   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9552   ins_encode( Push_Reg_DPR(src1),
9553               OpcP, RegOpc(src2),
9554               CmpF_Result(dst));
9555   ins_pipe( pipe_slow );
9556 %}
9557 
9558 // float compare and set condition codes in EFLAGS by XMM regs
9559 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9560   predicate(UseSSE>=2);
9561   match(Set cr (CmpD src1 src2));
9562   ins_cost(145);
9563   format %{ "UCOMISD $src1,$src2\n\t"
9564             "JNP,s   exit\n\t"
9565             "PUSHF\t# saw NaN, set CF\n\t"
9566             "AND     [rsp], #0xffffff2b\n\t"
9567             "POPF\n"
9568     "exit:" %}
9569   ins_encode %{
9570     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9571     emit_cmpfp_fixup(_masm);
9572   %}
9573   ins_pipe( pipe_slow );
9574 %}
9575 
9576 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9577   predicate(UseSSE>=2);
9578   match(Set cr (CmpD src1 src2));
9579   ins_cost(100);
9580   format %{ "UCOMISD $src1,$src2" %}
9581   ins_encode %{
9582     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9583   %}
9584   ins_pipe( pipe_slow );
9585 %}
9586 
9587 // float compare and set condition codes in EFLAGS by XMM regs
9588 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9589   predicate(UseSSE>=2);
9590   match(Set cr (CmpD src1 (LoadD src2)));
9591   ins_cost(145);
9592   format %{ "UCOMISD $src1,$src2\n\t"
9593             "JNP,s   exit\n\t"
9594             "PUSHF\t# saw NaN, set CF\n\t"
9595             "AND     [rsp], #0xffffff2b\n\t"
9596             "POPF\n"
9597     "exit:" %}
9598   ins_encode %{
9599     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9600     emit_cmpfp_fixup(_masm);
9601   %}
9602   ins_pipe( pipe_slow );
9603 %}
9604 
9605 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9606   predicate(UseSSE>=2);
9607   match(Set cr (CmpD src1 (LoadD src2)));
9608   ins_cost(100);
9609   format %{ "UCOMISD $src1,$src2" %}
9610   ins_encode %{
9611     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9612   %}
9613   ins_pipe( pipe_slow );
9614 %}
9615 
9616 // Compare into -1,0,1 in XMM
9617 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9618   predicate(UseSSE>=2);
9619   match(Set dst (CmpD3 src1 src2));
9620   effect(KILL cr);
9621   ins_cost(255);
9622   format %{ "UCOMISD $src1, $src2\n\t"
9623             "MOV     $dst, #-1\n\t"
9624             "JP,s    done\n\t"
9625             "JB,s    done\n\t"
9626             "SETNE   $dst\n\t"
9627             "MOVZB   $dst, $dst\n"
9628     "done:" %}
9629   ins_encode %{
9630     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9631     emit_cmpfp3(_masm, $dst$$Register);
9632   %}
9633   ins_pipe( pipe_slow );
9634 %}
9635 
9636 // Compare into -1,0,1 in XMM and memory
9637 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9638   predicate(UseSSE>=2);
9639   match(Set dst (CmpD3 src1 (LoadD src2)));
9640   effect(KILL cr);
9641   ins_cost(275);
9642   format %{ "UCOMISD $src1, $src2\n\t"
9643             "MOV     $dst, #-1\n\t"
9644             "JP,s    done\n\t"
9645             "JB,s    done\n\t"
9646             "SETNE   $dst\n\t"
9647             "MOVZB   $dst, $dst\n"
9648     "done:" %}
9649   ins_encode %{
9650     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9651     emit_cmpfp3(_masm, $dst$$Register);
9652   %}
9653   ins_pipe( pipe_slow );
9654 %}
9655 
9656 
9657 instruct subDPR_reg(regDPR dst, regDPR src) %{
9658   predicate (UseSSE <=1);
9659   match(Set dst (SubD dst src));
9660 
9661   format %{ "FLD    $src\n\t"
9662             "DSUBp  $dst,ST" %}
9663   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9664   ins_cost(150);
9665   ins_encode( Push_Reg_DPR(src),
9666               OpcP, RegOpc(dst) );
9667   ins_pipe( fpu_reg_reg );
9668 %}
9669 
9670 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9671   predicate (UseSSE <=1);
9672   match(Set dst (RoundDouble (SubD src1 src2)));
9673   ins_cost(250);
9674 
9675   format %{ "FLD    $src2\n\t"
9676             "DSUB   ST,$src1\n\t"
9677             "FSTP_D $dst\t# D-round" %}
9678   opcode(0xD8, 0x5);
9679   ins_encode( Push_Reg_DPR(src2),
9680               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9681   ins_pipe( fpu_mem_reg_reg );
9682 %}
9683 
9684 
9685 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9686   predicate (UseSSE <=1);
9687   match(Set dst (SubD dst (LoadD src)));
9688   ins_cost(150);
9689 
9690   format %{ "FLD    $src\n\t"
9691             "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
9693   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9694               OpcP, RegOpc(dst) );
9695   ins_pipe( fpu_reg_mem );
9696 %}
9697 
9698 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9699   predicate (UseSSE<=1);
9700   match(Set dst (AbsD src));
9701   ins_cost(100);
9702   format %{ "FABS" %}
9703   opcode(0xE1, 0xD9);
9704   ins_encode( OpcS, OpcP );
9705   ins_pipe( fpu_reg_reg );
9706 %}
9707 
9708 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9709   predicate(UseSSE<=1);
9710   match(Set dst (NegD src));
9711   ins_cost(100);
9712   format %{ "FCHS" %}
9713   opcode(0xE0, 0xD9);
9714   ins_encode( OpcS, OpcP );
9715   ins_pipe( fpu_reg_reg );
9716 %}
9717 
9718 instruct addDPR_reg(regDPR dst, regDPR src) %{
9719   predicate(UseSSE<=1);
9720   match(Set dst (AddD dst src));
9721   format %{ "FLD    $src\n\t"
9722             "DADD   $dst,ST" %}
9723   size(4);
9724   ins_cost(150);
9725   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9726   ins_encode( Push_Reg_DPR(src),
9727               OpcP, RegOpc(dst) );
9728   ins_pipe( fpu_reg_reg );
9729 %}
9730 
9731 
9732 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9733   predicate(UseSSE<=1);
9734   match(Set dst (RoundDouble (AddD src1 src2)));
9735   ins_cost(250);
9736 
9737   format %{ "FLD    $src2\n\t"
9738             "DADD   ST,$src1\n\t"
9739             "FSTP_D $dst\t# D-round" %}
9740   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9741   ins_encode( Push_Reg_DPR(src2),
9742               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9743   ins_pipe( fpu_mem_reg_reg );
9744 %}
9745 
9746 
9747 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9748   predicate(UseSSE<=1);
9749   match(Set dst (AddD dst (LoadD src)));
9750   ins_cost(150);
9751 
9752   format %{ "FLD    $src\n\t"
9753             "DADDp  $dst,ST" %}
9754   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9755   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9756               OpcP, RegOpc(dst) );
9757   ins_pipe( fpu_reg_mem );
9758 %}
9759 
9760 // add-to-memory
9761 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9762   predicate(UseSSE<=1);
9763   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9764   ins_cost(150);
9765 
9766   format %{ "FLD_D  $dst\n\t"
9767             "DADD   ST,$src\n\t"
9768             "FST_D  $dst" %}
9769   opcode(0xDD, 0x0);
9770   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9771               Opcode(0xD8), RegOpc(src),
9772               set_instruction_start,
9773               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9774   ins_pipe( fpu_reg_mem );
9775 %}
9776 
9777 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9778   predicate(UseSSE<=1);
9779   match(Set dst (AddD dst con));
9780   ins_cost(125);
9781   format %{ "FLD1\n\t"
9782             "DADDp  $dst,ST" %}
9783   ins_encode %{
9784     __ fld1();
9785     __ faddp($dst$$reg);
9786   %}
9787   ins_pipe(fpu_reg);
9788 %}
9789 
9790 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9791   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9792   match(Set dst (AddD dst con));
9793   ins_cost(200);
9794   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9795             "DADDp  $dst,ST" %}
9796   ins_encode %{
9797     __ fld_d($constantaddress($con));
9798     __ faddp($dst$$reg);
9799   %}
9800   ins_pipe(fpu_reg_mem);
9801 %}
9802 
9803 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9804   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9805   match(Set dst (RoundDouble (AddD src con)));
9806   ins_cost(200);
9807   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9808             "DADD   ST,$src\n\t"
9809             "FSTP_D $dst\t# D-round" %}
9810   ins_encode %{
9811     __ fld_d($constantaddress($con));
9812     __ fadd($src$$reg);
9813     __ fstp_d(Address(rsp, $dst$$disp));
9814   %}
9815   ins_pipe(fpu_mem_reg_con);
9816 %}
9817 
9818 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9819   predicate(UseSSE<=1);
9820   match(Set dst (MulD dst src));
9821   format %{ "FLD    $src\n\t"
9822             "DMULp  $dst,ST" %}
9823   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9824   ins_cost(150);
9825   ins_encode( Push_Reg_DPR(src),
9826               OpcP, RegOpc(dst) );
9827   ins_pipe( fpu_reg_reg );
9828 %}
9829 
// Strict FP instruction biases the argument before the multiply, then
// biases the result, to avoid double rounding of subnormals.
9832 //
9833 // scale arg1 by multiplying arg1 by 2^(-15360)
9834 // load arg2
9835 // multiply scaled arg1 by arg2
9836 // rescale product by 2^(15360)
9837 //
9838 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9839   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9840   match(Set dst (MulD dst src));
9841   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9842 
9843   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9844             "DMULp  $dst,ST\n\t"
9845             "FLD    $src\n\t"
9846             "DMULp  $dst,ST\n\t"
9847             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9848             "DMULp  $dst,ST\n\t" %}
9849   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9850   ins_encode( strictfp_bias1(dst),
9851               Push_Reg_DPR(src),
9852               OpcP, RegOpc(dst),
9853               strictfp_bias2(dst) );
9854   ins_pipe( fpu_reg_reg );
9855 %}
9856 
9857 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9858   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9859   match(Set dst (MulD dst con));
9860   ins_cost(200);
9861   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9862             "DMULp  $dst,ST" %}
9863   ins_encode %{
9864     __ fld_d($constantaddress($con));
9865     __ fmulp($dst$$reg);
9866   %}
9867   ins_pipe(fpu_reg_mem);
9868 %}
9869 
9870 
9871 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9872   predicate( UseSSE<=1 );
9873   match(Set dst (MulD dst (LoadD src)));
9874   ins_cost(200);
9875   format %{ "FLD_D  $src\n\t"
9876             "DMULp  $dst,ST" %}
9877   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9878   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9879               OpcP, RegOpc(dst) );
9880   ins_pipe( fpu_reg_mem );
9881 %}
9882 
9883 //
9884 // Cisc-alternate to reg-reg multiply
9885 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9886   predicate( UseSSE<=1 );
9887   match(Set dst (MulD src (LoadD mem)));
9888   ins_cost(250);
9889   format %{ "FLD_D  $mem\n\t"
9890             "DMUL   ST,$src\n\t"
9891             "FSTP_D $dst" %}
9892   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9893   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9894               OpcReg_FPR(src),
9895               Pop_Reg_DPR(dst) );
9896   ins_pipe( fpu_reg_reg_mem );
9897 %}
9898 
9899 
9900 // MACRO3 -- addDPR a mulDPR
9901 // This instruction is a '2-address' instruction in that the result goes
9902 // back to src2.  This eliminates a move from the macro; possibly the
9903 // register allocator will have to add it back (and maybe not).
9904 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9905   predicate( UseSSE<=1 );
9906   match(Set src2 (AddD (MulD src0 src1) src2));
9907   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9908             "DMUL   ST,$src1\n\t"
9909             "DADDp  $src2,ST" %}
9910   ins_cost(250);
9911   opcode(0xDD); /* LoadD DD /0 */
9912   ins_encode( Push_Reg_FPR(src0),
9913               FMul_ST_reg(src1),
9914               FAddP_reg_ST(src2) );
9915   ins_pipe( fpu_reg_reg_reg );
9916 %}
9917 
9918 
9919 // MACRO3 -- subDPR a mulDPR
9920 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9921   predicate( UseSSE<=1 );
9922   match(Set src2 (SubD (MulD src0 src1) src2));
9923   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9924             "DMUL   ST,$src1\n\t"
9925             "DSUBRp $src2,ST" %}
9926   ins_cost(250);
9927   ins_encode( Push_Reg_FPR(src0),
9928               FMul_ST_reg(src1),
9929               Opcode(0xDE), Opc_plus(0xE0,src2));
9930   ins_pipe( fpu_reg_reg_reg );
9931 %}
9932 
9933 
9934 instruct divDPR_reg(regDPR dst, regDPR src) %{
9935   predicate( UseSSE<=1 );
9936   match(Set dst (DivD dst src));
9937 
9938   format %{ "FLD    $src\n\t"
9939             "FDIVp  $dst,ST" %}
9940   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9941   ins_cost(150);
9942   ins_encode( Push_Reg_DPR(src),
9943               OpcP, RegOpc(dst) );
9944   ins_pipe( fpu_reg_reg );
9945 %}
9946 
// Strict FP instruction biases the argument before the division, then
// biases the result, to avoid double rounding of subnormals.
9949 //
9950 // scale dividend by multiplying dividend by 2^(-15360)
9951 // load divisor
9952 // divide scaled dividend by divisor
9953 // rescale quotient by 2^(15360)
9954 //
9955 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9960 
9961   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9962             "DMULp  $dst,ST\n\t"
9963             "FLD    $src\n\t"
9964             "FDIVp  $dst,ST\n\t"
9965             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9966             "DMULp  $dst,ST\n\t" %}
9967   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9968   ins_encode( strictfp_bias1(dst),
9969               Push_Reg_DPR(src),
9970               OpcP, RegOpc(dst),
9971               strictfp_bias2(dst) );
9972   ins_pipe( fpu_reg_reg );
9973 %}
9974 
9975 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9976   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9977   match(Set dst (RoundDouble (DivD src1 src2)));
9978 
9979   format %{ "FLD    $src1\n\t"
9980             "FDIV   ST,$src2\n\t"
9981             "FSTP_D $dst\t# D-round" %}
9982   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9983   ins_encode( Push_Reg_DPR(src1),
9984               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9985   ins_pipe( fpu_mem_reg_reg );
9986 %}
9987 
9988 
9989 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9990   predicate(UseSSE<=1);
9991   match(Set dst (ModD dst src));
9992   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9993 
9994   format %{ "DMOD   $dst,$src" %}
9995   ins_cost(250);
9996   ins_encode(Push_Reg_Mod_DPR(dst, src),
9997               emitModDPR(),
9998               Push_Result_Mod_DPR(src),
9999               Pop_Reg_DPR(dst));
10000   ins_pipe( pipe_slow );
10001 %}
10002 
10003 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
10004   predicate(UseSSE>=2);
10005   match(Set dst (ModD src0 src1));
10006   effect(KILL rax, KILL cr);
10007 
10008   format %{ "SUB    ESP,8\t # DMOD\n"
10009           "\tMOVSD  [ESP+0],$src1\n"
10010           "\tFLD_D  [ESP+0]\n"
10011           "\tMOVSD  [ESP+0],$src0\n"
10012           "\tFLD_D  [ESP+0]\n"
10013      "loop:\tFPREM\n"
10014           "\tFWAIT\n"
10015           "\tFNSTSW AX\n"
10016           "\tSAHF\n"
10017           "\tJP     loop\n"
10018           "\tFSTP_D [ESP+0]\n"
10019           "\tMOVSD  $dst,[ESP+0]\n"
10020           "\tADD    ESP,8\n"
10021           "\tFSTP   ST0\t # Restore FPU Stack"
10022     %}
10023   ins_cost(250);
10024   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10025   ins_pipe( pipe_slow );
10026 %}
10027 
10028 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10029   predicate (UseSSE<=1);
  match(Set dst (AtanD dst src));
  format %{ "DATAN  $dst,$src" %}
10032   opcode(0xD9, 0xF3);
10033   ins_encode( Push_Reg_DPR(src),
10034               OpcP, OpcS, RegOpc(dst) );
10035   ins_pipe( pipe_slow );
10036 %}
10037 
10038 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10039   predicate (UseSSE>=2);
  match(Set dst (AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
  format %{ "DATAN  $dst,$src" %}
10043   opcode(0xD9, 0xF3);
10044   ins_encode( Push_SrcD(src),
10045               OpcP, OpcS, Push_ResultD(dst) );
10046   ins_pipe( pipe_slow );
10047 %}
10048 
10049 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10050   predicate (UseSSE<=1);
10051   match(Set dst (SqrtD src));
10052   format %{ "DSQRT  $dst,$src" %}
10053   opcode(0xFA, 0xD9);
10054   ins_encode( Push_Reg_DPR(src),
10055               OpcS, OpcP, Pop_Reg_DPR(dst) );
10056   ins_pipe( pipe_slow );
10057 %}
10058 
10059 //-------------Float Instructions-------------------------------
10060 // Float Math
10061 
10062 // Code for float compare:
10063 //     fcompp();
10064 //     fwait(); fnstsw_ax();
10065 //     sahf();
10066 //     movl(dst, unordered_result);
10067 //     jcc(Assembler::parity, exit);
10068 //     movl(dst, less_result);
10069 //     jcc(Assembler::below, exit);
10070 //     movl(dst, equal_result);
10071 //     jcc(Assembler::equal, exit);
10072 //     movl(dst, greater_result);
10073 //   exit:
10074 
10075 // P6 version of float compare, sets condition codes in EFLAGS
10076 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10077   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10078   match(Set cr (CmpF src1 src2));
10079   effect(KILL rax);
10080   ins_cost(150);
10081   format %{ "FLD    $src1\n\t"
10082             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10083             "JNP    exit\n\t"
10084             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10085             "SAHF\n"
10086      "exit:\tNOP               // avoid branch to branch" %}
10087   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10088   ins_encode( Push_Reg_DPR(src1),
10089               OpcP, RegOpc(src2),
10090               cmpF_P6_fixup );
10091   ins_pipe( pipe_slow );
10092 %}
10093 
10094 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10095   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10096   match(Set cr (CmpF src1 src2));
10097   ins_cost(100);
10098   format %{ "FLD    $src1\n\t"
10099             "FUCOMIP ST,$src2  // P6 instruction" %}
10100   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10101   ins_encode( Push_Reg_DPR(src1),
10102               OpcP, RegOpc(src2));
10103   ins_pipe( pipe_slow );
10104 %}
10105 
10106 
10107 // Compare & branch
10108 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10109   predicate(UseSSE == 0);
10110   match(Set cr (CmpF src1 src2));
10111   effect(KILL rax);
10112   ins_cost(200);
10113   format %{ "FLD    $src1\n\t"
10114             "FCOMp  $src2\n\t"
10115             "FNSTSW AX\n\t"
10116             "TEST   AX,0x400\n\t"
10117             "JZ,s   flags\n\t"
10118             "MOV    AH,1\t# unordered treat as LT\n"
10119     "flags:\tSAHF" %}
10120   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10121   ins_encode( Push_Reg_DPR(src1),
10122               OpcP, RegOpc(src2),
10123               fpu_flags);
10124   ins_pipe( pipe_slow );
10125 %}
10126 
10127 // Compare vs zero into -1,0,1
10128 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10129   predicate(UseSSE == 0);
10130   match(Set dst (CmpF3 src1 zero));
10131   effect(KILL cr, KILL rax);
10132   ins_cost(280);
10133   format %{ "FTSTF  $dst,$src1" %}
10134   opcode(0xE4, 0xD9);
10135   ins_encode( Push_Reg_DPR(src1),
10136               OpcS, OpcP, PopFPU,
10137               CmpF_Result(dst));
10138   ins_pipe( pipe_slow );
10139 %}
10140 
10141 // Compare into -1,0,1
10142 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10143   predicate(UseSSE == 0);
10144   match(Set dst (CmpF3 src1 src2));
10145   effect(KILL cr, KILL rax);
10146   ins_cost(300);
10147   format %{ "FCMPF  $dst,$src1,$src2" %}
10148   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10149   ins_encode( Push_Reg_DPR(src1),
10150               OpcP, RegOpc(src2),
10151               CmpF_Result(dst));
10152   ins_pipe( pipe_slow );
10153 %}
10154 
10155 // float compare and set condition codes in EFLAGS by XMM regs
10156 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10157   predicate(UseSSE>=1);
10158   match(Set cr (CmpF src1 src2));
10159   ins_cost(145);
10160   format %{ "UCOMISS $src1,$src2\n\t"
10161             "JNP,s   exit\n\t"
10162             "PUSHF\t# saw NaN, set CF\n\t"
10163             "AND     [rsp], #0xffffff2b\n\t"
10164             "POPF\n"
10165     "exit:" %}
10166   ins_encode %{
10167     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10168     emit_cmpfp_fixup(_masm);
10169   %}
10170   ins_pipe( pipe_slow );
10171 %}
10172 
10173 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10174   predicate(UseSSE>=1);
10175   match(Set cr (CmpF src1 src2));
10176   ins_cost(100);
10177   format %{ "UCOMISS $src1,$src2" %}
10178   ins_encode %{
10179     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10180   %}
10181   ins_pipe( pipe_slow );
10182 %}
10183 
10184 // float compare and set condition codes in EFLAGS by XMM regs
10185 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10186   predicate(UseSSE>=1);
10187   match(Set cr (CmpF src1 (LoadF src2)));
10188   ins_cost(165);
10189   format %{ "UCOMISS $src1,$src2\n\t"
10190             "JNP,s   exit\n\t"
10191             "PUSHF\t# saw NaN, set CF\n\t"
10192             "AND     [rsp], #0xffffff2b\n\t"
10193             "POPF\n"
10194     "exit:" %}
10195   ins_encode %{
10196     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10197     emit_cmpfp_fixup(_masm);
10198   %}
10199   ins_pipe( pipe_slow );
10200 %}
10201 
10202 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10203   predicate(UseSSE>=1);
10204   match(Set cr (CmpF src1 (LoadF src2)));
10205   ins_cost(100);
10206   format %{ "UCOMISS $src1,$src2" %}
10207   ins_encode %{
10208     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10209   %}
10210   ins_pipe( pipe_slow );
10211 %}
10212 
10213 // Compare into -1,0,1 in XMM
10214 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10215   predicate(UseSSE>=1);
10216   match(Set dst (CmpF3 src1 src2));
10217   effect(KILL cr);
10218   ins_cost(255);
10219   format %{ "UCOMISS $src1, $src2\n\t"
10220             "MOV     $dst, #-1\n\t"
10221             "JP,s    done\n\t"
10222             "JB,s    done\n\t"
10223             "SETNE   $dst\n\t"
10224             "MOVZB   $dst, $dst\n"
10225     "done:" %}
10226   ins_encode %{
10227     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10228     emit_cmpfp3(_masm, $dst$$Register);
10229   %}
10230   ins_pipe( pipe_slow );
10231 %}
10232 
10233 // Compare into -1,0,1 in XMM and memory
10234 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10235   predicate(UseSSE>=1);
10236   match(Set dst (CmpF3 src1 (LoadF src2)));
10237   effect(KILL cr);
10238   ins_cost(275);
10239   format %{ "UCOMISS $src1, $src2\n\t"
10240             "MOV     $dst, #-1\n\t"
10241             "JP,s    done\n\t"
10242             "JB,s    done\n\t"
10243             "SETNE   $dst\n\t"
10244             "MOVZB   $dst, $dst\n"
10245     "done:" %}
10246   ins_encode %{
10247     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10248     emit_cmpfp3(_masm, $dst$$Register);
10249   %}
10250   ins_pipe( pipe_slow );
10251 %}
10252 
10253 // Spill to obtain 24-bit precision
10254 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10255   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10256   match(Set dst (SubF src1 src2));
10257 
10258   format %{ "FSUB   $dst,$src1 - $src2" %}
10259   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10260   ins_encode( Push_Reg_FPR(src1),
10261               OpcReg_FPR(src2),
10262               Pop_Mem_FPR(dst) );
10263   ins_pipe( fpu_mem_reg_reg );
10264 %}
10265 //
10266 // This instruction does not round to 24-bits
10267 instruct subFPR_reg(regFPR dst, regFPR src) %{
10268   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10269   match(Set dst (SubF dst src));
10270 
10271   format %{ "FSUB   $dst,$src" %}
10272   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10273   ins_encode( Push_Reg_FPR(src),
10274               OpcP, RegOpc(dst) );
10275   ins_pipe( fpu_reg_reg );
10276 %}
10277 
10278 // Spill to obtain 24-bit precision
10279 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10280   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10281   match(Set dst (AddF src1 src2));
10282 
10283   format %{ "FADD   $dst,$src1,$src2" %}
10284   opcode(0xD8, 0x0); /* D8 C0+i */
10285   ins_encode( Push_Reg_FPR(src2),
10286               OpcReg_FPR(src1),
10287               Pop_Mem_FPR(dst) );
10288   ins_pipe( fpu_mem_reg_reg );
10289 %}
10290 //
10291 // This instruction does not round to 24-bits
10292 instruct addFPR_reg(regFPR dst, regFPR src) %{
10293   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10294   match(Set dst (AddF dst src));
10295 
10296   format %{ "FLD    $src\n\t"
10297             "FADDp  $dst,ST" %}
10298   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10299   ins_encode( Push_Reg_FPR(src),
10300               OpcP, RegOpc(dst) );
10301   ins_pipe( fpu_reg_reg );
10302 %}
10303 
10304 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10305   predicate(UseSSE==0);
10306   match(Set dst (AbsF src));
10307   ins_cost(100);
10308   format %{ "FABS" %}
10309   opcode(0xE1, 0xD9);
10310   ins_encode( OpcS, OpcP );
10311   ins_pipe( fpu_reg_reg );
10312 %}
10313 
10314 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10315   predicate(UseSSE==0);
10316   match(Set dst (NegF src));
10317   ins_cost(100);
10318   format %{ "FCHS" %}
10319   opcode(0xE0, 0xD9);
10320   ins_encode( OpcS, OpcP );
10321   ins_pipe( fpu_reg_reg );
10322 %}
10323 
10324 // Cisc-alternate to addFPR_reg
10325 // Spill to obtain 24-bit precision
10326 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10327   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10328   match(Set dst (AddF src1 (LoadF src2)));
10329 
10330   format %{ "FLD    $src2\n\t"
10331             "FADD   ST,$src1\n\t"
10332             "FSTP_S $dst" %}
10333   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10334   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10335               OpcReg_FPR(src1),
10336               Pop_Mem_FPR(dst) );
10337   ins_pipe( fpu_mem_reg_mem );
10338 %}
10339 //
10340 // Cisc-alternate to addFPR_reg
10341 // This instruction does not round to 24-bits
10342 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10343   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10344   match(Set dst (AddF dst (LoadF src)));
10345 
10346   format %{ "FADD   $dst,$src" %}
10347   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10348   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10349               OpcP, RegOpc(dst) );
10350   ins_pipe( fpu_reg_mem );
10351 %}
10352 
// The following two instructions are for _222_mpegaudio
10354 // Spill to obtain 24-bit precision
10355 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10356   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10357   match(Set dst (AddF src1 src2));
10358 
10359   format %{ "FADD   $dst,$src1,$src2" %}
10360   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10361   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10362               OpcReg_FPR(src2),
10363               Pop_Mem_FPR(dst) );
10364   ins_pipe( fpu_mem_reg_mem );
10365 %}
10366 
10367 // Cisc-spill variant
10368 // Spill to obtain 24-bit precision
10369 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10370   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10371   match(Set dst (AddF src1 (LoadF src2)));
10372 
10373   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10374   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10375   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10376               set_instruction_start,
10377               OpcP, RMopc_Mem(secondary,src1),
10378               Pop_Mem_FPR(dst) );
10379   ins_pipe( fpu_mem_mem_mem );
10380 %}
10381 
10382 // Spill to obtain 24-bit precision
10383 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10384   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10385   match(Set dst (AddF src1 src2));
10386 
10387   format %{ "FADD   $dst,$src1,$src2" %}
10388   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10389   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10390               set_instruction_start,
10391               OpcP, RMopc_Mem(secondary,src1),
10392               Pop_Mem_FPR(dst) );
10393   ins_pipe( fpu_mem_mem_mem );
10394 %}
10395 
10396 
10397 // Spill to obtain 24-bit precision
10398 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10399   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10400   match(Set dst (AddF src con));
10401   format %{ "FLD    $src\n\t"
10402             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10403             "FSTP_S $dst"  %}
10404   ins_encode %{
10405     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10406     __ fadd_s($constantaddress($con));
10407     __ fstp_s(Address(rsp, $dst$$disp));
10408   %}
10409   ins_pipe(fpu_mem_reg_con);
10410 %}
10411 //
10412 // This instruction does not round to 24-bits
10413 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10414   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10415   match(Set dst (AddF src con));
10416   format %{ "FLD    $src\n\t"
10417             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10418             "FSTP   $dst"  %}
10419   ins_encode %{
10420     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10421     __ fadd_s($constantaddress($con));
10422     __ fstp_d($dst$$reg);
10423   %}
10424   ins_pipe(fpu_reg_reg_con);
10425 %}
10426 
10427 // Spill to obtain 24-bit precision
10428 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10429   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10430   match(Set dst (MulF src1 src2));
10431 
10432   format %{ "FLD    $src1\n\t"
10433             "FMUL   $src2\n\t"
10434             "FSTP_S $dst"  %}
10435   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10436   ins_encode( Push_Reg_FPR(src1),
10437               OpcReg_FPR(src2),
10438               Pop_Mem_FPR(dst) );
10439   ins_pipe( fpu_mem_reg_reg );
10440 %}
10441 //
10442 // This instruction does not round to 24-bits
10443 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10444   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10445   match(Set dst (MulF src1 src2));
10446 
10447   format %{ "FLD    $src1\n\t"
10448             "FMUL   $src2\n\t"
10449             "FSTP_S $dst"  %}
10450   opcode(0xD8, 0x1); /* D8 C8+i */
10451   ins_encode( Push_Reg_FPR(src2),
10452               OpcReg_FPR(src1),
10453               Pop_Reg_FPR(dst) );
10454   ins_pipe( fpu_reg_reg_reg );
10455 %}
10456 
10457 
10458 // Spill to obtain 24-bit precision
10459 // Cisc-alternate to reg-reg multiply
10460 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10461   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10462   match(Set dst (MulF src1 (LoadF src2)));
10463 
10464   format %{ "FLD_S  $src2\n\t"
10465             "FMUL   $src1\n\t"
10466             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10468   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10469               OpcReg_FPR(src1),
10470               Pop_Mem_FPR(dst) );
10471   ins_pipe( fpu_mem_reg_mem );
10472 %}
10473 //
10474 // This instruction does not round to 24-bits
10475 // Cisc-alternate to reg-reg multiply
10476 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10477   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10478   match(Set dst (MulF src1 (LoadF src2)));
10479 
10480   format %{ "FMUL   $dst,$src1,$src2" %}
10481   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10482   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10483               OpcReg_FPR(src1),
10484               Pop_Reg_FPR(dst) );
10485   ins_pipe( fpu_reg_reg_mem );
10486 %}
10487 
10488 // Spill to obtain 24-bit precision
10489 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10490   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10491   match(Set dst (MulF src1 src2));
10492 
10493   format %{ "FMUL   $dst,$src1,$src2" %}
10494   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10495   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10496               set_instruction_start,
10497               OpcP, RMopc_Mem(secondary,src1),
10498               Pop_Mem_FPR(dst) );
10499   ins_pipe( fpu_mem_mem_mem );
10500 %}
10501 
10502 // Spill to obtain 24-bit precision
10503 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10504   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10505   match(Set dst (MulF src con));
10506 
10507   format %{ "FLD    $src\n\t"
10508             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10509             "FSTP_S $dst"  %}
10510   ins_encode %{
10511     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10512     __ fmul_s($constantaddress($con));
10513     __ fstp_s(Address(rsp, $dst$$disp));
10514   %}
10515   ins_pipe(fpu_mem_reg_con);
10516 %}
10517 //
10518 // This instruction does not round to 24-bits
10519 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10520   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10521   match(Set dst (MulF src con));
10522 
10523   format %{ "FLD    $src\n\t"
10524             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10525             "FSTP   $dst"  %}
10526   ins_encode %{
10527     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10528     __ fmul_s($constantaddress($con));
10529     __ fstp_d($dst$$reg);
10530   %}
10531   ins_pipe(fpu_reg_reg_con);
10532 %}
10533 
10534 
10535 //
10536 // MACRO1 -- subsume unshared load into mulFPR
10537 // This instruction does not round to 24-bits
10538 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10539   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10540   match(Set dst (MulF (LoadF mem1) src));
10541 
10542   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10543             "FMUL   ST,$src\n\t"
10544             "FSTP   $dst" %}
10545   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10546   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10547               OpcReg_FPR(src),
10548               Pop_Reg_FPR(dst) );
10549   ins_pipe( fpu_reg_reg_mem );
10550 %}
10551 //
10552 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10553 // This instruction does not round to 24-bits
10554 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10555   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10556   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10557   ins_cost(95);
10558 
10559   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10560             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10561             "FADD   ST,$src2\n\t"
10562             "FSTP   $dst" %}
10563   opcode(0xD9); /* LoadF D9 /0 */
10564   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10565               FMul_ST_reg(src1),
10566               FAdd_ST_reg(src2),
10567               Pop_Reg_FPR(dst) );
10568   ins_pipe( fpu_reg_mem_reg_reg );
10569 %}
10570 
10571 // MACRO3 -- addFPR a mulFPR
10572 // This instruction does not round to 24-bits.  It is a '2-address'
10573 // instruction in that the result goes back to src2.  This eliminates
10574 // a move from the macro; possibly the register allocator will have
10575 // to add it back (and maybe not).
10576 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10577   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10578   match(Set src2 (AddF (MulF src0 src1) src2));
10579 
10580   format %{ "FLD    $src0     ===MACRO3===\n\t"
10581             "FMUL   ST,$src1\n\t"
10582             "FADDP  $src2,ST" %}
10583   opcode(0xD9); /* LoadF D9 /0 */
10584   ins_encode( Push_Reg_FPR(src0),
10585               FMul_ST_reg(src1),
10586               FAddP_reg_ST(src2) );
10587   ins_pipe( fpu_reg_reg_reg );
10588 %}
10589 
10590 // MACRO4 -- divFPR subFPR
10591 // This instruction does not round to 24-bits
10592 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10593   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10594   match(Set dst (DivF (SubF src2 src1) src3));
10595 
10596   format %{ "FLD    $src2   ===MACRO4===\n\t"
10597             "FSUB   ST,$src1\n\t"
10598             "FDIV   ST,$src3\n\t"
10599             "FSTP  $dst" %}
10600   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10601   ins_encode( Push_Reg_FPR(src2),
10602               subFPR_divFPR_encode(src1,src3),
10603               Pop_Reg_FPR(dst) );
10604   ins_pipe( fpu_reg_reg_reg_reg );
10605 %}
10606 
10607 // Spill to obtain 24-bit precision
10608 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10609   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10610   match(Set dst (DivF src1 src2));
10611 
10612   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10614   ins_encode( Push_Reg_FPR(src1),
10615               OpcReg_FPR(src2),
10616               Pop_Mem_FPR(dst) );
10617   ins_pipe( fpu_mem_reg_reg );
10618 %}
10619 //
10620 // This instruction does not round to 24-bits
10621 instruct divFPR_reg(regFPR dst, regFPR src) %{
10622   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10623   match(Set dst (DivF dst src));
10624 
10625   format %{ "FDIV   $dst,$src" %}
10626   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10627   ins_encode( Push_Reg_FPR(src),
10628               OpcP, RegOpc(dst) );
10629   ins_pipe( fpu_reg_reg );
10630 %}
10631 
10632 
10633 // Spill to obtain 24-bit precision
10634 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10635   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10636   match(Set dst (ModF src1 src2));
10637   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10638 
10639   format %{ "FMOD   $dst,$src1,$src2" %}
10640   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10641               emitModDPR(),
10642               Push_Result_Mod_DPR(src2),
10643               Pop_Mem_FPR(dst));
10644   ins_pipe( pipe_slow );
10645 %}
10646 //
10647 // This instruction does not round to 24-bits
10648 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10649   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10650   match(Set dst (ModF dst src));
10651   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10652 
10653   format %{ "FMOD   $dst,$src" %}
10654   ins_encode(Push_Reg_Mod_DPR(dst, src),
10655               emitModDPR(),
10656               Push_Result_Mod_DPR(src),
10657               Pop_Reg_FPR(dst));
10658   ins_pipe( pipe_slow );
10659 %}
10660 
10661 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10662   predicate(UseSSE>=1);
10663   match(Set dst (ModF src0 src1));
10664   effect(KILL rax, KILL cr);
10665   format %{ "SUB    ESP,4\t # FMOD\n"
10666           "\tMOVSS  [ESP+0],$src1\n"
10667           "\tFLD_S  [ESP+0]\n"
10668           "\tMOVSS  [ESP+0],$src0\n"
10669           "\tFLD_S  [ESP+0]\n"
10670      "loop:\tFPREM\n"
10671           "\tFWAIT\n"
10672           "\tFNSTSW AX\n"
10673           "\tSAHF\n"
10674           "\tJP     loop\n"
10675           "\tFSTP_S [ESP+0]\n"
10676           "\tMOVSS  $dst,[ESP+0]\n"
10677           "\tADD    ESP,4\n"
10678           "\tFSTP   ST0\t # Restore FPU Stack"
10679     %}
10680   ins_cost(250);
10681   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10682   ins_pipe( pipe_slow );
10683 %}
10684 
10685 
10686 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
10688 
10689 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10690   predicate(UseSSE==0);
10691   match(Set dst (RoundFloat src));
10692   ins_cost(125);
10693   format %{ "FST_S  $dst,$src\t# F-round" %}
10694   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10695   ins_pipe( fpu_mem_reg );
10696 %}
10697 
10698 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10699   predicate(UseSSE<=1);
10700   match(Set dst (RoundDouble src));
10701   ins_cost(125);
10702   format %{ "FST_D  $dst,$src\t# D-round" %}
10703   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10704   ins_pipe( fpu_mem_reg );
10705 %}
10706 
// Force rounding to 24-bit precision and 8-bit exponent
10708 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10709   predicate(UseSSE==0);
10710   match(Set dst (ConvD2F src));
10711   format %{ "FST_S  $dst,$src\t# F-round" %}
10712   expand %{
10713     roundFloat_mem_reg(dst,src);
10714   %}
10715 %}
10716 
// Force rounding to 24-bit precision and 8-bit exponent
10718 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10719   predicate(UseSSE==1);
10720   match(Set dst (ConvD2F src));
10721   effect( KILL cr );
10722   format %{ "SUB    ESP,4\n\t"
10723             "FST_S  [ESP],$src\t# F-round\n\t"
10724             "MOVSS  $dst,[ESP]\n\t"
10725             "ADD ESP,4" %}
10726   ins_encode %{
10727     __ subptr(rsp, 4);
10728     if ($src$$reg != FPR1L_enc) {
10729       __ fld_s($src$$reg-1);
10730       __ fstp_s(Address(rsp, 0));
10731     } else {
10732       __ fst_s(Address(rsp, 0));
10733     }
10734     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10735     __ addptr(rsp, 4);
10736   %}
10737   ins_pipe( pipe_slow );
10738 %}
10739 
10740 // Force rounding double precision to single precision
10741 instruct convD2F_reg(regF dst, regD src) %{
10742   predicate(UseSSE>=2);
10743   match(Set dst (ConvD2F src));
10744   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10745   ins_encode %{
10746     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10747   %}
10748   ins_pipe( pipe_slow );
10749 %}
10750 
10751 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10752   predicate(UseSSE==0);
10753   match(Set dst (ConvF2D src));
10754   format %{ "FST_S  $dst,$src\t# D-round" %}
10755   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10756   ins_pipe( fpu_reg_reg );
10757 %}
10758 
10759 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10760   predicate(UseSSE==1);
10761   match(Set dst (ConvF2D src));
10762   format %{ "FST_D  $dst,$src\t# D-round" %}
10763   expand %{
10764     roundDouble_mem_reg(dst,src);
10765   %}
10766 %}
10767 
10768 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10769   predicate(UseSSE==1);
10770   match(Set dst (ConvF2D src));
10771   effect( KILL cr );
10772   format %{ "SUB    ESP,4\n\t"
10773             "MOVSS  [ESP] $src\n\t"
10774             "FLD_S  [ESP]\n\t"
10775             "ADD    ESP,4\n\t"
10776             "FSTP   $dst\t# D-round" %}
10777   ins_encode %{
10778     __ subptr(rsp, 4);
10779     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10780     __ fld_s(Address(rsp, 0));
10781     __ addptr(rsp, 4);
10782     __ fstp_d($dst$$reg);
10783   %}
10784   ins_pipe( pipe_slow );
10785 %}
10786 
10787 instruct convF2D_reg(regD dst, regF src) %{
10788   predicate(UseSSE>=2);
10789   match(Set dst (ConvF2D src));
10790   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10791   ins_encode %{
10792     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10793   %}
10794   ins_pipe( pipe_slow );
10795 %}
10796 
// Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10798 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10799   predicate(UseSSE<=1);
10800   match(Set dst (ConvD2I src));
10801   effect( KILL tmp, KILL cr );
10802   format %{ "FLD    $src\t# Convert double to int \n\t"
10803             "FLDCW  trunc mode\n\t"
10804             "SUB    ESP,4\n\t"
10805             "FISTp  [ESP + #0]\n\t"
10806             "FLDCW  std/24-bit mode\n\t"
10807             "POP    EAX\n\t"
10808             "CMP    EAX,0x80000000\n\t"
10809             "JNE,s  fast\n\t"
10810             "FLD_D  $src\n\t"
10811             "CALL   d2i_wrapper\n"
10812       "fast:" %}
10813   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10814   ins_pipe( pipe_slow );
10815 %}
10816 
// Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10818 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10819   predicate(UseSSE>=2);
10820   match(Set dst (ConvD2I src));
10821   effect( KILL tmp, KILL cr );
10822   format %{ "CVTTSD2SI $dst, $src\n\t"
10823             "CMP    $dst,0x80000000\n\t"
10824             "JNE,s  fast\n\t"
10825             "SUB    ESP, 8\n\t"
10826             "MOVSD  [ESP], $src\n\t"
10827             "FLD_D  [ESP]\n\t"
10828             "ADD    ESP, 8\n\t"
10829             "CALL   d2i_wrapper\n"
10830       "fast:" %}
10831   ins_encode %{
10832     Label fast;
10833     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10834     __ cmpl($dst$$Register, 0x80000000);
10835     __ jccb(Assembler::notEqual, fast);
10836     __ subptr(rsp, 8);
10837     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10838     __ fld_d(Address(rsp, 0));
10839     __ addptr(rsp, 8);
10840     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10841     __ bind(fast);
10842   %}
10843   ins_pipe( pipe_slow );
10844 %}
10845 
10846 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10847   predicate(UseSSE<=1);
10848   match(Set dst (ConvD2L src));
10849   effect( KILL cr );
10850   format %{ "FLD    $src\t# Convert double to long\n\t"
10851             "FLDCW  trunc mode\n\t"
10852             "SUB    ESP,8\n\t"
10853             "FISTp  [ESP + #0]\n\t"
10854             "FLDCW  std/24-bit mode\n\t"
10855             "POP    EAX\n\t"
10856             "POP    EDX\n\t"
10857             "CMP    EDX,0x80000000\n\t"
10858             "JNE,s  fast\n\t"
10859             "TEST   EAX,EAX\n\t"
10860             "JNE,s  fast\n\t"
10861             "FLD    $src\n\t"
10862             "CALL   d2l_wrapper\n"
10863       "fast:" %}
10864   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10865   ins_pipe( pipe_slow );
10866 %}
10867 
10868 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10869 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10870   predicate (UseSSE>=2);
10871   match(Set dst (ConvD2L src));
10872   effect( KILL cr );
10873   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10874             "MOVSD  [ESP],$src\n\t"
10875             "FLD_D  [ESP]\n\t"
10876             "FLDCW  trunc mode\n\t"
10877             "FISTp  [ESP + #0]\n\t"
10878             "FLDCW  std/24-bit mode\n\t"
10879             "POP    EAX\n\t"
10880             "POP    EDX\n\t"
10881             "CMP    EDX,0x80000000\n\t"
10882             "JNE,s  fast\n\t"
10883             "TEST   EAX,EAX\n\t"
10884             "JNE,s  fast\n\t"
10885             "SUB    ESP,8\n\t"
10886             "MOVSD  [ESP],$src\n\t"
10887             "FLD_D  [ESP]\n\t"
10888             "ADD    ESP,8\n\t"
10889             "CALL   d2l_wrapper\n"
10890       "fast:" %}
10891   ins_encode %{
10892     Label fast;
10893     __ subptr(rsp, 8);
10894     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10895     __ fld_d(Address(rsp, 0));
10896     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10897     __ fistp_d(Address(rsp, 0));
10898     // Restore the rounding mode, mask the exception
10899     if (Compile::current()->in_24_bit_fp_mode()) {
10900       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10901     } else {
10902       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10903     }
10904     // Load the converted long, adjust CPU stack
10905     __ pop(rax);
10906     __ pop(rdx);
10907     __ cmpl(rdx, 0x80000000);
10908     __ jccb(Assembler::notEqual, fast);
10909     __ testl(rax, rax);
10910     __ jccb(Assembler::notEqual, fast);
10911     __ subptr(rsp, 8);
10912     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10913     __ fld_d(Address(rsp, 0));
10914     __ addptr(rsp, 8);
10915     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10916     __ bind(fast);
10917   %}
10918   ins_pipe( pipe_slow );
10919 %}
10920 
// Convert a float to an int.  Java semantics require we handle the corner
// cases carefully.  So we set the rounding mode to 'round toward zero', store
// the darned value down as an int, and reset the rounding mode to 'nearest'.
// The hardware stores the integer-indefinite value (0x80000000) if we would
// overflow or converted a NaN; we check for this and take the slow path if
// needed.
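//
// A sketch of the resulting control flow (d2i_wrapper is assumed to implement
// the Java rules: NaN -> 0, out-of-range values clamp to MIN_VALUE/MAX_VALUE):
//
//   convert src to dst with round-toward-zero
//   if (dst != 0x80000000) goto fast    // ordinary in-range result
//   call d2i_wrapper(src)               // recheck the NaN / overflow cases
// fast: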
10927 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10928   predicate(UseSSE==0);
10929   match(Set dst (ConvF2I src));
10930   effect( KILL tmp, KILL cr );
10931   format %{ "FLD    $src\t# Convert float to int \n\t"
10932             "FLDCW  trunc mode\n\t"
10933             "SUB    ESP,4\n\t"
10934             "FISTp  [ESP + #0]\n\t"
10935             "FLDCW  std/24-bit mode\n\t"
10936             "POP    EAX\n\t"
10937             "CMP    EAX,0x80000000\n\t"
10938             "JNE,s  fast\n\t"
10939             "FLD    $src\n\t"
10940             "CALL   d2i_wrapper\n"
10941       "fast:" %}
10942   // DPR2I_encoding works for FPR2I
10943   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10944   ins_pipe( pipe_slow );
10945 %}
10946 
10947 // Convert a float in xmm to an int reg.
10948 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10949   predicate(UseSSE>=1);
10950   match(Set dst (ConvF2I src));
10951   effect( KILL tmp, KILL cr );
10952   format %{ "CVTTSS2SI $dst, $src\n\t"
10953             "CMP    $dst,0x80000000\n\t"
10954             "JNE,s  fast\n\t"
10955             "SUB    ESP, 4\n\t"
10956             "MOVSS  [ESP], $src\n\t"
10957             "FLD    [ESP]\n\t"
10958             "ADD    ESP, 4\n\t"
10959             "CALL   d2i_wrapper\n"
10960       "fast:" %}
10961   ins_encode %{
10962     Label fast;
10963     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10964     __ cmpl($dst$$Register, 0x80000000);
10965     __ jccb(Assembler::notEqual, fast);
10966     __ subptr(rsp, 4);
10967     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10968     __ fld_s(Address(rsp, 0));
10969     __ addptr(rsp, 4);
10970     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10971     __ bind(fast);
10972   %}
10973   ins_pipe( pipe_slow );
10974 %}
10975 
10976 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10977   predicate(UseSSE==0);
10978   match(Set dst (ConvF2L src));
10979   effect( KILL cr );
10980   format %{ "FLD    $src\t# Convert float to long\n\t"
10981             "FLDCW  trunc mode\n\t"
10982             "SUB    ESP,8\n\t"
10983             "FISTp  [ESP + #0]\n\t"
10984             "FLDCW  std/24-bit mode\n\t"
10985             "POP    EAX\n\t"
10986             "POP    EDX\n\t"
10987             "CMP    EDX,0x80000000\n\t"
10988             "JNE,s  fast\n\t"
10989             "TEST   EAX,EAX\n\t"
10990             "JNE,s  fast\n\t"
10991             "FLD    $src\n\t"
10992             "CALL   d2l_wrapper\n"
10993       "fast:" %}
10994   // DPR2L_encoding works for FPR2L
10995   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10996   ins_pipe( pipe_slow );
10997 %}
10998 
10999 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11000 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11001   predicate (UseSSE>=1);
11002   match(Set dst (ConvF2L src));
11003   effect( KILL cr );
11004   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
11005             "MOVSS  [ESP],$src\n\t"
11006             "FLD_S  [ESP]\n\t"
11007             "FLDCW  trunc mode\n\t"
11008             "FISTp  [ESP + #0]\n\t"
11009             "FLDCW  std/24-bit mode\n\t"
11010             "POP    EAX\n\t"
11011             "POP    EDX\n\t"
11012             "CMP    EDX,0x80000000\n\t"
11013             "JNE,s  fast\n\t"
11014             "TEST   EAX,EAX\n\t"
11015             "JNE,s  fast\n\t"
11016             "SUB    ESP,4\t# Convert float to long\n\t"
11017             "MOVSS  [ESP],$src\n\t"
11018             "FLD_S  [ESP]\n\t"
11019             "ADD    ESP,4\n\t"
11020             "CALL   d2l_wrapper\n"
11021       "fast:" %}
11022   ins_encode %{
11023     Label fast;
11024     __ subptr(rsp, 8);
11025     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11026     __ fld_s(Address(rsp, 0));
11027     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11028     __ fistp_d(Address(rsp, 0));
11029     // Restore the rounding mode, mask the exception
11030     if (Compile::current()->in_24_bit_fp_mode()) {
11031       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11032     } else {
11033       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11034     }
11035     // Load the converted long, adjust CPU stack
11036     __ pop(rax);
11037     __ pop(rdx);
11038     __ cmpl(rdx, 0x80000000);
11039     __ jccb(Assembler::notEqual, fast);
11040     __ testl(rax, rax);
11041     __ jccb(Assembler::notEqual, fast);
11042     __ subptr(rsp, 4);
11043     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11044     __ fld_s(Address(rsp, 0));
11045     __ addptr(rsp, 4);
11046     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11047     __ bind(fast);
11048   %}
11049   ins_pipe( pipe_slow );
11050 %}
11051 
11052 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11053   predicate( UseSSE<=1 );
11054   match(Set dst (ConvI2D src));
11055   format %{ "FILD   $src\n\t"
11056             "FSTP   $dst" %}
11057   opcode(0xDB, 0x0);  /* DB /0 */
11058   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11059   ins_pipe( fpu_reg_mem );
11060 %}
11061 
11062 instruct convI2D_reg(regD dst, rRegI src) %{
11063   predicate( UseSSE>=2 && !UseXmmI2D );
11064   match(Set dst (ConvI2D src));
11065   format %{ "CVTSI2SD $dst,$src" %}
11066   ins_encode %{
11067     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11068   %}
11069   ins_pipe( pipe_slow );
11070 %}
11071 
11072 instruct convI2D_mem(regD dst, memory mem) %{
11073   predicate( UseSSE>=2 );
11074   match(Set dst (ConvI2D (LoadI mem)));
11075   format %{ "CVTSI2SD $dst,$mem" %}
11076   ins_encode %{
11077     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11078   %}
11079   ins_pipe( pipe_slow );
11080 %}
11081 
11082 instruct convXI2D_reg(regD dst, rRegI src)
11083 %{
11084   predicate( UseSSE>=2 && UseXmmI2D );
11085   match(Set dst (ConvI2D src));
11086 
11087   format %{ "MOVD  $dst,$src\n\t"
11088             "CVTDQ2PD $dst,$dst\t# i2d" %}
11089   ins_encode %{
11090     __ movdl($dst$$XMMRegister, $src$$Register);
11091     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11092   %}
11093   ins_pipe(pipe_slow); // XXX
11094 %}
11095 
11096 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11097   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11098   match(Set dst (ConvI2D (LoadI mem)));
11099   format %{ "FILD   $mem\n\t"
11100             "FSTP   $dst" %}
11101   opcode(0xDB);      /* DB /0 */
11102   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11103               Pop_Reg_DPR(dst));
11104   ins_pipe( fpu_reg_mem );
11105 %}
11106 
11107 // Convert a byte to a float; no rounding step needed.
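      // (Values in the range 0..255 are exactly representable as single-precision
      // floats, so the result is exact even in 24-bit mode.)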
11108 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11109   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11110   match(Set dst (ConvI2F src));
11111   format %{ "FILD   $src\n\t"
11112             "FSTP   $dst" %}
11113 
11114   opcode(0xDB, 0x0);  /* DB /0 */
11115   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11116   ins_pipe( fpu_reg_mem );
11117 %}
11118 
11119 // In 24-bit mode, force exponent rounding by storing back out
11120 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11121   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11122   match(Set dst (ConvI2F src));
11123   ins_cost(200);
11124   format %{ "FILD   $src\n\t"
11125             "FSTP_S $dst" %}
11126   opcode(0xDB, 0x0);  /* DB /0 */
11127   ins_encode( Push_Mem_I(src),
11128               Pop_Mem_FPR(dst));
11129   ins_pipe( fpu_mem_mem );
11130 %}
11131 
11132 // In 24-bit mode, force exponent rounding by storing back out
11133 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11134   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11135   match(Set dst (ConvI2F (LoadI mem)));
11136   ins_cost(200);
11137   format %{ "FILD   $mem\n\t"
11138             "FSTP_S $dst" %}
11139   opcode(0xDB);  /* DB /0 */
11140   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11141               Pop_Mem_FPR(dst));
11142   ins_pipe( fpu_mem_mem );
11143 %}
11144 
11145 // This instruction does not round to 24-bits
11146 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11147   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11148   match(Set dst (ConvI2F src));
11149   format %{ "FILD   $src\n\t"
11150             "FSTP   $dst" %}
11151   opcode(0xDB, 0x0);  /* DB /0 */
11152   ins_encode( Push_Mem_I(src),
11153               Pop_Reg_FPR(dst));
11154   ins_pipe( fpu_reg_mem );
11155 %}
11156 
11157 // This instruction does not round to 24-bits
11158 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11159   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11160   match(Set dst (ConvI2F (LoadI mem)));
11161   format %{ "FILD   $mem\n\t"
11162             "FSTP   $dst" %}
11163   opcode(0xDB);      /* DB /0 */
11164   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11165               Pop_Reg_FPR(dst));
11166   ins_pipe( fpu_reg_mem );
11167 %}
11168 
11169 // Convert an int to a float in xmm; no rounding step needed.
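      // CVTSI2SS rounds directly to single precision (per MXCSR), so no separate
      // store/reload rounding step is needed as on the x87 path.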
11170 instruct convI2F_reg(regF dst, rRegI src) %{
11171   predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11172   match(Set dst (ConvI2F src));
11173   format %{ "CVTSI2SS $dst, $src" %}
11174   ins_encode %{
11175     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11176   %}
11177   ins_pipe( pipe_slow );
11178 %}
11179 
11180 instruct convXI2F_reg(regF dst, rRegI src)
11181 %{
11182   predicate( UseSSE>=2 && UseXmmI2F );
11183   match(Set dst (ConvI2F src));
11184 
11185   format %{ "MOVD  $dst,$src\n\t"
11186             "CVTDQ2PS $dst,$dst\t# i2f" %}
11187   ins_encode %{
11188     __ movdl($dst$$XMMRegister, $src$$Register);
11189     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11190   %}
11191   ins_pipe(pipe_slow); // XXX
11192 %}
11193 
11194 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11195   match(Set dst (ConvI2L src));
11196   effect(KILL cr);
11197   ins_cost(375);
11198   format %{ "MOV    $dst.lo,$src\n\t"
11199             "MOV    $dst.hi,$src\n\t"
11200             "SAR    $dst.hi,31" %}
11201   ins_encode(convert_int_long(dst,src));
11202   ins_pipe( ialu_reg_reg_long );
11203 %}
11204 
11205 // Zero-extend convert int to long
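      // Matches the idiom ((long) i) & 0xFFFFFFFFL: the immL_32bits mask clears
      // the high word, so a MOV of the low word plus an XOR of the high word suffices.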
11206 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11207   match(Set dst (AndL (ConvI2L src) mask) );
11208   effect( KILL flags );
11209   ins_cost(250);
11210   format %{ "MOV    $dst.lo,$src\n\t"
11211             "XOR    $dst.hi,$dst.hi" %}
11212   opcode(0x33); // XOR
11213   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11214   ins_pipe( ialu_reg_reg_long );
11215 %}
11216 
11217 // Zero-extend long
11218 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11219   match(Set dst (AndL src mask) );
11220   effect( KILL flags );
11221   ins_cost(250);
11222   format %{ "MOV    $dst.lo,$src.lo\n\t"
11223             "XOR    $dst.hi,$dst.hi\n\t" %}
11224   opcode(0x33); // XOR
11225   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11226   ins_pipe( ialu_reg_reg_long );
11227 %}
11228 
11229 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11230   predicate (UseSSE<=1);
11231   match(Set dst (ConvL2D src));
11232   effect( KILL cr );
11233   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11234             "PUSH   $src.lo\n\t"
11235             "FILD   ST,[ESP + #0]\n\t"
11236             "ADD    ESP,8\n\t"
11237             "FSTP_D $dst\t# D-round" %}
11238   opcode(0xDF, 0x5);  /* DF /5 */
11239   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11240   ins_pipe( pipe_slow );
11241 %}
11242 
11243 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11244   predicate (UseSSE>=2);
11245   match(Set dst (ConvL2D src));
11246   effect( KILL cr );
11247   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11248             "PUSH   $src.lo\n\t"
11249             "FILD_D [ESP]\n\t"
11250             "FSTP_D [ESP]\n\t"
11251             "MOVSD  $dst,[ESP]\n\t"
11252             "ADD    ESP,8" %}
11253   opcode(0xDF, 0x5);  /* DF /5 */
11254   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11255   ins_pipe( pipe_slow );
11256 %}
11257 
11258 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11259   predicate (UseSSE>=1);
11260   match(Set dst (ConvL2F src));
11261   effect( KILL cr );
11262   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11263             "PUSH   $src.lo\n\t"
11264             "FILD_D [ESP]\n\t"
11265             "FSTP_S [ESP]\n\t"
11266             "MOVSS  $dst,[ESP]\n\t"
11267             "ADD    ESP,8" %}
11268   opcode(0xDF, 0x5);  /* DF /5 */
11269   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11270   ins_pipe( pipe_slow );
11271 %}
11272 
11273 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11274   match(Set dst (ConvL2F src));
11275   effect( KILL cr );
11276   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11277             "PUSH   $src.lo\n\t"
11278             "FILD   ST,[ESP + #0]\n\t"
11279             "ADD    ESP,8\n\t"
11280             "FSTP_S $dst\t# F-round" %}
11281   opcode(0xDF, 0x5);  /* DF /5 */
11282   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11283   ins_pipe( pipe_slow );
11284 %}
11285 
11286 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11287   match(Set dst (ConvL2I src));
11288   effect( DEF dst, USE src );
11289   format %{ "MOV    $dst,$src.lo" %}
11290   ins_encode(enc_CopyL_Lo(dst,src));
11291   ins_pipe( ialu_reg_reg );
11292 %}
11293 
11294 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11295   match(Set dst (MoveF2I src));
11296   effect( DEF dst, USE src );
11297   ins_cost(100);
11298   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11299   ins_encode %{
11300     __ movl($dst$$Register, Address(rsp, $src$$disp));
11301   %}
11302   ins_pipe( ialu_reg_mem );
11303 %}
11304 
11305 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11306   predicate(UseSSE==0);
11307   match(Set dst (MoveF2I src));
11308   effect( DEF dst, USE src );
11309 
11310   ins_cost(125);
11311   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11312   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11313   ins_pipe( fpu_mem_reg );
11314 %}
11315 
11316 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11317   predicate(UseSSE>=1);
11318   match(Set dst (MoveF2I src));
11319   effect( DEF dst, USE src );
11320 
11321   ins_cost(95);
11322   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11323   ins_encode %{
11324     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11325   %}
11326   ins_pipe( pipe_slow );
11327 %}
11328 
11329 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11330   predicate(UseSSE>=2);
11331   match(Set dst (MoveF2I src));
11332   effect( DEF dst, USE src );
11333   ins_cost(85);
11334   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11335   ins_encode %{
11336     __ movdl($dst$$Register, $src$$XMMRegister);
11337   %}
11338   ins_pipe( pipe_slow );
11339 %}
11340 
11341 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11342   match(Set dst (MoveI2F src));
11343   effect( DEF dst, USE src );
11344 
11345   ins_cost(100);
11346   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11347   ins_encode %{
11348     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11349   %}
11350   ins_pipe( ialu_mem_reg );
11351 %}
11352 
11353 
11354 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11355   predicate(UseSSE==0);
11356   match(Set dst (MoveI2F src));
11357   effect(DEF dst, USE src);
11358 
11359   ins_cost(125);
11360   format %{ "FLD_S  $src\n\t"
11361             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11362   opcode(0xD9);               /* D9 /0, FLD m32real */
11363   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11364               Pop_Reg_FPR(dst) );
11365   ins_pipe( fpu_reg_mem );
11366 %}
11367 
11368 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11369   predicate(UseSSE>=1);
11370   match(Set dst (MoveI2F src));
11371   effect( DEF dst, USE src );
11372 
11373   ins_cost(95);
11374   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11375   ins_encode %{
11376     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11377   %}
11378   ins_pipe( pipe_slow );
11379 %}
11380 
11381 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11382   predicate(UseSSE>=2);
11383   match(Set dst (MoveI2F src));
11384   effect( DEF dst, USE src );
11385 
11386   ins_cost(85);
11387   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11388   ins_encode %{
11389     __ movdl($dst$$XMMRegister, $src$$Register);
11390   %}
11391   ins_pipe( pipe_slow );
11392 %}
11393 
11394 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11395   match(Set dst (MoveD2L src));
11396   effect(DEF dst, USE src);
11397 
11398   ins_cost(250);
11399   format %{ "MOV    $dst.lo,$src\n\t"
11400             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11401   opcode(0x8B, 0x8B);
11402   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11403   ins_pipe( ialu_mem_long_reg );
11404 %}
11405 
11406 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11407   predicate(UseSSE<=1);
11408   match(Set dst (MoveD2L src));
11409   effect(DEF dst, USE src);
11410 
11411   ins_cost(125);
11412   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11413   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11414   ins_pipe( fpu_mem_reg );
11415 %}
11416 
11417 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11418   predicate(UseSSE>=2);
11419   match(Set dst (MoveD2L src));
11420   effect(DEF dst, USE src);
11421   ins_cost(95);
11422   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11423   ins_encode %{
11424     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11425   %}
11426   ins_pipe( pipe_slow );
11427 %}
11428 
11429 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11430   predicate(UseSSE>=2);
11431   match(Set dst (MoveD2L src));
11432   effect(DEF dst, USE src, TEMP tmp);
11433   ins_cost(85);
11434   format %{ "MOVD   $dst.lo,$src\n\t"
11435             "PSHUFLW $tmp,$src,0x4E\n\t"
11436             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11437   ins_encode %{
11438     __ movdl($dst$$Register, $src$$XMMRegister);
11439     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11440     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11441   %}
11442   ins_pipe( pipe_slow );
11443 %}
11444 
11445 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11446   match(Set dst (MoveL2D src));
11447   effect(DEF dst, USE src);
11448 
11449   ins_cost(200);
11450   format %{ "MOV    $dst,$src.lo\n\t"
11451             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11452   opcode(0x89, 0x89);
11453   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11454   ins_pipe( ialu_mem_long_reg );
11455 %}
11456 
11457 
11458 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11459   predicate(UseSSE<=1);
11460   match(Set dst (MoveL2D src));
11461   effect(DEF dst, USE src);
11462   ins_cost(125);
11463 
11464   format %{ "FLD_D  $src\n\t"
11465             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11466   opcode(0xDD);               /* DD /0, FLD m64real */
11467   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11468               Pop_Reg_DPR(dst) );
11469   ins_pipe( fpu_reg_mem );
11470 %}
11471 
11472 
11473 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11474   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11475   match(Set dst (MoveL2D src));
11476   effect(DEF dst, USE src);
11477 
11478   ins_cost(95);
11479   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11480   ins_encode %{
11481     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11482   %}
11483   ins_pipe( pipe_slow );
11484 %}
11485 
11486 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11487   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11488   match(Set dst (MoveL2D src));
11489   effect(DEF dst, USE src);
11490 
11491   ins_cost(95);
11492   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11493   ins_encode %{
11494     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11495   %}
11496   ins_pipe( pipe_slow );
11497 %}
11498 
11499 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11500   predicate(UseSSE>=2);
11501   match(Set dst (MoveL2D src));
11502   effect(TEMP dst, USE src, TEMP tmp);
11503   ins_cost(85);
11504   format %{ "MOVD   $dst,$src.lo\n\t"
11505             "MOVD   $tmp,$src.hi\n\t"
11506             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11507   ins_encode %{
11508     __ movdl($dst$$XMMRegister, $src$$Register);
11509     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11510     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11511   %}
11512   ins_pipe( pipe_slow );
11513 %}
11514 
11515 
11516 // =======================================================================
11517 // fast clearing of an array
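      // Lengths at or below InitArrayShortSize are cleared with the inline store
      // loop shown in the format below; larger arrays take the LARGE path
      // (REP STOSB/STOS or the XMM loop, chosen by UseFastStosb/UseXMMForObjInit)
      // inside clear_mem() (see ins_encode).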
11518 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11519   predicate(!((ClearArrayNode*)n)->is_large());
11520   match(Set dummy (ClearArray cnt base));
11521   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11522 
11523   format %{ $$template
11524     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11525     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11526     $$emit$$"JG     LARGE\n\t"
11527     $$emit$$"SHL    ECX, 1\n\t"
11528     $$emit$$"DEC    ECX\n\t"
11529     $$emit$$"JS     DONE\t# Zero length\n\t"
11530     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11531     $$emit$$"DEC    ECX\n\t"
11532     $$emit$$"JGE    LOOP\n\t"
11533     $$emit$$"JMP    DONE\n\t"
11534     $$emit$$"# LARGE:\n\t"
11535     if (UseFastStosb) {
11536        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11537        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11538     } else if (UseXMMForObjInit) {
11539        $$emit$$"MOV     RDI,RAX\n\t"
11540        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11541        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11542        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11543        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11544        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11545        $$emit$$"ADD     0x40,RAX\n\t"
11546        $$emit$$"# L_zero_64_bytes:\n\t"
11547        $$emit$$"SUB     0x8,RCX\n\t"
11548        $$emit$$"JGE     L_loop\n\t"
11549        $$emit$$"ADD     0x4,RCX\n\t"
11550        $$emit$$"JL      L_tail\n\t"
11551        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11552        $$emit$$"ADD     0x20,RAX\n\t"
11553        $$emit$$"SUB     0x4,RCX\n\t"
11554        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11555        $$emit$$"ADD     0x4,RCX\n\t"
11556        $$emit$$"JLE     L_end\n\t"
11557        $$emit$$"DEC     RCX\n\t"
11558        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11559        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11560        $$emit$$"ADD     0x8,RAX\n\t"
11561        $$emit$$"DEC     RCX\n\t"
11562        $$emit$$"JGE     L_sloop\n\t"
11563        $$emit$$"# L_end:\n\t"
11564     } else {
11565        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11566        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11567     }
11568     $$emit$$"# DONE"
11569   %}
11570   ins_encode %{
11571     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11572                  $tmp$$XMMRegister, false);
11573   %}
11574   ins_pipe( pipe_slow );
11575 %}
11576 
11577 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11578   predicate(((ClearArrayNode*)n)->is_large());
11579   match(Set dummy (ClearArray cnt base));
11580   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11581   format %{ $$template
11582     if (UseFastStosb) {
11583        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11584        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11585        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11586     } else if (UseXMMForObjInit) {
11587        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11588        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11589        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11590        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11591        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11592        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11593        $$emit$$"ADD     0x40,RAX\n\t"
11594        $$emit$$"# L_zero_64_bytes:\n\t"
11595        $$emit$$"SUB     0x8,RCX\n\t"
11596        $$emit$$"JGE     L_loop\n\t"
11597        $$emit$$"ADD     0x4,RCX\n\t"
11598        $$emit$$"JL      L_tail\n\t"
11599        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11600        $$emit$$"ADD     0x20,RAX\n\t"
11601        $$emit$$"SUB     0x4,RCX\n\t"
11602        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11603        $$emit$$"ADD     0x4,RCX\n\t"
11604        $$emit$$"JLE     L_end\n\t"
11605        $$emit$$"DEC     RCX\n\t"
11606        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11607        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11608        $$emit$$"ADD     0x8,RAX\n\t"
11609        $$emit$$"DEC     RCX\n\t"
11610        $$emit$$"JGE     L_sloop\n\t"
11611        $$emit$$"# L_end:\n\t"
11612     } else {
11613        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11614        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11615        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11616     }
11617     $$emit$$"# DONE"
11618   %}
11619   ins_encode %{
11620     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11621                  $tmp$$XMMRegister, true);
11622   %}
11623   ins_pipe( pipe_slow );
11624 %}
11625 
11626 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11627                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11628   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11629   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11630   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11631 
11632   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11633   ins_encode %{
11634     __ string_compare($str1$$Register, $str2$$Register,
11635                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11636                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11637   %}
11638   ins_pipe( pipe_slow );
11639 %}
11640 
11641 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11642                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11643   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11644   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11645   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11646 
11647   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11648   ins_encode %{
11649     __ string_compare($str1$$Register, $str2$$Register,
11650                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11651                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11652   %}
11653   ins_pipe( pipe_slow );
11654 %}
11655 
11656 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11657                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11658   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11659   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11660   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11661 
11662   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11663   ins_encode %{
11664     __ string_compare($str1$$Register, $str2$$Register,
11665                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11666                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11667   %}
11668   ins_pipe( pipe_slow );
11669 %}
11670 
11671 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11672                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11673   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11674   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11675   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11676 
11677   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11678   ins_encode %{
11679     __ string_compare($str2$$Register, $str1$$Register,
11680                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11681                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11682   %}
11683   ins_pipe( pipe_slow );
11684 %}
11685 
11686 // fast string equals
11687 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11688                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11689   match(Set result (StrEquals (Binary str1 str2) cnt));
11690   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11691 
11692   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11693   ins_encode %{
11694     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11695                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11696                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11697   %}
11698 
11699   ins_pipe( pipe_slow );
11700 %}
11701 
11702 // fast search of substring with known size.
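      // The >= 16 threshold below corresponds to one 16-byte XMM register of byte
      // data; shorter constant substrings fall back to the general string_indexof path.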
11703 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11704                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11705   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11706   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11707   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11708 
11709   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11710   ins_encode %{
11711     int icnt2 = (int)$int_cnt2$$constant;
11712     if (icnt2 >= 16) {
11713       // IndexOf for constant substrings with size >= 16 elements
11714       // which don't need to be loaded through the stack.
11715       __ string_indexofC8($str1$$Register, $str2$$Register,
11716                           $cnt1$$Register, $cnt2$$Register,
11717                           icnt2, $result$$Register,
11718                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11719     } else {
11720       // Small strings are loaded through the stack if they cross a page boundary.
11721       __ string_indexof($str1$$Register, $str2$$Register,
11722                         $cnt1$$Register, $cnt2$$Register,
11723                         icnt2, $result$$Register,
11724                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11725     }
11726   %}
11727   ins_pipe( pipe_slow );
11728 %}
11729 
11730 // fast search of substring with known size.
11731 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11732                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11733   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11734   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11735   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11736 
11737   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11738   ins_encode %{
11739     int icnt2 = (int)$int_cnt2$$constant;
11740     if (icnt2 >= 8) {
11741       // IndexOf for constant substrings with size >= 8 elements
11742       // which don't need to be loaded through the stack.
11743       __ string_indexofC8($str1$$Register, $str2$$Register,
11744                           $cnt1$$Register, $cnt2$$Register,
11745                           icnt2, $result$$Register,
11746                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11747     } else {
11748       // Small strings are loaded through the stack if they cross a page boundary.
11749       __ string_indexof($str1$$Register, $str2$$Register,
11750                         $cnt1$$Register, $cnt2$$Register,
11751                         icnt2, $result$$Register,
11752                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11753     }
11754   %}
11755   ins_pipe( pipe_slow );
11756 %}
11757 
11758 // fast search of substring with known size.
11759 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11760                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11761   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11762   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11763   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11764 
11765   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11766   ins_encode %{
11767     int icnt2 = (int)$int_cnt2$$constant;
11768     if (icnt2 >= 8) {
11769       // IndexOf for constant substrings with size >= 8 elements
11770       // which don't need to be loaded through the stack.
11771       __ string_indexofC8($str1$$Register, $str2$$Register,
11772                           $cnt1$$Register, $cnt2$$Register,
11773                           icnt2, $result$$Register,
11774                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11775     } else {
11776       // Small strings are loaded through the stack if they cross a page boundary.
11777       __ string_indexof($str1$$Register, $str2$$Register,
11778                         $cnt1$$Register, $cnt2$$Register,
11779                         icnt2, $result$$Register,
11780                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11781     }
11782   %}
11783   ins_pipe( pipe_slow );
11784 %}
11785 
11786 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11787                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11788   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11789   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11790   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11791 
11792   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11793   ins_encode %{
11794     __ string_indexof($str1$$Register, $str2$$Register,
11795                       $cnt1$$Register, $cnt2$$Register,
11796                       (-1), $result$$Register,
11797                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11798   %}
11799   ins_pipe( pipe_slow );
11800 %}
11801 
11802 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11803                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11804   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11805   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11806   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11807 
11808   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11809   ins_encode %{
11810     __ string_indexof($str1$$Register, $str2$$Register,
11811                       $cnt1$$Register, $cnt2$$Register,
11812                       (-1), $result$$Register,
11813                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11814   %}
11815   ins_pipe( pipe_slow );
11816 %}
11817 
11818 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11819                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11820   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11821   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11822   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11823 
11824   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11825   ins_encode %{
11826     __ string_indexof($str1$$Register, $str2$$Register,
11827                       $cnt1$$Register, $cnt2$$Register,
11828                       (-1), $result$$Register,
11829                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11830   %}
11831   ins_pipe( pipe_slow );
11832 %}
11833 
11834 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11835                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11836   predicate(UseSSE42Intrinsics);
11837   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11838   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11839   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11840   ins_encode %{
11841     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11842                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11843   %}
11844   ins_pipe( pipe_slow );
11845 %}
11846 
11847 // fast array equals
11848 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11849                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11850 %{
11851   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11852   match(Set result (AryEq ary1 ary2));
11853   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11854   //ins_cost(300);
11855 
11856   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11857   ins_encode %{
11858     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11859                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11860                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11861   %}
11862   ins_pipe( pipe_slow );
11863 %}
11864 
11865 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11866                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11867 %{
11868   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11869   match(Set result (AryEq ary1 ary2));
11870   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11871   //ins_cost(300);
11872 
11873   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11874   ins_encode %{
11875     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11876                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11877                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11878   %}
11879   ins_pipe( pipe_slow );
11880 %}
11881 
11882 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11883                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11884 %{
11885   match(Set result (HasNegatives ary1 len));
11886   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11887 
11888   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11889   ins_encode %{
11890     __ has_negatives($ary1$$Register, $len$$Register,
11891                      $result$$Register, $tmp3$$Register,
11892                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11893   %}
11894   ins_pipe( pipe_slow );
11895 %}
11896 
11897 // fast char[] to byte[] compression
11898 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11899                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11900   match(Set result (StrCompressedCopy src (Binary dst len)));
11901   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11902 
11903   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11904   ins_encode %{
11905     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11906                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11907                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11908   %}
11909   ins_pipe( pipe_slow );
11910 %}
11911 
11912 // fast byte[] to char[] inflation
11913 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11914                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11915   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11916   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11917 
11918   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11919   ins_encode %{
11920     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11921                           $tmp1$$XMMRegister, $tmp2$$Register);
11922   %}
11923   ins_pipe( pipe_slow );
11924 %}
11925 
11926 // encode char[] to byte[] in ISO_8859_1
11927 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11928                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11929                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11930   match(Set result (EncodeISOArray src (Binary dst len)));
11931   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11932 
11933   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11934   ins_encode %{
11935     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11936                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11937                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11938   %}
11939   ins_pipe( pipe_slow );
11940 %}
11941 
11942 
11943 //----------Control Flow Instructions------------------------------------------
11944 // Signed compare Instructions
11945 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11946   match(Set cr (CmpI op1 op2));
11947   effect( DEF cr, USE op1, USE op2 );
11948   format %{ "CMP    $op1,$op2" %}
11949   opcode(0x3B);  /* Opcode 3B /r */
11950   ins_encode( OpcP, RegReg( op1, op2) );
11951   ins_pipe( ialu_cr_reg_reg );
11952 %}
11953 
11954 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11955   match(Set cr (CmpI op1 op2));
11956   effect( DEF cr, USE op1 );
11957   format %{ "CMP    $op1,$op2" %}
11958   opcode(0x81,0x07);  /* Opcode 81 /7 */
11959   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11960   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11961   ins_pipe( ialu_cr_reg_imm );
11962 %}
11963 
11964 // Cisc-spilled version of cmpI_eReg
11965 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11966   match(Set cr (CmpI op1 (LoadI op2)));
11967 
11968   format %{ "CMP    $op1,$op2" %}
11969   ins_cost(500);
11970   opcode(0x3B);  /* Opcode 3B /r */
11971   ins_encode( OpcP, RegMem( op1, op2) );
11972   ins_pipe( ialu_cr_reg_mem );
11973 %}
11974 
11975 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11976   match(Set cr (CmpI src zero));
11977   effect( DEF cr, USE src );
11978 
11979   format %{ "TEST   $src,$src" %}
11980   opcode(0x85);
11981   ins_encode( OpcP, RegReg( src, src ) );
11982   ins_pipe( ialu_cr_reg_imm );
11983 %}
11984 
11985 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11986   match(Set cr (CmpI (AndI src con) zero));
11987 
11988   format %{ "TEST   $src,$con" %}
11989   opcode(0xF7,0x00);
11990   ins_encode( OpcP, RegOpc(src), Con32(con) );
11991   ins_pipe( ialu_cr_reg_imm );
11992 %}
11993 
11994 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11995   match(Set cr (CmpI (AndI src mem) zero));
11996 
11997   format %{ "TEST   $src,$mem" %}
11998   opcode(0x85);
11999   ins_encode( OpcP, RegMem( src, mem ) );
12000   ins_pipe( ialu_cr_reg_mem );
12001 %}
12002 
12003 // Unsigned compare Instructions; really, same as signed except they
12004 // produce an eFlagsRegU instead of eFlagsReg.
12005 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12006   match(Set cr (CmpU op1 op2));
12007 
12008   format %{ "CMPu   $op1,$op2" %}
12009   opcode(0x3B);  /* Opcode 3B /r */
12010   ins_encode( OpcP, RegReg( op1, op2) );
12011   ins_pipe( ialu_cr_reg_reg );
12012 %}
12013 
12014 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12015   match(Set cr (CmpU op1 op2));
12016 
12017   format %{ "CMPu   $op1,$op2" %}
12018   opcode(0x81,0x07);  /* Opcode 81 /7 */
12019   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12020   ins_pipe( ialu_cr_reg_imm );
12021 %}
12022 
12023 // // Cisc-spilled version of cmpU_eReg
12024 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12025   match(Set cr (CmpU op1 (LoadI op2)));
12026 
12027   format %{ "CMPu   $op1,$op2" %}
12028   ins_cost(500);
12029   opcode(0x3B);  /* Opcode 3B /r */
12030   ins_encode( OpcP, RegMem( op1, op2) );
12031   ins_pipe( ialu_cr_reg_mem );
12032 %}
12033 
12034 // // Cisc-spilled version of cmpU_eReg
12035 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12036 //  match(Set cr (CmpU (LoadI op1) op2));
12037 //
12038 //  format %{ "CMPu   $op1,$op2" %}
12039 //  ins_cost(500);
12040 //  opcode(0x39);  /* Opcode 39 /r */
12041 //  ins_encode( OpcP, RegMem( op1, op2) );
12042 //%}
12043 
12044 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
12045   match(Set cr (CmpU src zero));
12046 
12047   format %{ "TESTu  $src,$src" %}
12048   opcode(0x85);
12049   ins_encode( OpcP, RegReg( src, src ) );
12050   ins_pipe( ialu_cr_reg_imm );
12051 %}
12052 
12053 // Unsigned pointer compare Instructions
12054 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12055   match(Set cr (CmpP op1 op2));
12056 
12057   format %{ "CMPu   $op1,$op2" %}
12058   opcode(0x3B);  /* Opcode 3B /r */
12059   ins_encode( OpcP, RegReg( op1, op2) );
12060   ins_pipe( ialu_cr_reg_reg );
12061 %}
12062 
12063 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12064   match(Set cr (CmpP op1 op2));
12065 
12066   format %{ "CMPu   $op1,$op2" %}
12067   opcode(0x81,0x07);  /* Opcode 81 /7 */
12068   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12069   ins_pipe( ialu_cr_reg_imm );
12070 %}
12071 
12072 // // Cisc-spilled version of cmpP_eReg
12073 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12074   match(Set cr (CmpP op1 (LoadP op2)));
12075 
12076   format %{ "CMPu   $op1,$op2" %}
12077   ins_cost(500);
12078   opcode(0x3B);  /* Opcode 3B /r */
12079   ins_encode( OpcP, RegMem( op1, op2) );
12080   ins_pipe( ialu_cr_reg_mem );
12081 %}
12082 
12083 // // Cisc-spilled version of cmpP_eReg
12084 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12085 //  match(Set cr (CmpP (LoadP op1) op2));
12086 //
12087 //  format %{ "CMPu   $op1,$op2" %}
12088 //  ins_cost(500);
12089 //  opcode(0x39);  /* Opcode 39 /r */
12090 //  ins_encode( OpcP, RegMem( op1, op2) );
12091 //%}
12092 
12093 // Compare raw pointer (used in out-of-heap check).
12094 // Only works because non-oop pointers must be raw pointers
12095 // and raw pointers have no anti-dependencies.
12096 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12097   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12098   match(Set cr (CmpP op1 (LoadP op2)));
12099 
12100   format %{ "CMPu   $op1,$op2" %}
12101   opcode(0x3B);  /* Opcode 3B /r */
12102   ins_encode( OpcP, RegMem( op1, op2) );
12103   ins_pipe( ialu_cr_reg_mem );
12104 %}
12105 
12106 //
12107 // This will generate a signed flags result. This should be ok
12108 // since any compare against zero should be eq/neq.
12109 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12110   match(Set cr (CmpP src zero));
12111 
12112   format %{ "TEST   $src,$src" %}
12113   opcode(0x85);
12114   ins_encode( OpcP, RegReg( src, src ) );
12115   ins_pipe( ialu_cr_reg_imm );
12116 %}
12117 
12118 // Cisc-spilled version of testP_reg
12119 // This will generate a signed flags result. This should be ok
12120 // since any compare against zero should be eq/neq.
12121 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12122   match(Set cr (CmpP (LoadP op) zero));
12123 
12124   format %{ "TEST   $op,0xFFFFFFFF" %}
12125   ins_cost(500);
12126   opcode(0xF7);               /* Opcode F7 /0 */
12127   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12128   ins_pipe( ialu_cr_reg_imm );
12129 %}
12130 
12131 // Yanked all unsigned pointer compare operations.
12132 // Pointer compares are done with CmpP which is already unsigned.
12133 
12134 //----------Max and Min--------------------------------------------------------
12135 // Min Instructions
12136 ////
12137 //   *** Min and Max using the conditional move are slower than the
12138 //   *** branch version on a Pentium III.
12139 // // Conditional move for min
12140 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12141 //  effect( USE_DEF op2, USE op1, USE cr );
12142 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12143 //  opcode(0x4C,0x0F);
12144 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12145 //  ins_pipe( pipe_cmov_reg );
12146 //%}
12147 //
12148 //// Min Register with Register (P6 version)
12149 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12150 //  predicate(VM_Version::supports_cmov() );
12151 //  match(Set op2 (MinI op1 op2));
12152 //  ins_cost(200);
12153 //  expand %{
12154 //    eFlagsReg cr;
12155 //    compI_eReg(cr,op1,op2);
12156 //    cmovI_reg_lt(op2,op1,cr);
12157 //  %}
12158 //%}
12159 
12160 // Min Register with Register (generic version)
12161 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12162   match(Set dst (MinI dst src));
12163   effect(KILL flags);
12164   ins_cost(300);
12165 
12166   format %{ "MIN    $dst,$src" %}
12167   opcode(0xCC);
12168   ins_encode( min_enc(dst,src) );
12169   ins_pipe( pipe_slow );
12170 %}
12171 
12172 // Max Register with Register
12173 //   *** Min and Max using the conditional move are slower than the
12174 //   *** branch version on a Pentium III.
12175 // // Conditional move for max
12176 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12177 //  effect( USE_DEF op2, USE op1, USE cr );
12178 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12179 //  opcode(0x4F,0x0F);
12180 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12181 //  ins_pipe( pipe_cmov_reg );
12182 //%}
12183 //
12184 // // Max Register with Register (P6 version)
12185 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12186 //  predicate(VM_Version::supports_cmov() );
12187 //  match(Set op2 (MaxI op1 op2));
12188 //  ins_cost(200);
12189 //  expand %{
12190 //    eFlagsReg cr;
12191 //    compI_eReg(cr,op1,op2);
12192 //    cmovI_reg_gt(op2,op1,cr);
12193 //  %}
12194 //%}
12195 
12196 // Max Register with Register (generic version)
12197 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12198   match(Set dst (MaxI dst src));
12199   effect(KILL flags);
12200   ins_cost(300);
12201 
12202   format %{ "MAX    $dst,$src" %}
12203   opcode(0xCC);
12204   ins_encode( max_enc(dst,src) );
12205   ins_pipe( pipe_slow );
12206 %}
12207 
12208 // ============================================================================
12209 // Counted Loop limit node which represents exact final iterator value.
12210 // Note: the resulting value should fit into integer range since
12211 // counted loops have limit check on overflow.
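      // e.g. init=0, limit=10, stride=3: (10 - 0 + 3 - 1)/3 = 4 iterations, so the
      // exact final iterator value computed here is 0 + 3*4 = 12.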
12212 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12213   match(Set limit (LoopLimit (Binary init limit) stride));
12214   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12215   ins_cost(300);
12216 
12217   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12218   ins_encode %{
12219     int strd = (int)$stride$$constant;
12220     assert(strd != 1 && strd != -1, "sanity");
12221     int m1 = (strd > 0) ? 1 : -1;
12222     // Convert limit to long (EAX:EDX)
12223     __ cdql();
12224     // Convert init to long (init:tmp)
12225     __ movl($tmp$$Register, $init$$Register);
12226     __ sarl($tmp$$Register, 31);
12227     // $limit - $init
12228     __ subl($limit$$Register, $init$$Register);
12229     __ sbbl($limit_hi$$Register, $tmp$$Register);
12230     // + ($stride - 1)
12231     if (strd > 0) {
12232       __ addl($limit$$Register, (strd - 1));
12233       __ adcl($limit_hi$$Register, 0);
12234       __ movl($tmp$$Register, strd);
12235     } else {
12236       __ addl($limit$$Register, (strd + 1));
12237       __ adcl($limit_hi$$Register, -1);
12238       __ lneg($limit_hi$$Register, $limit$$Register);
12239       __ movl($tmp$$Register, -strd);
12240     }
12241     // signed division: (EAX:EDX) / pos_stride
12242     __ idivl($tmp$$Register);
12243     if (strd < 0) {
12244       // restore sign
12245       __ negl($tmp$$Register);
12246     }
12247     // (EAX) * stride
12248     __ mull($tmp$$Register);
12249     // + init (ignore upper bits)
12250     __ addl($limit$$Register, $init$$Register);
12251   %}
12252   ins_pipe( pipe_slow );
12253 %}
12254 
12255 // ============================================================================
12256 // Branch Instructions
12257 // Jump Table
12258 instruct jumpXtnd(rRegI switch_val) %{
12259   match(Jump switch_val);
12260   ins_cost(350);
12261   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12262   ins_encode %{
12263     // Jump to Address(table_base + switch_reg)
12264     Address index(noreg, $switch_val$$Register, Address::times_1);
12265     __ jump(ArrayAddress($constantaddress, index));
12266   %}
12267   ins_pipe(pipe_jmp);
12268 %}
12269 
12270 // Jump Direct - Label defines a relative address from JMP+1
12271 instruct jmpDir(label labl) %{
12272   match(Goto);
12273   effect(USE labl);
12274 
12275   ins_cost(300);
12276   format %{ "JMP    $labl" %}
12277   size(5);
12278   ins_encode %{
12279     Label* L = $labl$$label;
12280     __ jmp(*L, false); // Always long jump
12281   %}
12282   ins_pipe( pipe_jmp );
12283 %}
12284 
12285 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12286 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12287   match(If cop cr);
12288   effect(USE labl);
12289 
12290   ins_cost(300);
12291   format %{ "J$cop    $labl" %}
12292   size(6);
12293   ins_encode %{
12294     Label* L = $labl$$label;
12295     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12296   %}
12297   ins_pipe( pipe_jcc );
12298 %}
12299 
12300 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12301 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12302   predicate(!n->has_vector_mask_set());
12303   match(CountedLoopEnd cop cr);
12304   effect(USE labl);
12305 
12306   ins_cost(300);
12307   format %{ "J$cop    $labl\t# Loop end" %}
12308   size(6);
12309   ins_encode %{
12310     Label* L = $labl$$label;
12311     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12312   %}
12313   ins_pipe( pipe_jcc );
12314 %}
12315 
12316 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12317 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12318   predicate(!n->has_vector_mask_set());
12319   match(CountedLoopEnd cop cmp);
12320   effect(USE labl);
12321 
12322   ins_cost(300);
12323   format %{ "J$cop,u  $labl\t# Loop end" %}
12324   size(6);
12325   ins_encode %{
12326     Label* L = $labl$$label;
12327     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12328   %}
12329   ins_pipe( pipe_jcc );
12330 %}
12331 
12332 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12333   predicate(!n->has_vector_mask_set());
12334   match(CountedLoopEnd cop cmp);
12335   effect(USE labl);
12336 
12337   ins_cost(200);
12338   format %{ "J$cop,u  $labl\t# Loop end" %}
12339   size(6);
12340   ins_encode %{
12341     Label* L = $labl$$label;
12342     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12343   %}
12344   ins_pipe( pipe_jcc );
12345 %}
12346 
12347 // mask version
12348 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12349 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12350   predicate(n->has_vector_mask_set());
12351   match(CountedLoopEnd cop cr);
12352   effect(USE labl);
12353 
12354   ins_cost(400);
12355   format %{ "J$cop    $labl\t# Loop end\n\t"
12356             "restorevectmask \t# vector mask restore for loops" %}
12357   size(10);
12358   ins_encode %{
12359     Label* L = $labl$$label;
12360     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12361     __ restorevectmask();
12362   %}
12363   ins_pipe( pipe_jcc );
12364 %}
12365 
12366 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12367 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12368   predicate(n->has_vector_mask_set());
12369   match(CountedLoopEnd cop cmp);
12370   effect(USE labl);
12371 
12372   ins_cost(400);
12373   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12374             "restorevectmask \t# vector mask restore for loops" %}
12375   size(10);
12376   ins_encode %{
12377     Label* L = $labl$$label;
12378     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12379     __ restorevectmask();
12380   %}
12381   ins_pipe( pipe_jcc );
12382 %}
12383 
12384 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12385   predicate(n->has_vector_mask_set());
12386   match(CountedLoopEnd cop cmp);
12387   effect(USE labl);
12388 
12389   ins_cost(300);
12390   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12391             "restorevectmask \t# vector mask restore for loops" %}
12392   size(10);
12393   ins_encode %{
12394     Label* L = $labl$$label;
12395     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12396     __ restorevectmask();
12397   %}
12398   ins_pipe( pipe_jcc );
12399 %}
12400 
12401 // Jump Direct Conditional - using unsigned comparison
12402 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12403   match(If cop cmp);
12404   effect(USE labl);
12405 
12406   ins_cost(300);
12407   format %{ "J$cop,u  $labl" %}
12408   size(6);
12409   ins_encode %{
12410     Label* L = $labl$$label;
12411     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12412   %}
12413   ins_pipe(pipe_jcc);
12414 %}
12415 
12416 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12417   match(If cop cmp);
12418   effect(USE labl);
12419 
12420   ins_cost(200);
12421   format %{ "J$cop,u  $labl" %}
12422   size(6);
12423   ins_encode %{
12424     Label* L = $labl$$label;
12425     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12426   %}
12427   ins_pipe(pipe_jcc);
12428 %}
12429 
12430 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12431   match(If cop cmp);
12432   effect(USE labl);
12433 
12434   ins_cost(200);
12435   format %{ $$template
12436     if ($cop$$cmpcode == Assembler::notEqual) {
12437       $$emit$$"JP,u   $labl\n\t"
12438       $$emit$$"J$cop,u   $labl"
12439     } else {
12440       $$emit$$"JP,u   done\n\t"
12441       $$emit$$"J$cop,u   $labl\n\t"
12442       $$emit$$"done:"
12443     }
12444   %}
12445   ins_encode %{
12446     Label* l = $labl$$label;
12447     if ($cop$$cmpcode == Assembler::notEqual) {
12448       __ jcc(Assembler::parity, *l, false);
12449       __ jcc(Assembler::notEqual, *l, false);
12450     } else if ($cop$$cmpcode == Assembler::equal) {
12451       Label done;
12452       __ jccb(Assembler::parity, done);
12453       __ jcc(Assembler::equal, *l, false);
12454       __ bind(done);
12455     } else {
12456        ShouldNotReachHere();
12457     }
12458   %}
12459   ins_pipe(pipe_jcc);
12460 %}
12461 
12462 // ============================================================================
12463 // The second, slow half of a subtype check.  Scan the subklass's secondary
12464 // superklass array for an instance of the superklass.  Set a hidden internal
12465 // cache on a hit (the cache is checked with exposed code in gen_subtype_check()).
12466 // Return NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
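//
// A rough C++ sketch of the scan that both partialSubtypeCheck flavors encode
// (illustrative only; the accessor names below follow the Klass API but are
// assumptions, not the exact runtime interface):
//
//   bool partial_subtype_check(Klass* sub, Klass* super) {
//     Array<Klass*>* supers = sub->secondary_supers();
//     for (int i = 0; i < supers->length(); i++) {   // REPNE SCASD over the array
//       if (supers->at(i) == super) {
//         sub->set_secondary_super_cache(super);     // remember the hit
//         return true;                               // hit: result/flags zero
//       }
//     }
//     return false;                                  // miss: result/flags non-zero
//   }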
12467 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12468   match(Set result (PartialSubtypeCheck sub super));
12469   effect( KILL rcx, KILL cr );
12470 
12471   ins_cost(1100);  // slightly larger than the next version
12472   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12473             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12474             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12475             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12476             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12477             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12478             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12479      "miss:\t" %}
12480 
12481   opcode(0x1); // Force a XOR of EDI
12482   ins_encode( enc_PartialSubtypeCheck() );
12483   ins_pipe( pipe_slow );
12484 %}
12485 
12486 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12487   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12488   effect( KILL rcx, KILL result );
12489 
12490   ins_cost(1000);
12491   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12492             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12493             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12494             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12495             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12496             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12497      "miss:\t" %}
12498 
12499   opcode(0x0);  // No need to XOR EDI
12500   ins_encode( enc_PartialSubtypeCheck() );
12501   ins_pipe( pipe_slow );
12502 %}
12503 
12504 // ============================================================================
12505 // Branch Instructions -- short offset versions
12506 //
12507 // These instructions are used to replace jumps of a long offset (the default
12508 // match) with jumps of a shorter offset.  These instructions are all tagged
12509 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12510 // match rules in general matching.  Instead, the ADLC generates a conversion
12511 // method in the MachNode which can be used to do in-place replacement of the
12512 // long variant with the shorter variant.  The compiler decides whether a
12513 // branch can use the short form via the is_short_branch_offset() predicate in
12514 // the machine-specific code section of the file.
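//
// For reference (general x86 encoding facts, not specific to this file):
// a short JMP is 0xEB + rel8 and a short Jcc is 0x7x + rel8, 2 bytes each,
// hence the size(2) on the instructions below; the long forms are
// 0xE9 + rel32 (5 bytes) and 0x0F 0x8x + rel32 (6 bytes).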
12515 
12516 // Jump Direct - Label defines a relative address from JMP+1
12517 instruct jmpDir_short(label labl) %{
12518   match(Goto);
12519   effect(USE labl);
12520 
12521   ins_cost(300);
12522   format %{ "JMP,s  $labl" %}
12523   size(2);
12524   ins_encode %{
12525     Label* L = $labl$$label;
12526     __ jmpb(*L);
12527   %}
12528   ins_pipe( pipe_jmp );
12529   ins_short_branch(1);
12530 %}
12531 
12532 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12533 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12534   match(If cop cr);
12535   effect(USE labl);
12536 
12537   ins_cost(300);
12538   format %{ "J$cop,s  $labl" %}
12539   size(2);
12540   ins_encode %{
12541     Label* L = $labl$$label;
12542     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12543   %}
12544   ins_pipe( pipe_jcc );
12545   ins_short_branch(1);
12546 %}
12547 
12548 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12549 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12550   match(CountedLoopEnd cop cr);
12551   effect(USE labl);
12552 
12553   ins_cost(300);
12554   format %{ "J$cop,s  $labl\t# Loop end" %}
12555   size(2);
12556   ins_encode %{
12557     Label* L = $labl$$label;
12558     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12559   %}
12560   ins_pipe( pipe_jcc );
12561   ins_short_branch(1);
12562 %}
12563 
12564 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12565 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12566   match(CountedLoopEnd cop cmp);
12567   effect(USE labl);
12568 
12569   ins_cost(300);
12570   format %{ "J$cop,us $labl\t# Loop end" %}
12571   size(2);
12572   ins_encode %{
12573     Label* L = $labl$$label;
12574     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12575   %}
12576   ins_pipe( pipe_jcc );
12577   ins_short_branch(1);
12578 %}
12579 
12580 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12581   match(CountedLoopEnd cop cmp);
12582   effect(USE labl);
12583 
12584   ins_cost(300);
12585   format %{ "J$cop,us $labl\t# Loop end" %}
12586   size(2);
12587   ins_encode %{
12588     Label* L = $labl$$label;
12589     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12590   %}
12591   ins_pipe( pipe_jcc );
12592   ins_short_branch(1);
12593 %}
12594 
12595 // Jump Direct Conditional - using unsigned comparison
12596 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12597   match(If cop cmp);
12598   effect(USE labl);
12599 
12600   ins_cost(300);
12601   format %{ "J$cop,us $labl" %}
12602   size(2);
12603   ins_encode %{
12604     Label* L = $labl$$label;
12605     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12606   %}
12607   ins_pipe( pipe_jcc );
12608   ins_short_branch(1);
12609 %}
12610 
12611 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12612   match(If cop cmp);
12613   effect(USE labl);
12614 
12615   ins_cost(300);
12616   format %{ "J$cop,us $labl" %}
12617   size(2);
12618   ins_encode %{
12619     Label* L = $labl$$label;
12620     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12621   %}
12622   ins_pipe( pipe_jcc );
12623   ins_short_branch(1);
12624 %}
12625 
12626 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12627   match(If cop cmp);
12628   effect(USE labl);
12629 
12630   ins_cost(300);
12631   format %{ $$template
12632     if ($cop$$cmpcode == Assembler::notEqual) {
12633       $$emit$$"JP,u,s   $labl\n\t"
12634       $$emit$$"J$cop,u,s   $labl"
12635     } else {
12636       $$emit$$"JP,u,s   done\n\t"
12637       $$emit$$"J$cop,u,s  $labl\n\t"
12638       $$emit$$"done:"
12639     }
12640   %}
12641   size(4);
12642   ins_encode %{
12643     Label* l = $labl$$label;
12644     if ($cop$$cmpcode == Assembler::notEqual) {
12645       __ jccb(Assembler::parity, *l);
12646       __ jccb(Assembler::notEqual, *l);
12647     } else if ($cop$$cmpcode == Assembler::equal) {
12648       Label done;
12649       __ jccb(Assembler::parity, done);
12650       __ jccb(Assembler::equal, *l);
12651       __ bind(done);
12652     } else {
12653        ShouldNotReachHere();
12654     }
12655   %}
12656   ins_pipe(pipe_jcc);
12657   ins_short_branch(1);
12658 %}
12659 
12660 // ============================================================================
12661 // Long Compare
12662 //
12663 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12664 // is tricky.  The flavor of compare used depends on whether we are testing
12665 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12666 // The GE test is the negated LT test.  The LE test can be had by commuting
12667 // the operands (yielding a GE test) and then negating; negate again for the
12668 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12669 // NE test is negated from that.
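//
// A small C++ sketch of the tricks above (illustrative only; uses <cstdint>
// fixed-width types):
//
//   // LT vs. zero: only the sign bit of the high half matters.
//   bool long_lt_zero(int32_t hi)               { return hi < 0; }
//
//   // EQ vs. zero: OR the two halves; the long is zero iff the OR is zero.
//   bool long_eq_zero(uint32_t hi, uint32_t lo) { return (hi | lo) == 0; }
//
//   // General LT: compare the low halves, then subtract the high halves with
//   // borrow (CMP lo1,lo2 ; MOV tmp,hi1 ; SBB tmp,hi2) and branch on "less".
//   bool long_lt(int32_t hi1, uint32_t lo1, int32_t hi2, uint32_t lo2) {
//     int64_t a = (int64_t)(((uint64_t)(uint32_t)hi1 << 32) | lo1);
//     int64_t b = (int64_t)(((uint64_t)(uint32_t)hi2 << 32) | lo2);
//     return a < b;
//   }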
12670 
12671 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12672 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12673 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12674 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12675 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12676 // foo match ends up with the wrong leaf.  One fix is to not match both
12677 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12678 // both forms beat the three-way (CmpL3) form of long-compare and both are very useful
12679 // on Intel which has so few registers.
12680 
12681 // Manifest a CmpL result in an integer register.  Very painful.
12682 // This is the test to avoid.
12683 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12684   match(Set dst (CmpL3 src1 src2));
12685   effect( KILL flags );
12686   ins_cost(1000);
12687   format %{ "XOR    $dst,$dst\n\t"
12688             "CMP    $src1.hi,$src2.hi\n\t"
12689             "JLT,s  m_one\n\t"
12690             "JGT,s  p_one\n\t"
12691             "CMP    $src1.lo,$src2.lo\n\t"
12692             "JB,s   m_one\n\t"
12693             "JEQ,s  done\n"
12694     "p_one:\tINC    $dst\n\t"
12695             "JMP,s  done\n"
12696     "m_one:\tDEC    $dst\n"
12697      "done:" %}
12698   ins_encode %{
12699     Label p_one, m_one, done;
12700     __ xorptr($dst$$Register, $dst$$Register);
12701     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12702     __ jccb(Assembler::less,    m_one);
12703     __ jccb(Assembler::greater, p_one);
12704     __ cmpl($src1$$Register, $src2$$Register);
12705     __ jccb(Assembler::below,   m_one);
12706     __ jccb(Assembler::equal,   done);
12707     __ bind(p_one);
12708     __ incrementl($dst$$Register);
12709     __ jmpb(done);
12710     __ bind(m_one);
12711     __ decrementl($dst$$Register);
12712     __ bind(done);
12713   %}
12714   ins_pipe( pipe_slow );
12715 %}
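
// For reference, a minimal C++ sketch of the three-way result this
// instruction manifests (illustrative only):
//
//   int cmp_long3(int32_t hi1, uint32_t lo1, int32_t hi2, uint32_t lo2) {
//     if (hi1 != hi2) return (hi1 < hi2) ? -1 : 1;   // signed high halves
//     if (lo1 != lo2) return (lo1 < lo2) ? -1 : 1;   // unsigned low halves
//     return 0;
//   }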
12716 
12717 //======
12718 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12719 // compares.  Can be used for LE or GT compares by reversing arguments.
12720 // NOT GOOD FOR EQ/NE tests.
12721 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12722   match( Set flags (CmpL src zero ));
12723   ins_cost(100);
12724   format %{ "TEST   $src.hi,$src.hi" %}
12725   opcode(0x85);
12726   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12727   ins_pipe( ialu_cr_reg_reg );
12728 %}
12729 
12730 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12731 // compares.  Can be used for LE or GT compares by reversing arguments.
12732 // NOT GOOD FOR EQ/NE tests.
12733 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12734   match( Set flags (CmpL src1 src2 ));
12735   effect( TEMP tmp );
12736   ins_cost(300);
12737   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12738             "MOV    $tmp,$src1.hi\n\t"
12739             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12740   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12741   ins_pipe( ialu_cr_reg_reg );
12742 %}
12743 
12744 // Long compares reg < zero/reg OR reg >= zero/reg.
12745 // Just a wrapper for a normal branch, plus the predicate test.
12746 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12747   match(If cmp flags);
12748   effect(USE labl);
12749   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12750   expand %{
12751     jmpCon(cmp,flags,labl);    // JLT or JGE...
12752   %}
12753 %}
12754 
12755 //======
12756 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12757 // compares.  Can be used for LE or GT compares by reversing arguments.
12758 // NOT GOOD FOR EQ/NE tests.
12759 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
12760   match(Set flags (CmpUL src zero));
12761   ins_cost(100);
12762   format %{ "TEST   $src.hi,$src.hi" %}
12763   opcode(0x85);
12764   ins_encode(OpcP, RegReg_Hi2(src, src));
12765   ins_pipe(ialu_cr_reg_reg);
12766 %}
12767 
12768 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12769 // compares.  Can be used for LE or GT compares by reversing arguments.
12770 // NOT GOOD FOR EQ/NE tests.
12771 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
12772   match(Set flags (CmpUL src1 src2));
12773   effect(TEMP tmp);
12774   ins_cost(300);
12775   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12776             "MOV    $tmp,$src1.hi\n\t"
12777             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12778   ins_encode(long_cmp_flags2(src1, src2, tmp));
12779   ins_pipe(ialu_cr_reg_reg);
12780 %}
12781 
12782 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
12783 // Just a wrapper for a normal branch, plus the predicate test.
12784 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
12785   match(If cmp flags);
12786   effect(USE labl);
12787   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12788   expand %{
12789     jmpCon(cmp, flags, labl);    // JLT or JGE...
12790   %}
12791 %}
12792 
12793 // Compare 2 longs and CMOVE longs.
12794 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12795   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12796   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12797   ins_cost(400);
12798   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12799             "CMOV$cmp $dst.hi,$src.hi" %}
12800   opcode(0x0F,0x40);
12801   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12802   ins_pipe( pipe_cmov_reg_long );
12803 %}
12804 
12805 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12806   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12807   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12808   ins_cost(500);
12809   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12810             "CMOV$cmp $dst.hi,$src.hi" %}
12811   opcode(0x0F,0x40);
12812   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12813   ins_pipe( pipe_cmov_reg_long );
12814 %}
12815 
12816 // Compare 2 longs and CMOVE ints.
12817 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12818   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12819   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12820   ins_cost(200);
12821   format %{ "CMOV$cmp $dst,$src" %}
12822   opcode(0x0F,0x40);
12823   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12824   ins_pipe( pipe_cmov_reg );
12825 %}
12826 
12827 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12828   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12829   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12830   ins_cost(250);
12831   format %{ "CMOV$cmp $dst,$src" %}
12832   opcode(0x0F,0x40);
12833   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12834   ins_pipe( pipe_cmov_mem );
12835 %}
12836 
12837 // Compare 2 longs and CMOVE ptrs.
12838 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12839   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12840   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12841   ins_cost(200);
12842   format %{ "CMOV$cmp $dst,$src" %}
12843   opcode(0x0F,0x40);
12844   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12845   ins_pipe( pipe_cmov_reg );
12846 %}
12847 
12848 // Compare 2 longs and CMOVE doubles
12849 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12850   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12851   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12852   ins_cost(200);
12853   expand %{
12854     fcmovDPR_regS(cmp,flags,dst,src);
12855   %}
12856 %}
12857 
12858 // Compare 2 longs and CMOVE doubles
12859 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12860   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12861   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12862   ins_cost(200);
12863   expand %{
12864     fcmovD_regS(cmp,flags,dst,src);
12865   %}
12866 %}
12867 
12868 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12869   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12870   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12871   ins_cost(200);
12872   expand %{
12873     fcmovFPR_regS(cmp,flags,dst,src);
12874   %}
12875 %}
12876 
12877 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12878   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12879   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12880   ins_cost(200);
12881   expand %{
12882     fcmovF_regS(cmp,flags,dst,src);
12883   %}
12884 %}
12885 
12886 //======
12887 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12888 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12889   match( Set flags (CmpL src zero ));
12890   effect(TEMP tmp);
12891   ins_cost(200);
12892   format %{ "MOV    $tmp,$src.lo\n\t"
12893             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12894   ins_encode( long_cmp_flags0( src, tmp ) );
12895   ins_pipe( ialu_reg_reg_long );
12896 %}
12897 
12898 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12899 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12900   match( Set flags (CmpL src1 src2 ));
12901   ins_cost(200+300);
12902   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12903             "JNE,s  skip\n\t"
12904             "CMP    $src1.hi,$src2.hi\n\t"
12905      "skip:\t" %}
12906   ins_encode( long_cmp_flags1( src1, src2 ) );
12907   ins_pipe( ialu_cr_reg_reg );
12908 %}
12909 
12910 // Long compare reg == zero/reg OR reg != zero/reg
12911 // Just a wrapper for a normal branch, plus the predicate test.
12912 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12913   match(If cmp flags);
12914   effect(USE labl);
12915   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12916   expand %{
12917     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12918   %}
12919 %}
12920 
12921 //======
12922 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12923 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
12924   match(Set flags (CmpUL src zero));
12925   effect(TEMP tmp);
12926   ins_cost(200);
12927   format %{ "MOV    $tmp,$src.lo\n\t"
12928             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
12929   ins_encode(long_cmp_flags0(src, tmp));
12930   ins_pipe(ialu_reg_reg_long);
12931 %}
12932 
12933 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12934 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
12935   match(Set flags (CmpUL src1 src2));
12936   ins_cost(200+300);
12937   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12938             "JNE,s  skip\n\t"
12939             "CMP    $src1.hi,$src2.hi\n\t"
12940      "skip:\t" %}
12941   ins_encode(long_cmp_flags1(src1, src2));
12942   ins_pipe(ialu_cr_reg_reg);
12943 %}
12944 
12945 // Unsigned long compare reg == zero/reg OR reg != zero/reg
12946 // Just a wrapper for a normal branch, plus the predicate test.
12947 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
12948   match(If cmp flags);
12949   effect(USE labl);
12950   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
12951   expand %{
12952     jmpCon(cmp, flags, labl);    // JEQ or JNE...
12953   %}
12954 %}
12955 
12956 // Compare 2 longs and CMOVE longs.
12957 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12958   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12959   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12960   ins_cost(400);
12961   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12962             "CMOV$cmp $dst.hi,$src.hi" %}
12963   opcode(0x0F,0x40);
12964   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12965   ins_pipe( pipe_cmov_reg_long );
12966 %}
12967 
12968 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12969   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12970   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12971   ins_cost(500);
12972   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12973             "CMOV$cmp $dst.hi,$src.hi" %}
12974   opcode(0x0F,0x40);
12975   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12976   ins_pipe( pipe_cmov_reg_long );
12977 %}
12978 
12979 // Compare 2 longs and CMOVE ints.
12980 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12981   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12982   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12983   ins_cost(200);
12984   format %{ "CMOV$cmp $dst,$src" %}
12985   opcode(0x0F,0x40);
12986   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12987   ins_pipe( pipe_cmov_reg );
12988 %}
12989 
12990 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12991   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12992   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12993   ins_cost(250);
12994   format %{ "CMOV$cmp $dst,$src" %}
12995   opcode(0x0F,0x40);
12996   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12997   ins_pipe( pipe_cmov_mem );
12998 %}
12999 
13000 // Compare 2 longs and CMOVE ptrs.
13001 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13002   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13003   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13004   ins_cost(200);
13005   format %{ "CMOV$cmp $dst,$src" %}
13006   opcode(0x0F,0x40);
13007   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13008   ins_pipe( pipe_cmov_reg );
13009 %}
13010 
13011 // Compare 2 longs and CMOVE doubles
13012 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13013   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13014   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13015   ins_cost(200);
13016   expand %{
13017     fcmovDPR_regS(cmp,flags,dst,src);
13018   %}
13019 %}
13020 
13021 // Compare 2 longs and CMOVE doubles
13022 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13023   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13024   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13025   ins_cost(200);
13026   expand %{
13027     fcmovD_regS(cmp,flags,dst,src);
13028   %}
13029 %}
13030 
13031 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13032   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13033   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13034   ins_cost(200);
13035   expand %{
13036     fcmovFPR_regS(cmp,flags,dst,src);
13037   %}
13038 %}
13039 
13040 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13041   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13042   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13043   ins_cost(200);
13044   expand %{
13045     fcmovF_regS(cmp,flags,dst,src);
13046   %}
13047 %}
13048 
13049 //======
13050 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13051 // Same as cmpL_reg_flags_LEGT except must negate src
13052 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13053   match( Set flags (CmpL src zero ));
13054   effect( TEMP tmp );
13055   ins_cost(300);
13056   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13057             "CMP    $tmp,$src.lo\n\t"
13058             "SBB    $tmp,$src.hi\n\t" %}
13059   ins_encode( long_cmp_flags3(src, tmp) );
13060   ins_pipe( ialu_reg_reg_long );
13061 %}
13062 
13063 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13064 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13065 // requires a commuted test to get the same result.
13066 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13067   match( Set flags (CmpL src1 src2 ));
13068   effect( TEMP tmp );
13069   ins_cost(300);
13070   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13071             "MOV    $tmp,$src2.hi\n\t"
13072             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13073   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13074   ins_pipe( ialu_cr_reg_reg );
13075 %}
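
// Illustrative note (not code from this file): the swapped operands rely on
// the identities
//
//   a <= b   <==>   b >= a      // LE reuses the GE machinery on (b, a)
//   a >  b   <==>   b <  a      // GT reuses the LT machinery on (b, a)
//
// which is why these flavors pair with the cmpOp_commute operand below.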
13076 
13077 // Long compares reg <= zero/reg OR reg > zero/reg.
13078 // Just a wrapper for a normal branch, plus the predicate test
13079 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13080   match(If cmp flags);
13081   effect(USE labl);
13082   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13083   ins_cost(300);
13084   expand %{
13085     jmpCon(cmp,flags,labl);    // JGT or JLE...
13086   %}
13087 %}
13088 
13089 //======
13090 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13091 // Same as cmpUL_reg_flags_LEGT except must negate src
13092 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13093   match(Set flags (CmpUL src zero));
13094   effect(TEMP tmp);
13095   ins_cost(300);
13096   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13097             "CMP    $tmp,$src.lo\n\t"
13098             "SBB    $tmp,$src.hi\n\t" %}
13099   ins_encode(long_cmp_flags3(src, tmp));
13100   ins_pipe(ialu_reg_reg_long);
13101 %}
13102 
13103 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13104 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13105 // requires a commuted test to get the same result.
13106 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13107   match(Set flags (CmpUL src1 src2));
13108   effect(TEMP tmp);
13109   ins_cost(300);
13110   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13111             "MOV    $tmp,$src2.hi\n\t"
13112             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13113   ins_encode(long_cmp_flags2( src2, src1, tmp));
13114   ins_pipe(ialu_cr_reg_reg);
13115 %}
13116 
13117 // Unsigned long compares reg <= zero/reg OR reg > zero/reg.
13118 // Just a wrapper for a normal branch, plus the predicate test
13119 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13120   match(If cmp flags);
13121   effect(USE labl);
13122   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13123   ins_cost(300);
13124   expand %{
13125     jmpCon(cmp, flags, labl);    // JGT or JLE...
13126   %}
13127 %}
13128 
13129 // Compare 2 longs and CMOVE longs.
13130 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13131   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13132   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13133   ins_cost(400);
13134   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13135             "CMOV$cmp $dst.hi,$src.hi" %}
13136   opcode(0x0F,0x40);
13137   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13138   ins_pipe( pipe_cmov_reg_long );
13139 %}
13140 
13141 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13142   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13143   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13144   ins_cost(500);
13145   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13146             "CMOV$cmp $dst.hi,$src.hi+4" %}
13147   opcode(0x0F,0x40);
13148   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13149   ins_pipe( pipe_cmov_reg_long );
13150 %}
13151 
13152 // Compare 2 longs and CMOVE ints.
13153 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13154   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13155   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13156   ins_cost(200);
13157   format %{ "CMOV$cmp $dst,$src" %}
13158   opcode(0x0F,0x40);
13159   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13160   ins_pipe( pipe_cmov_reg );
13161 %}
13162 
13163 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13164   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13165   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13166   ins_cost(250);
13167   format %{ "CMOV$cmp $dst,$src" %}
13168   opcode(0x0F,0x40);
13169   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13170   ins_pipe( pipe_cmov_mem );
13171 %}
13172 
13173 // Compare 2 longs and CMOVE ptrs.
13174 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13175   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13176   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13177   ins_cost(200);
13178   format %{ "CMOV$cmp $dst,$src" %}
13179   opcode(0x0F,0x40);
13180   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13181   ins_pipe( pipe_cmov_reg );
13182 %}
13183 
13184 // Compare 2 longs and CMOVE doubles
13185 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13186   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13187   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13188   ins_cost(200);
13189   expand %{
13190     fcmovDPR_regS(cmp,flags,dst,src);
13191   %}
13192 %}
13193 
13194 // Compare 2 longs and CMOVE doubles
13195 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13196   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13197   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13198   ins_cost(200);
13199   expand %{
13200     fcmovD_regS(cmp,flags,dst,src);
13201   %}
13202 %}
13203 
13204 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13205   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13206   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13207   ins_cost(200);
13208   expand %{
13209     fcmovFPR_regS(cmp,flags,dst,src);
13210   %}
13211 %}
13212 
13213 
13214 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13215   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13216   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13217   ins_cost(200);
13218   expand %{
13219     fcmovF_regS(cmp,flags,dst,src);
13220   %}
13221 %}
13222 
13223 
13224 // ============================================================================
13225 // Procedure Call/Return Instructions
13226 // Call Java Static Instruction
13227 // Note: If this code changes, the corresponding ret_addr_offset() and
13228 //       compute_padding() functions will have to be adjusted.
13229 instruct CallStaticJavaDirect(method meth) %{
13230   match(CallStaticJava);
13231   effect(USE meth);
13232 
13233   ins_cost(300);
13234   format %{ "CALL,static " %}
13235   opcode(0xE8); /* E8 cd */
13236   ins_encode( pre_call_resets,
13237               Java_Static_Call( meth ),
13238               call_epilog,
13239               post_call_FPU );
13240   ins_pipe( pipe_slow );
13241   ins_alignment(4);
13242 %}
13243 
13244 // Call Java Dynamic Instruction
13245 // Note: If this code changes, the corresponding ret_addr_offset() and
13246 //       compute_padding() functions will have to be adjusted.
13247 instruct CallDynamicJavaDirect(method meth) %{
13248   match(CallDynamicJava);
13249   effect(USE meth);
13250 
13251   ins_cost(300);
13252   format %{ "MOV    EAX,(oop)-1\n\t"
13253             "CALL,dynamic" %}
13254   opcode(0xE8); /* E8 cd */
13255   ins_encode( pre_call_resets,
13256               Java_Dynamic_Call( meth ),
13257               call_epilog,
13258               post_call_FPU );
13259   ins_pipe( pipe_slow );
13260   ins_alignment(4);
13261 %}
13262 
13263 // Call Runtime Instruction
13264 instruct CallRuntimeDirect(method meth) %{
13265   match(CallRuntime );
13266   effect(USE meth);
13267 
13268   ins_cost(300);
13269   format %{ "CALL,runtime " %}
13270   opcode(0xE8); /* E8 cd */
13271   // Use FFREEs to clear entries in float stack
13272   ins_encode( pre_call_resets,
13273               FFree_Float_Stack_All,
13274               Java_To_Runtime( meth ),
13275               post_call_FPU );
13276   ins_pipe( pipe_slow );
13277 %}
13278 
13279 // Call runtime without safepoint
13280 instruct CallLeafDirect(method meth) %{
13281   match(CallLeaf);
13282   effect(USE meth);
13283 
13284   ins_cost(300);
13285   format %{ "CALL_LEAF,runtime " %}
13286   opcode(0xE8); /* E8 cd */
13287   ins_encode( pre_call_resets,
13288               FFree_Float_Stack_All,
13289               Java_To_Runtime( meth ),
13290               Verify_FPU_For_Leaf, post_call_FPU );
13291   ins_pipe( pipe_slow );
13292 %}
13293 
13294 instruct CallLeafNoFPDirect(method meth) %{
13295   match(CallLeafNoFP);
13296   effect(USE meth);
13297 
13298   ins_cost(300);
13299   format %{ "CALL_LEAF_NOFP,runtime " %}
13300   opcode(0xE8); /* E8 cd */
13301   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13302   ins_pipe( pipe_slow );
13303 %}
13304 
13305 
13306 // Return Instruction
13307 // Remove the return address & jump to it.
13308 instruct Ret() %{
13309   match(Return);
13310   format %{ "RET" %}
13311   opcode(0xC3);
13312   ins_encode(OpcP);
13313   ins_pipe( pipe_jmp );
13314 %}
13315 
13316 // Tail Call; Jump from runtime stub to Java code.
13317 // Also known as an 'interprocedural jump'.
13318 // Target of jump will eventually return to caller.
13319 // TailJump below removes the return address.
13320 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13321   match(TailCall jump_target method_oop );
13322   ins_cost(300);
13323   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13324   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13325   ins_encode( OpcP, RegOpc(jump_target) );
13326   ins_pipe( pipe_jmp );
13327 %}
13328 
13329 
13330 // Tail Jump; remove the return address; jump to target.
13331 // TailCall above leaves the return address around.
13332 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13333   match( TailJump jump_target ex_oop );
13334   ins_cost(300);
13335   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13336             "JMP    $jump_target " %}
13337   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13338   ins_encode( enc_pop_rdx,
13339               OpcP, RegOpc(jump_target) );
13340   ins_pipe( pipe_jmp );
13341 %}
13342 
13343 // Create exception oop: created by stack-crawling runtime code.
13344 // Created exception is now available to this handler, and is set up
13345 // just prior to jumping to this handler.  No code emitted.
13346 instruct CreateException( eAXRegP ex_oop )
13347 %{
13348   match(Set ex_oop (CreateEx));
13349 
13350   size(0);
13351   // use the following format syntax
13352   format %{ "# exception oop is in EAX; no code emitted" %}
13353   ins_encode();
13354   ins_pipe( empty );
13355 %}
13356 
13357 
13358 // Rethrow exception:
13359 // The exception oop will come in the first argument position.
13360 // Then JUMP (not call) to the rethrow stub code.
13361 instruct RethrowException()
13362 %{
13363   match(Rethrow);
13364 
13365   // use the following format syntax
13366   format %{ "JMP    rethrow_stub" %}
13367   ins_encode(enc_rethrow);
13368   ins_pipe( pipe_jmp );
13369 %}
13370 
13371 // inlined locking and unlocking
13372 
13373 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13374   predicate(Compile::current()->use_rtm());
13375   match(Set cr (FastLock object box));
13376   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13377   ins_cost(300);
13378   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13379   ins_encode %{
13380     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13381                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13382                  _counters, _rtm_counters, _stack_rtm_counters,
13383                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13384                  true, ra_->C->profile_rtm());
13385   %}
13386   ins_pipe(pipe_slow);
13387 %}
13388 
13389 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13390   predicate(!Compile::current()->use_rtm());
13391   match(Set cr (FastLock object box));
13392   effect(TEMP tmp, TEMP scr, USE_KILL box);
13393   ins_cost(300);
13394   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13395   ins_encode %{
13396     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13397                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13398   %}
13399   ins_pipe(pipe_slow);
13400 %}
13401 
13402 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13403   match(Set cr (FastUnlock object box));
13404   effect(TEMP tmp, USE_KILL box);
13405   ins_cost(300);
13406   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13407   ins_encode %{
13408     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13409   %}
13410   ins_pipe(pipe_slow);
13411 %}
13412 
13413 
13414 
13415 // ============================================================================
13416 // Safepoint Instruction
13417 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13418   match(SafePoint poll);
13419   effect(KILL cr, USE poll);
13420 
13421   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13422   ins_cost(125);
13423   // EBP would need size(3)
13424   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13425   ins_encode %{
13426     __ relocate(relocInfo::poll_type);
13427     address pre_pc = __ pc();
13428     __ testl(rax, Address($poll$$Register, 0));
13429     address post_pc = __ pc();
13430     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13431   %}
13432   ins_pipe(ialu_reg_mem);
13433 %}
13434 
13435 
13436 // ============================================================================
13437 // This name is KNOWN by the ADLC and cannot be changed.
13438 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13439 // for this guy.
13440 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13441   match(Set dst (ThreadLocal));
13442   effect(DEF dst, KILL cr);
13443 
13444   format %{ "MOV    $dst, Thread::current()" %}
13445   ins_encode %{
13446     Register dstReg = as_Register($dst$$reg);
13447     __ get_thread(dstReg);
13448   %}
13449   ins_pipe( ialu_reg_fat );
13450 %}
13451 
13452 
13453 
13454 //----------PEEPHOLE RULES-----------------------------------------------------
13455 // These must follow all instruction definitions as they use the names
13456 // defined in the instructions definitions.
13457 //
13458 // peepmatch ( root_instr_name [preceding_instruction]* );
13459 //
13460 // peepconstraint ( instruction_number.operand_name relational_op
13461 //                  instruction_number.operand_name
13462 //                  [, ...] );
13463 // // instruction numbers are zero-based using left to right order in peepmatch
13464 //
13465 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13466 // // provide an instruction_number.operand_name for each operand that appears
13467 // // in the replacement instruction's match rule
13468 //
13469 // ---------VM FLAGS---------------------------------------------------------
13470 //
13471 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13472 //
13473 // Each peephole rule is given an identifying number starting with zero and
13474 // increasing by one in the order seen by the parser.  An individual peephole
13475 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13476 // on the command-line.
13477 //
13478 // ---------CURRENT LIMITATIONS----------------------------------------------
13479 //
13480 // Only match adjacent instructions in same basic block
13481 // Only equality constraints
13482 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13483 // Only one replacement instruction
13484 //
13485 // ---------EXAMPLE----------------------------------------------------------
13486 //
13487 // // pertinent parts of existing instructions in architecture description
13488 // instruct movI(rRegI dst, rRegI src) %{
13489 //   match(Set dst (CopyI src));
13490 // %}
13491 //
13492 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13493 //   match(Set dst (AddI dst src));
13494 //   effect(KILL cr);
13495 // %}
13496 //
13497 // // Change (inc mov) to lea
13498 // peephole %{
13499 //   // increment preceded by register-register move
13500 //   peepmatch ( incI_eReg movI );
13501 //   // require that the destination register of the increment
13502 //   // match the destination register of the move
13503 //   peepconstraint ( 0.dst == 1.dst );
13504 //   // construct a replacement instruction that sets
13505 //   // the destination to ( move's source register + one )
13506 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13507 // %}
13508 //
13509 // Implementation no longer uses movX instructions since
13510 // machine-independent system no longer uses CopyX nodes.
13511 //
13512 // peephole %{
13513 //   peepmatch ( incI_eReg movI );
13514 //   peepconstraint ( 0.dst == 1.dst );
13515 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13516 // %}
13517 //
13518 // peephole %{
13519 //   peepmatch ( decI_eReg movI );
13520 //   peepconstraint ( 0.dst == 1.dst );
13521 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13522 // %}
13523 //
13524 // peephole %{
13525 //   peepmatch ( addI_eReg_imm movI );
13526 //   peepconstraint ( 0.dst == 1.dst );
13527 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13528 // %}
13529 //
13530 // peephole %{
13531 //   peepmatch ( addP_eReg_imm movP );
13532 //   peepconstraint ( 0.dst == 1.dst );
13533 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13534 // %}
13535 
13536 // // Change load of spilled value to only a spill
13537 // instruct storeI(memory mem, rRegI src) %{
13538 //   match(Set mem (StoreI mem src));
13539 // %}
13540 //
13541 // instruct loadI(rRegI dst, memory mem) %{
13542 //   match(Set dst (LoadI mem));
13543 // %}
13544 //
13545 peephole %{
13546   peepmatch ( loadI storeI );
13547   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13548   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13549 %}
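
// The effect of the rule above, sketched in pseudo-assembly (illustrative):
//
//   MOV    [mem],EAX       // storeI: spill the value
//   MOV    EAX,[mem]       // loadI:  reload of the value just spilled
//
// is replaced by the store alone, since the reload is redundant.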
13550 
13551 //----------SMARTSPILL RULES---------------------------------------------------
13552 // These must follow all instruction definitions as they use the names
13553 // defined in the instructions definitions.