1 //
   2 // Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
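//
// As a worked example, the EBX definition below,
//   reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
// reads: EBX is save-on-call for compiled code, save-on-entry under the
// C calling convention, is spilled as an integer register (Op_RegI), and
// has hardware encoding 3 placed into the opcodes.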
  61 
  62 // General Registers
// Previously EBX, ESI, and EDI were set as save-on-entry for Java code.
// SOE was then turned off in Java code due to the frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
// Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
// allocator and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// OK, so here's the trick: FPR1 is really st(0), except in the midst of
// emitting assembly for a machnode. During emission the FPU stack is
// pushed, making FPR1 == st(1) temporarily. However, at any safepoint
// the stack will not have this element, so FPR1 == st(0) from the
// oopMap viewpoint. This same numbering weirdness forces the instruction
// encoding to play games with the register encode to correct for the
// 0/1 issue. See MachSpillCopyNode::implementation, where it does
// flt->flt moves, for an example.
  89 //
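// A small sketch of the mapping described above: outside of machnode
// emission the allocator's FPR names and the hardware stack agree,
//   FPR1 -> st(0), FPR2 -> st(1), ..., FPR7 -> st(6)
// but during emission one extra element is pushed, so temporarily
//   FPR1 -> st(1), FPR2 -> st(2), ...
// which is why the encodings later in this file apply "-1" corrections
// to the FPR register encode.
//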
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
// Empty fill registers, which are never used but supply alignment for the XMM registers.
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
 108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
 109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
 110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
 111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
 112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
 113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
 114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI)
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
// Classes of integer register pairs that align with the calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (nor EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description.
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
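// For example, the Long pairs used here are EDX:EAX, EBX:ECX and EDI:EBP.
// The low halves encode as 0 (EAX), 1 (ECX) and 5 (EBP) and the high halves
// as 2 (EDX), 3 (EBX) and 7 (EDI), so adding 2 to the low half's hardware
// encoding always yields the high half.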
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value into a 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
// Buffer for the 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
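
// Note on sizing: the pool above reserves one extra 128-bit slot so that
// rounding any of the &fp_signmask_pool[2*i] addresses down to a 16-byte
// boundary (the (adr)&(~0xF) step in double_quadword) still leaves the
// full 16-byte store within the pool, whatever the pool's own alignment.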
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (VM_Version::supports_vzeroupper()) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
 298 
// !!!!! Special hack to get all types of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
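
// For reference: a direct CALL is opcode 0xE8 plus a 32-bit displacement,
// i.e. 5 bytes, which is the "5" above.  A dynamic call is preceded by a
// 5-byte MOV (the one skipped in compute_padding below), which is where
// the "10" comes from.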
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 //
 318 // Compute padding required for nodes which need alignment
 319 //
 320 
 321 // The address of the call instruction needs to be 4-byte aligned to
 322 // ensure that it does not span a cache line so that it can be patched.
 323 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 324   current_offset += pre_call_resets_size();  // skip fldcw, if any
 325   current_offset += 1;      // skip call opcode byte
 326   return align_up(current_offset, alignment_required()) - current_offset;
 327 }
 328 
 329 // The address of the call instruction needs to be 4-byte aligned to
 330 // ensure that it does not span a cache line so that it can be patched.
 331 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 332   current_offset += pre_call_resets_size();  // skip fldcw, if any
 333   current_offset += 5;      // skip MOV instruction
 334   current_offset += 1;      // skip call opcode byte
 335   return align_up(current_offset, alignment_required()) - current_offset;
 336 }
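
// A short worked example, assuming alignment_required() is 4 for these call
// nodes: with no pre-call resets and the call opcode byte landing at offset 6,
// current_offset becomes 7, and align_up(7, 4) - 7 = 1 byte of padding is
// inserted ahead of the call.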
 337 
 338 // EMIT_RM()
 339 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 340   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 341   cbuf.insts()->emit_int8(c);
 342 }
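
// The byte built by emit_rm is an x86 ModRM byte: bits 7..6 are the mod
// field, bits 5..3 the reg field (or an opcode extension), and bits 2..0
// the r/m field.  For example, emit_rm(cbuf, 0x3, 0x0, ESP_enc) emits
// (0x3 << 6) | (0x0 << 3) | 0x4 == 0xC4, i.e. register-direct ESP.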
 343 
 344 // EMIT_CC()
 345 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 346   unsigned char c = (unsigned char)( f1 | f2 );
 347   cbuf.insts()->emit_int8(c);
 348 }
 349 
 350 // EMIT_OPCODE()
 351 void emit_opcode(CodeBuffer &cbuf, int code) {
 352   cbuf.insts()->emit_int8((unsigned char) code);
 353 }
 354 
 355 // EMIT_OPCODE() w/ relocation information
 356 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 357   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 358   emit_opcode(cbuf, code);
 359 }
 360 
 361 // EMIT_D8()
 362 void emit_d8(CodeBuffer &cbuf, int d8) {
 363   cbuf.insts()->emit_int8((unsigned char) d8);
 364 }
 365 
 366 // EMIT_D16()
 367 void emit_d16(CodeBuffer &cbuf, int d16) {
 368   cbuf.insts()->emit_int16(d16);
 369 }
 370 
 371 // EMIT_D32()
 372 void emit_d32(CodeBuffer &cbuf, int d32) {
 373   cbuf.insts()->emit_int32(d32);
 374 }
 375 
 376 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 377 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 378         int format) {
 379   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 380   cbuf.insts()->emit_int32(d32);
 381 }
 382 
 383 // emit 32 bit value and construct relocation entry from RelocationHolder
 384 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 385         int format) {
 386 #ifdef ASSERT
 387   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 388     assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code");
 389   }
 390 #endif
 391   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 392   cbuf.insts()->emit_int32(d32);
 393 }
 394 
 395 // Access stack slot for load or store
 396 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 397   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 398   if( -128 <= disp && disp <= 127 ) {
 399     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 400     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement
 402   } else {
 403     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 404     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement
 406   }
 407 }
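
// Note on the SIB bytes above: any ESP-based address requires a SIB byte.
// emit_rm(cbuf, 0x00, ESP_enc, ESP_enc) reuses the ModRM packing to build
// the SIB byte 0x24 (scale = 1, no index, base = ESP), which is why the
// helper is called twice per stack-slot access.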
 408 
 409    // rRegI ereg, memory mem) %{    // emit_reg_mem
 410 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // If there is no index and no scale, use the form without a SIB byte.
 412   if ((index == 0x4) &&
 413       (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0, unless the base is EBP
 415     if ( (displace == 0) && (base != EBP_enc) ) {
 416       emit_rm(cbuf, 0x0, reg_encoding, base);
 417     }
 418     else {                    // If 8-bit displacement, mode 0x1
 419       if ((displace >= -128) && (displace <= 127)
 420           && (disp_reloc == relocInfo::none) ) {
 421         emit_rm(cbuf, 0x1, reg_encoding, base);
 422         emit_d8(cbuf, displace);
 423       }
 424       else {                  // If 32-bit displacement
 425         if (base == -1) { // Special flag for absolute address
 426           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 427           // (manual lies; no SIB needed here)
 428           if ( disp_reloc != relocInfo::none ) {
 429             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 430           } else {
 431             emit_d32      (cbuf, displace);
 432           }
 433         }
 434         else {                // Normal base + offset
 435           emit_rm(cbuf, 0x2, reg_encoding, base);
 436           if ( disp_reloc != relocInfo::none ) {
 437             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 438           } else {
 439             emit_d32      (cbuf, displace);
 440           }
 441         }
 442       }
 443     }
 444   }
 445   else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0, unless the base is EBP
 447     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 448       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 449       emit_rm(cbuf, scale, index, base);
 450     }
 451     else {                    // If 8-bit displacement, mode 0x1
 452       if ((displace >= -128) && (displace <= 127)
 453           && (disp_reloc == relocInfo::none) ) {
 454         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 455         emit_rm(cbuf, scale, index, base);
 456         emit_d8(cbuf, displace);
 457       }
 458       else {                  // If 32-bit displacement
 459         if (base == 0x04 ) {
 460           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 461           emit_rm(cbuf, scale, index, 0x04);
 462         } else {
 463           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 464           emit_rm(cbuf, scale, index, base);
 465         }
 466         if ( disp_reloc != relocInfo::none ) {
 467           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 468         } else {
 469           emit_d32      (cbuf, displace);
 470         }
 471       }
 472     }
 473   }
 474 }
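
// Summary of the addressing forms emitted above (mod is the top two ModRM bits):
//   mod 0x0 - no displacement (or a bare disp32 when base == -1, i.e. absolute)
//   mod 0x1 - 8-bit displacement, only when it fits in 8 bits and has no relocation
//   mod 0x2 - 32-bit displacement
// A SIB byte follows the ModRM byte whenever an index or scale is supplied
// or the base register is ESP.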
 475 
 476 
 477 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 478   if( dst_encoding == src_encoding ) {
 479     // reg-reg copy, use an empty encoding
 480   } else {
 481     emit_opcode( cbuf, 0x8B );
 482     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 483   }
 484 }
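
// 0x8B is the MOV r32, r/m32 opcode and mod 0x3 selects register-direct
// addressing, so every non-trivial copy above is exactly two bytes.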
 485 
 486 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 487   Label exit;
 488   __ jccb(Assembler::noParity, exit);
 489   __ pushf();
 490   //
 491   // comiss/ucomiss instructions set ZF,PF,CF flags and
 492   // zero OF,AF,SF for NaN values.
 493   // Fixup flags by zeroing ZF,PF so that compare of NaN
 494   // values returns 'less than' result (CF is set).
 495   // Leave the rest of flags unchanged.
 496   //
 497   //    7 6 5 4 3 2 1 0
 498   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 499   //    0 0 1 0 1 0 1 1   (0x2B)
 500   //
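  // Note that this path is only taken when PF is set (an unordered compare,
  // i.e. a NaN operand).  In that case comiss/ucomiss has already set
  // ZF, PF and CF and cleared OF, AF and SF, so the AND below effectively
  // just clears ZF and PF while keeping CF set.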
 501   __ andl(Address(rsp, 0), 0xffffff2b);
 502   __ popf();
 503   __ bind(exit);
 504 }
 505 
 506 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 507   Label done;
 508   __ movl(dst, -1);
 509   __ jcc(Assembler::parity, done);
 510   __ jcc(Assembler::below, done);
 511   __ setb(Assembler::notEqual, dst);
 512   __ movzbl(dst, dst);
 513   __ bind(done);
 514 }
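
// The net effect of emit_cmpfp3: dst ends up -1 for an unordered (NaN) or
// 'below' result, 0 when the operands compare equal, and 1 when 'above',
// i.e. the usual -1/0/1 three-way compare result.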
 515 
 516 
 517 //=============================================================================
 518 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 519 
 520 int ConstantTable::calculate_table_base_offset() const {
 521   return 0;  // absolute addressing, no offset
 522 }
 523 
 524 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 525 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 526   ShouldNotReachHere();
 527 }
 528 
 529 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 530   // Empty encoding
 531 }
 532 
 533 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 534   return 0;
 535 }
 536 
 537 #ifndef PRODUCT
 538 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 539   st->print("# MachConstantBaseNode (empty encoding)");
 540 }
 541 #endif
 542 
 543 
 544 //=============================================================================
 545 #ifndef PRODUCT
 546 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 547   Compile* C = ra_->C;
 548 
 549   int framesize = C->output()->frame_size_in_bytes();
 550   int bangsize = C->output()->bang_size_in_bytes();
 551   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 552   // Remove wordSize for return addr which is already pushed.
 553   framesize -= wordSize;
 554 
 555   if (C->output()->need_stack_bang(bangsize)) {
 556     framesize -= wordSize;
 557     st->print("# stack bang (%d bytes)", bangsize);
 558     st->print("\n\t");
 559     st->print("PUSH   EBP\t# Save EBP");
 560     if (PreserveFramePointer) {
 561       st->print("\n\t");
 562       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 563     }
 564     if (framesize) {
 565       st->print("\n\t");
 566       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 567     }
 568   } else {
 569     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 570     st->print("\n\t");
 571     framesize -= wordSize;
 572     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 573     if (PreserveFramePointer) {
 574       st->print("\n\t");
 575       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 576       if (framesize > 0) {
 577         st->print("\n\t");
 578         st->print("ADD    EBP, #%d", framesize);
 579       }
 580     }
 581   }
 582 
 583   if (VerifyStackAtCalls) {
 584     st->print("\n\t");
 585     framesize -= wordSize;
 586     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 587   }
 588 
 589   if( C->in_24_bit_fp_mode() ) {
 590     st->print("\n\t");
 591     st->print("FLDCW  \t# load 24 bit fpu control word");
 592   }
 593   if (UseSSE >= 2 && VerifyFPU) {
 594     st->print("\n\t");
 595     st->print("# verify FPU stack (must be clean on entry)");
 596   }
 597 
 598 #ifdef ASSERT
 599   if (VerifyStackAtCalls) {
 600     st->print("\n\t");
 601     st->print("# stack alignment check");
 602   }
 603 #endif
 604   st->cr();
 605 }
 606 #endif
 607 
 608 
 609 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 610   Compile* C = ra_->C;
 611   MacroAssembler _masm(&cbuf);
 612 
 613   int framesize = C->output()->frame_size_in_bytes();
 614   int bangsize = C->output()->bang_size_in_bytes();
 615 
 616   __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != NULL);
 617 
 618   C->output()->set_frame_complete(cbuf.insts_size());
 619 
 620   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users of the constant
    // table might be emitted before MachConstantBaseNode.
 623     ConstantTable& constant_table = C->output()->constant_table();
 624     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 625   }
 626 }
 627 
 628 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 629   return MachNode::size(ra_); // too many variables; just compute it the hard way
 630 }
 631 
 632 int MachPrologNode::reloc() const {
 633   return 0; // a large enough number
 634 }
 635 
 636 //=============================================================================
 637 #ifndef PRODUCT
 638 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 639   Compile *C = ra_->C;
 640   int framesize = C->output()->frame_size_in_bytes();
 641   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and rbp.
 643   framesize -= 2*wordSize;
 644 
 645   if (C->max_vector_size() > 16) {
 646     st->print("VZEROUPPER");
 647     st->cr(); st->print("\t");
 648   }
 649   if (C->in_24_bit_fp_mode()) {
 650     st->print("FLDCW  standard control word");
 651     st->cr(); st->print("\t");
 652   }
 653   if (framesize) {
 654     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 655     st->cr(); st->print("\t");
 656   }
 657   st->print_cr("POPL   EBP"); st->print("\t");
 658   if (do_polling() && C->is_method_compilation()) {
 659     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 660     st->cr(); st->print("\t");
 661   }
 662 }
 663 #endif
 664 
 665 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 666   Compile *C = ra_->C;
 667   MacroAssembler _masm(&cbuf);
 668 
 669   if (C->max_vector_size() > 16) {
 670     // Clear upper bits of YMM registers when current compiled code uses
 671     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 672     _masm.vzeroupper();
 673   }
 674   // If method set FPU control word, restore to standard control word
 675   if (C->in_24_bit_fp_mode()) {
 676     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 677   }
 678 
 679   int framesize = C->output()->frame_size_in_bytes();
 680   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and rbp.
 682   framesize -= 2*wordSize;
 683 
 684   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 685 
 686   if (framesize >= 128) {
 687     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 688     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 689     emit_d32(cbuf, framesize);
 690   } else if (framesize) {
 691     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 692     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 693     emit_d8(cbuf, framesize);
 694   }
 695 
 696   emit_opcode(cbuf, 0x58 | EBP_enc);
 697 
 698   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 699     __ reserved_stack_check();
 700   }
 701 
 702   if (do_polling() && C->is_method_compilation()) {
 703     Register pollReg = as_Register(EBX_enc);
 704     MacroAssembler masm(&cbuf);
 705     masm.get_thread(pollReg);
 706     masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
 707     masm.relocate(relocInfo::poll_return_type);
 708     masm.testl(rax, Address(pollReg, 0));
 709   }
 710 }
 711 
 712 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 713   return MachNode::size(ra_); // too many variables; just compute it
 714                               // the hard way
 715 }
 716 
 717 int MachEpilogNode::reloc() const {
 718   return 0; // a large enough number
 719 }
 720 
 721 const Pipeline * MachEpilogNode::pipeline() const {
 722   return MachNode::pipeline_class();
 723 }
 724 
 725 //=============================================================================
 726 
 727 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 728 static enum RC rc_class( OptoReg::Name reg ) {
 729 
 730   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 731   if (OptoReg::is_stack(reg)) return rc_stack;
 732 
 733   VMReg r = OptoReg::as_VMReg(reg);
 734   if (r->is_Register()) return rc_int;
 735   if (r->is_FloatRegister()) {
 736     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 737     return rc_float;
 738   }
 739   assert(r->is_XMMRegister(), "must be");
 740   return rc_xmm;
 741 }
 742 
 743 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 744                         int opcode, const char *op_str, int size, outputStream* st ) {
 745   if( cbuf ) {
 746     emit_opcode  (*cbuf, opcode );
 747     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 748 #ifndef PRODUCT
 749   } else if( !do_size ) {
 750     if( size != 0 ) st->print("\n\t");
 751     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 752       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 753       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 754     } else { // FLD, FST, PUSH, POP
 755       st->print("%s [ESP + #%d]",op_str,offset);
 756     }
 757 #endif
 758   }
 759   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 760   return size+3+offset_size;
 761 }
 762 
 763 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 764 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 765                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 766   int in_size_in_bits = Assembler::EVEX_32bit;
 767   int evex_encoding = 0;
 768   if (reg_lo+1 == reg_hi) {
 769     in_size_in_bits = Assembler::EVEX_64bit;
 770     evex_encoding = Assembler::VEX_W;
 771   }
 772   if (cbuf) {
 773     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX for spill memory operations,
    //                          since it maps more cases to a single-byte displacement.
 776     _masm.set_managed();
 777     if (reg_lo+1 == reg_hi) { // double move?
 778       if (is_load) {
 779         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 780       } else {
 781         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 782       }
 783     } else {
 784       if (is_load) {
 785         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 786       } else {
 787         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 788       }
 789     }
 790 #ifndef PRODUCT
 791   } else if (!do_size) {
 792     if (size != 0) st->print("\n\t");
 793     if (reg_lo+1 == reg_hi) { // double move?
 794       if (is_load) st->print("%s %s,[ESP + #%d]",
 795                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 796                               Matcher::regName[reg_lo], offset);
 797       else         st->print("MOVSD  [ESP + #%d],%s",
 798                               offset, Matcher::regName[reg_lo]);
 799     } else {
 800       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 801                               Matcher::regName[reg_lo], offset);
 802       else         st->print("MOVSS  [ESP + #%d],%s",
 803                               offset, Matcher::regName[reg_lo]);
 804     }
 805 #endif
 806   }
 807   bool is_single_byte = false;
 808   if ((UseAVX > 2) && (offset != 0)) {
 809     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 810   }
 811   int offset_size = 0;
 812   if (UseAVX > 2 ) {
 813     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 814   } else {
 815     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 816   }
 817   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 818   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 819   return size+5+offset_size;
 820 }
 821 
 822 
 823 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 824                             int src_hi, int dst_hi, int size, outputStream* st ) {
 825   if (cbuf) {
 826     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex between full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 828     _masm.set_managed();
 829     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 830       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 831                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 832     } else {
 833       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 834                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 835     }
 836 #ifndef PRODUCT
 837   } else if (!do_size) {
 838     if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) { // Use movaps/movapd to move between xmm registers
 840       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 841         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 842       } else {
 843         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 844       }
 845     } else {
 846       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 847         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 848       } else {
 849         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 850       }
 851     }
 852 #endif
 853   }
 854   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 855   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 856   int sz = (UseAVX > 2) ? 6 : 4;
 857   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 858       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 859   return size + sz;
 860 }
 861 
 862 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 863                             int src_hi, int dst_hi, int size, outputStream* st ) {
 864   // 32-bit
 865   if (cbuf) {
 866     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex between full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 868     _masm.set_managed();
 869     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 870              as_Register(Matcher::_regEncode[src_lo]));
 871 #ifndef PRODUCT
 872   } else if (!do_size) {
 873     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 874 #endif
 875   }
  return (UseAVX > 2) ? 6 : 4;
 877 }
 878 
 879 
 880 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 881                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 882   // 32-bit
 883   if (cbuf) {
 884     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic is complex between full EVEX, partial EVEX and AVX, so manage EVEX spill code one way.
 886     _masm.set_managed();
 887     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 888              as_XMMRegister(Matcher::_regEncode[src_lo]));
 889 #ifndef PRODUCT
 890   } else if (!do_size) {
 891     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 892 #endif
 893   }
  return (UseAVX > 2) ? 6 : 4;
 895 }
 896 
 897 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 898   if( cbuf ) {
 899     emit_opcode(*cbuf, 0x8B );
 900     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 901 #ifndef PRODUCT
 902   } else if( !do_size ) {
 903     if( size != 0 ) st->print("\n\t");
 904     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 905 #endif
 906   }
 907   return size+2;
 908 }
 909 
 910 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 911                                  int offset, int size, outputStream* st ) {
 912   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 913     if( cbuf ) {
 914       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 915       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 916 #ifndef PRODUCT
 917     } else if( !do_size ) {
 918       if( size != 0 ) st->print("\n\t");
 919       st->print("FLD    %s",Matcher::regName[src_lo]);
 920 #endif
 921     }
 922     size += 2;
 923   }
 924 
 925   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 926   const char *op_str;
 927   int op;
 928   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 929     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 930     op = 0xDD;
 931   } else {                   // 32-bit store
 932     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 933     op = 0xD9;
 934     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 935   }
 936 
 937   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 938 }
 939 
 940 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 941 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 942                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 943 
 944 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 945                             int stack_offset, int reg, uint ireg, outputStream* st);
 946 
 947 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 948                                      int dst_offset, uint ireg, outputStream* st) {
 949   int calc_size = 0;
 950   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 951   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 952   switch (ireg) {
 953   case Op_VecS:
 954     calc_size = 3+src_offset_size + 3+dst_offset_size;
 955     break;
 956   case Op_VecD: {
 957     calc_size = 3+src_offset_size + 3+dst_offset_size;
 958     int tmp_src_offset = src_offset + 4;
 959     int tmp_dst_offset = dst_offset + 4;
 960     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 961     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 962     calc_size += 3+src_offset_size + 3+dst_offset_size;
 963     break;
 964   }
 965   case Op_VecX:
 966   case Op_VecY:
 967   case Op_VecZ:
 968     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 969     break;
 970   default:
 971     ShouldNotReachHere();
 972   }
 973   if (cbuf) {
 974     MacroAssembler _masm(cbuf);
 975     int offset = __ offset();
 976     switch (ireg) {
 977     case Op_VecS:
 978       __ pushl(Address(rsp, src_offset));
 979       __ popl (Address(rsp, dst_offset));
 980       break;
 981     case Op_VecD:
 982       __ pushl(Address(rsp, src_offset));
 983       __ popl (Address(rsp, dst_offset));
 984       __ pushl(Address(rsp, src_offset+4));
 985       __ popl (Address(rsp, dst_offset+4));
 986       break;
 987     case Op_VecX:
 988       __ movdqu(Address(rsp, -16), xmm0);
 989       __ movdqu(xmm0, Address(rsp, src_offset));
 990       __ movdqu(Address(rsp, dst_offset), xmm0);
 991       __ movdqu(xmm0, Address(rsp, -16));
 992       break;
 993     case Op_VecY:
 994       __ vmovdqu(Address(rsp, -32), xmm0);
 995       __ vmovdqu(xmm0, Address(rsp, src_offset));
 996       __ vmovdqu(Address(rsp, dst_offset), xmm0);
 997       __ vmovdqu(xmm0, Address(rsp, -32));
 998       break;
 999     case Op_VecZ:
1000       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1001       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1002       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1003       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1004       break;
1005     default:
1006       ShouldNotReachHere();
1007     }
1008     int size = __ offset() - offset;
1009     assert(size == calc_size, "incorrect size calculation");
1010     return size;
1011 #ifndef PRODUCT
1012   } else if (!do_size) {
1013     switch (ireg) {
1014     case Op_VecS:
1015       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1016                 "popl    [rsp + #%d]",
1017                 src_offset, dst_offset);
1018       break;
1019     case Op_VecD:
1020       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
1022                 "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
1024                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1025       break;
1026      case Op_VecX:
1027       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1028                 "movdqu  xmm0, [rsp + #%d]\n\t"
1029                 "movdqu  [rsp + #%d], xmm0\n\t"
1030                 "movdqu  xmm0, [rsp - #16]",
1031                 src_offset, dst_offset);
1032       break;
1033     case Op_VecY:
1034       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1035                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1036                 "vmovdqu [rsp + #%d], xmm0\n\t"
1037                 "vmovdqu xmm0, [rsp - #32]",
1038                 src_offset, dst_offset);
1039       break;
1040     case Op_VecZ:
1041       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1042                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1043                 "vmovdqu [rsp + #%d], xmm0\n\t"
1044                 "vmovdqu xmm0, [rsp - #64]",
1045                 src_offset, dst_offset);
1046       break;
1047     default:
1048       ShouldNotReachHere();
1049     }
1050 #endif
1051   }
1052   return calc_size;
1053 }
1054 
1055 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1056   // Get registers to move
1057   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1058   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1059   OptoReg::Name dst_second = ra_->get_reg_second(this );
1060   OptoReg::Name dst_first = ra_->get_reg_first(this );
1061 
1062   enum RC src_second_rc = rc_class(src_second);
1063   enum RC src_first_rc = rc_class(src_first);
1064   enum RC dst_second_rc = rc_class(dst_second);
1065   enum RC dst_first_rc = rc_class(dst_first);
1066 
1067   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1068 
1069   // Generate spill code!
1070   int size = 0;
1071 
1072   if( src_first == dst_first && src_second == dst_second )
1073     return size;            // Self copy, no move
1074 
1075   if (bottom_type()->isa_vect() != NULL) {
1076     uint ireg = ideal_reg();
1077     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1078     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1079     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1080     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1081       // mem -> mem
1082       int src_offset = ra_->reg2offset(src_first);
1083       int dst_offset = ra_->reg2offset(dst_first);
1084       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1085     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1086       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1087     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1088       int stack_offset = ra_->reg2offset(dst_first);
1089       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1090     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1091       int stack_offset = ra_->reg2offset(src_first);
1092       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1093     } else {
1094       ShouldNotReachHere();
1095     }
1096   }
1097 
1098   // --------------------------------------
1099   // Check for mem-mem move.  push/pop to move.
1100   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1101     if( src_second == dst_first ) { // overlapping stack copy ranges
1102       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1103       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1104       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1105       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1106     }
1107     // move low bits
1108     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1109     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1110     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1111       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1112       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1113     }
1114     return size;
1115   }
1116 
1117   // --------------------------------------
1118   // Check for integer reg-reg copy
1119   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1120     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1121 
1122   // Check for integer store
1123   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1124     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1125 
1126   // Check for integer load
1127   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1128     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1129 
1130   // Check for integer reg-xmm reg copy
1131   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1132     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1133             "no 64 bit integer-float reg moves" );
1134     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1135   }
1136   // --------------------------------------
1137   // Check for float reg-reg copy
1138   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1139     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1140             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1141     if( cbuf ) {
1142 
1143       // Note the mucking with the register encode to compensate for the 0/1
1144       // indexing issue mentioned in a comment in the reg_def sections
1145       // for FPR registers many lines above here.
1146 
1147       if( src_first != FPR1L_num ) {
1148         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1149         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1150         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1151         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1152      } else {
1153         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1154         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1155      }
1156 #ifndef PRODUCT
1157     } else if( !do_size ) {
1158       if( size != 0 ) st->print("\n\t");
1159       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1160       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1161 #endif
1162     }
1163     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1164   }
1165 
1166   // Check for float store
1167   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1168     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1169   }
1170 
1171   // Check for float load
1172   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1173     int offset = ra_->reg2offset(src_first);
1174     const char *op_str;
1175     int op;
1176     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1177       op_str = "FLD_D";
1178       op = 0xDD;
1179     } else {                   // 32-bit load
1180       op_str = "FLD_S";
1181       op = 0xD9;
1182       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1183     }
1184     if( cbuf ) {
1185       emit_opcode  (*cbuf, op );
1186       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1187       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1188       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1189 #ifndef PRODUCT
1190     } else if( !do_size ) {
1191       if( size != 0 ) st->print("\n\t");
1192       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1193 #endif
1194     }
1195     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1196     return size + 3+offset_size+2;
1197   }
1198 
1199   // Check for xmm reg-reg copy
1200   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1201     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1202             (src_first+1 == src_second && dst_first+1 == dst_second),
1203             "no non-adjacent float-moves" );
1204     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1205   }
1206 
1207   // Check for xmm reg-integer reg copy
1208   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1209     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1210             "no 64 bit float-integer reg moves" );
1211     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1212   }
1213 
1214   // Check for xmm store
1215   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1216     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1217   }
1218 
1219   // Check for float xmm load
1220   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1221     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1222   }
1223 
1224   // Copy from float reg to xmm reg
1225   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1226     // copy to the top of stack from floating point reg
1227     // and use LEA to preserve flags
1228     if( cbuf ) {
1229       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1230       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1231       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1232       emit_d8(*cbuf,0xF8);
1233 #ifndef PRODUCT
1234     } else if( !do_size ) {
1235       if( size != 0 ) st->print("\n\t");
1236       st->print("LEA    ESP,[ESP-8]");
1237 #endif
1238     }
1239     size += 4;
1240 
1241     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1242 
1243     // Copy from the temp memory to the xmm reg.
1244     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1245 
1246     if( cbuf ) {
1247       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1248       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1249       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1250       emit_d8(*cbuf,0x08);
1251 #ifndef PRODUCT
1252     } else if( !do_size ) {
1253       if( size != 0 ) st->print("\n\t");
1254       st->print("LEA    ESP,[ESP+8]");
1255 #endif
1256     }
1257     size += 4;
1258     return size;
1259   }
1260 
1261   assert( size > 0, "missed a case" );
1262 
1263   // --------------------------------------------------------------------
1264   // Check for second bits still needing moving.
1265   if( src_second == dst_second )
1266     return size;               // Self copy; no move
1267   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1268 
1269   // Check for second word int-int move
1270   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1271     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1272 
1273   // Check for second word integer store
1274   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1275     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1276 
1277   // Check for second word integer load
1278   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1279     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1280 
1281 
1282   Unimplemented();
1283   return 0; // Mute compiler
1284 }
1285 
1286 #ifndef PRODUCT
1287 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1288   implementation( NULL, ra_, false, st );
1289 }
1290 #endif
1291 
1292 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1293   implementation( &cbuf, ra_, false, NULL );
1294 }
1295 
1296 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1297   return MachNode::size(ra_);
1298 }
1299 
1300 
1301 //=============================================================================
1302 #ifndef PRODUCT
1303 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1304   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1305   int reg = ra_->get_reg_first(this);
1306   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1307 }
1308 #endif
1309 
1310 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1311   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1312   int reg = ra_->get_encode(this);
1313   if( offset >= 128 ) {
1314     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1315     emit_rm(cbuf, 0x2, reg, 0x04);
1316     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1317     emit_d32(cbuf, offset);
1318   }
1319   else {
1320     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1321     emit_rm(cbuf, 0x1, reg, 0x04);
1322     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1323     emit_d8(cbuf, offset);
1324   }
1325 }
1326 
1327 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1328   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1329   if( offset >= 128 ) {
1330     return 7;
1331   }
1332   else {
1333     return 4;
1334   }
1335 }
1336 
1337 //=============================================================================
1338 #ifndef PRODUCT
1339 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1340   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1341   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1342   st->print_cr("\tNOP");
1343   st->print_cr("\tNOP");
1344   if( !OptoBreakpoint )
1345     st->print_cr("\tNOP");
1346 }
1347 #endif
1348 
1349 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1350   MacroAssembler masm(&cbuf);
1351 #ifdef ASSERT
1352   uint insts_size = cbuf.insts_size();
1353 #endif
1354   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1355   masm.jump_cc(Assembler::notEqual,
1356                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1357   /* WARNING these NOPs are critical so that verified entry point is properly
1358      aligned for patching by NativeJump::patch_verified_entry() */
1359   int nops_cnt = 2;
1360   if( !OptoBreakpoint ) // Leave space for int3
1361      nops_cnt += 1;
1362   masm.nop(nops_cnt);
1363 
1364   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1365 }
1366 
1367 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1368   return OptoBreakpoint ? 11 : 12;
1369 }
1370 
1371 
1372 //=============================================================================
1373 
1374 int Matcher::regnum_to_fpu_offset(int regnum) {
1375   return regnum - 32; // The FP registers are in the second chunk
1376 }
1377 
// This hook originated on UltraSparc; returning true just means we have fast l2f conversion.
1379 const bool Matcher::convL2FSupported(void) {
1380   return true;
1381 }
1382 
1383 // Is this branch offset short enough that a short branch can be used?
1384 //
1385 // NOTE: If the platform does not provide any short branch variants, then
1386 //       this method should return false for offset 0.
1387 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1388   // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1391   offset -= br_size;
1392 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
1395   if (rule == jmpConUCF2_rule)
1396     return (-126 <= offset && offset <= 125);
1397   return (-128 <= offset && offset <= 127);
1398 }
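
// Worked example for the adjustment above (illustrative): a short Jcc is
// 2 bytes (opcode + rel8), so for a branch at address A targeting T the
// matcher is handed offset == T - A; after "offset -= br_size" we are testing
// T - (A + 2), the displacement the hardware actually encodes, against the
// signed 8-bit range.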
1399 
1400 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1402   return false;
1403 }
1404 
1405 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1406 const bool Matcher::init_array_count_is_in_bytes = false;
1407 
1408 // Needs 2 CMOV's for longs.
1409 const int Matcher::long_cmove_cost() { return 1; }
1410 
1411 // No CMOVF/CMOVD with SSE/SSE2
1412 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1413 
1414 // Does the CPU require late expand (see block.cpp for description of late expand)?
1415 const bool Matcher::require_postalloc_expand = false;
1416 
1417 // Do we need to mask the count passed to shift instructions or does
1418 // the cpu only look at the lower 5/6 bits anyway?
1419 const bool Matcher::need_masked_shift_count = false;
1420 
1421 bool Matcher::narrow_oop_use_complex_address() {
1422   ShouldNotCallThis();
1423   return true;
1424 }
1425 
1426 bool Matcher::narrow_klass_use_complex_address() {
1427   ShouldNotCallThis();
1428   return true;
1429 }
1430 
1431 bool Matcher::const_oop_prefer_decode() {
1432   ShouldNotCallThis();
1433   return true;
1434 }
1435 
1436 bool Matcher::const_klass_prefer_decode() {
1437   ShouldNotCallThis();
1438   return true;
1439 }
1440 
1441 // Is it better to copy float constants, or load them directly from memory?
1442 // Intel can load a float constant from a direct address, requiring no
1443 // extra registers.  Most RISCs will have to materialize an address into a
1444 // register first, so they would do better to copy the constant from stack.
1445 const bool Matcher::rematerialize_float_constants = true;
1446 
1447 // If CPU can load and store mis-aligned doubles directly then no fixup is
1448 // needed.  Else we split the double into 2 integer pieces and move it
1449 // piece-by-piece.  Only happens when passing doubles into C code as the
1450 // Java calling convention forces doubles to be aligned.
1451 const bool Matcher::misaligned_doubles_ok = true;
1452 
1453 
1454 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1455   // Get the memory operand from the node
1456   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1457   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1458   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1459   uint opcnt     = 1;                 // First operand
1460   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1461   while( idx >= skipped+num_edges ) {
1462     skipped += num_edges;
1463     opcnt++;                          // Bump operand count
1464     assert( opcnt < numopnds, "Accessing non-existent operand" );
1465     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1466   }
1467 
1468   MachOper *memory = node->_opnds[opcnt];
1469   MachOper *new_memory = NULL;
1470   switch (memory->opcode()) {
1471   case DIRECT:
1472   case INDOFFSET32X:
1473     // No transformation necessary.
1474     return;
1475   case INDIRECT:
1476     new_memory = new indirect_win95_safeOper( );
1477     break;
1478   case INDOFFSET8:
1479     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1480     break;
1481   case INDOFFSET32:
1482     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1483     break;
1484   case INDINDEXOFFSET:
1485     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1486     break;
1487   case INDINDEXSCALE:
1488     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1489     break;
1490   case INDINDEXSCALEOFFSET:
1491     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1492     break;
1493   case LOAD_LONG_INDIRECT:
1494   case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as an address register; uses { EDX, EBX, EDI, ESI }
1496     return;
1497   default:
1498     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1499     return;
1500   }
1501   node->_opnds[opcnt] = new_memory;
1502 }
1503 
1504 // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
1505 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1506 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On x32 they are stored with conversion only when the FPU is used for floats.
1509 bool Matcher::float_in_double() { return (UseSSE == 0); }
1510 
1511 // Do ints take an entire long register or just half?
1512 const bool Matcher::int_in_long = false;
1513 
1514 // Return whether or not this register is ever used as an argument.  This
1515 // function is used on startup to build the trampoline stubs in generateOptoStub.
1516 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1518 bool Matcher::can_be_java_arg( int reg ) {
1519   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1520   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1521   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1522   return false;
1523 }
1524 
1525 bool Matcher::is_spillable_arg( int reg ) {
1526   return can_be_java_arg(reg);
1527 }
1528 
1529 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses a multiply.
  // Only do so when the constant divisor fits into 32 bits
  // (min_jint is excluded because negating it does not yield
  // a correct positive 32-bit value).
1535   return VM_Version::has_fast_idiv() &&
1536          (divisor == (int)divisor && divisor != min_jint);
1537 }
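
// For example (illustrative): divisors such as 7 or 1000000 pass the check,
// 0x100000000LL fails the 32-bit test, and min_jint is rejected explicitly
// because negating it cannot produce a correct positive 32-bit value.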
1538 
1539 // Register for DIVI projection of divmodI
1540 RegMask Matcher::divI_proj_mask() {
1541   return EAX_REG_mask();
1542 }
1543 
1544 // Register for MODI projection of divmodI
1545 RegMask Matcher::modI_proj_mask() {
1546   return EDX_REG_mask();
1547 }
1548 
1549 // Register for DIVL projection of divmodL
1550 RegMask Matcher::divL_proj_mask() {
1551   ShouldNotReachHere();
1552   return RegMask();
1553 }
1554 
1555 // Register for MODL projection of divmodL
1556 RegMask Matcher::modL_proj_mask() {
1557   ShouldNotReachHere();
1558   return RegMask();
1559 }
1560 
1561 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1562   return NO_REG_mask();
1563 }
1564 
1565 // Returns true if the high 32 bits of the value is known to be zero.
1566 bool is_operand_hi32_zero(Node* n) {
1567   int opc = n->Opcode();
1568   if (opc == Op_AndL) {
1569     Node* o2 = n->in(2);
1570     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1571       return true;
1572     }
1573   }
1574   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1575     return true;
1576   }
1577   return false;
1578 }
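
// For example (illustrative): (AndL x (ConL 0xFF)) and (ConL 0x12345678) both
// have their high 32 bits known to be zero, while (ConL 0x100000000) does not.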
1579 
1580 %}
1581 
1582 //----------ENCODING BLOCK-----------------------------------------------------
1583 // This block specifies the encoding classes used by the compiler to output
1584 // byte streams.  Encoding classes generate functions which are called by
1585 // Machine Instruction Nodes in order to generate the bit encoding of the
1586 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1589 // operand to generate a function which returns its register number when
1590 // queried.   CONST_INTER causes an operand to generate a function which
1591 // returns the value of the constant when queried.  MEMORY_INTER causes an
1592 // operand to generate four functions which return the Base Register, the
1593 // Index Register, the Scale Value, and the Offset Value of the operand when
1594 // queried.  COND_INTER causes an operand to generate six functions which
1595 // return the encoding code (ie - encoding bits for the instruction)
1596 // associated with each basic boolean condition for a conditional instruction.
1597 // Instructions specify two basic values for encoding.  They use the
1598 // ins_encode keyword to specify their encoding class (which must be one of
1599 // the class names specified in the encoding block), and they use the
1600 // opcode keyword to specify, in order, their primary, secondary, and
1601 // tertiary opcode.  Only the opcode sections which a particular instruction
1602 // needs for encoding need to be specified.
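//
// For example (an illustrative sketch, not a rule defined in this block), a
// typical instruct names its opcode bytes with the opcode keyword and strings
// enc_classes together in ins_encode:
//
//   instruct addI_eReg_sketch(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     format %{ "ADD    $dst,$src" %}
//     opcode(0x03);                          // becomes $primary in OpcP
//     ins_encode( OpcP, RegReg(dst, src) );
//     ins_pipe( ialu_reg_reg );
//   %}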
1603 encode %{
1604   // Build emit functions for each basic byte or larger field in the intel
1605   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1606   // code in the enc_class source block.  Emit functions will live in the
1607   // main source block for now.  In future, we can generalize this by
1608   // adding a syntax that specifies the sizes of fields in an order,
1609   // so that the adlc can build the emit functions automagically
1610 
1611   // Emit primary opcode
1612   enc_class OpcP %{
1613     emit_opcode(cbuf, $primary);
1614   %}
1615 
1616   // Emit secondary opcode
1617   enc_class OpcS %{
1618     emit_opcode(cbuf, $secondary);
1619   %}
1620 
1621   // Emit opcode directly
1622   enc_class Opcode(immI d8) %{
1623     emit_opcode(cbuf, $d8$$constant);
1624   %}
1625 
1626   enc_class SizePrefix %{
1627     emit_opcode(cbuf,0x66);
1628   %}
1629 
1630   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1631     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1632   %}
1633 
1634   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1635     emit_opcode(cbuf,$opcode$$constant);
1636     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1637   %}
1638 
1639   enc_class mov_r32_imm0( rRegI dst ) %{
1640     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1641     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1642   %}
1643 
1644   enc_class cdq_enc %{
1645     // Full implementation of Java idiv and irem; checks for
1646     // special case as described in JVM spec., p.243 & p.271.
1647     //
1648     //         normal case                           special case
1649     //
    // input : rax: dividend                          min_int
    //         reg: divisor                           -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
1655     //
    //  Code sequence:
1657     //
1658     //  81 F8 00 00 00 80    cmp         rax,80000000h
1659     //  0F 85 0B 00 00 00    jne         normal_case
1660     //  33 D2                xor         rdx,edx
1661     //  83 F9 FF             cmp         rcx,0FFh
1662     //  0F 84 03 00 00 00    je          done
1663     //                  normal_case:
1664     //  99                   cdq
1665     //  F7 F9                idiv        rax,ecx
1666     //                  done:
1667     //
1668     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1669     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1670     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1671     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1672     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1673     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1674     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1675     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1676     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1677     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1678     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1679     // normal_case:
1680     emit_opcode(cbuf,0x99);                                         // cdq
1681     // idiv (note: must be emitted by the user of this rule)
1682     // normal:
1683   %}
1684 
1685   // Dense encoding for older common ops
1686   enc_class Opc_plus(immI opcode, rRegI reg) %{
1687     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1688   %}
1689 
1690 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
1692   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1693     // Check for 8-bit immediate, and set sign extend bit in opcode
1694     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1695       emit_opcode(cbuf, $primary | 0x02);
1696     }
1697     else {                          // If 32-bit immediate
1698       emit_opcode(cbuf, $primary);
1699     }
1700   %}
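
  // Example (illustrative): with $primary == 0x81 (ALU group 1, r/m32 with a
  // 32-bit immediate), OR-ing in 0x02 gives 0x83, the sign-extended 8-bit
  // immediate form, saving three immediate bytes for small constants.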
1701 
1702   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1703     // Emit primary opcode and set sign-extend bit
1704     // Check for 8-bit immediate, and set sign extend bit in opcode
1705     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
1707     else {                          // If 32-bit immediate
1708       emit_opcode(cbuf, $primary);
1709     }
1710     // Emit r/m byte with secondary opcode, after primary opcode.
1711     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1712   %}
1713 
1714   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1715     // Check for 8-bit immediate, and set sign extend bit in opcode
1716     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1717       $$$emit8$imm$$constant;
1718     }
1719     else {                          // If 32-bit immediate
1720       // Output immediate
1721       $$$emit32$imm$$constant;
1722     }
1723   %}
1724 
1725   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1726     // Emit primary opcode and set sign-extend bit
1727     // Check for 8-bit immediate, and set sign extend bit in opcode
1728     int con = (int)$imm$$constant; // Throw away top bits
1729     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1730     // Emit r/m byte with secondary opcode, after primary opcode.
1731     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1732     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1733     else                               emit_d32(cbuf,con);
1734   %}
1735 
1736   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1737     // Emit primary opcode and set sign-extend bit
1738     // Check for 8-bit immediate, and set sign extend bit in opcode
1739     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1740     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1741     // Emit r/m byte with tertiary opcode, after primary opcode.
1742     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1743     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1744     else                               emit_d32(cbuf,con);
1745   %}
1746 
1747   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1748     emit_cc(cbuf, $secondary, $dst$$reg );
1749   %}
1750 
1751   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1752     int destlo = $dst$$reg;
1753     int desthi = HIGH_FROM_LOW(destlo);
1754     // bswap lo
1755     emit_opcode(cbuf, 0x0F);
1756     emit_cc(cbuf, 0xC8, destlo);
1757     // bswap hi
1758     emit_opcode(cbuf, 0x0F);
1759     emit_cc(cbuf, 0xC8, desthi);
1760     // xchg lo and hi
1761     emit_opcode(cbuf, 0x87);
1762     emit_rm(cbuf, 0x3, destlo, desthi);
1763   %}
1764 
1765   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1766     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1767   %}
1768 
1769   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1770     $$$emit8$primary;
1771     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1772   %}
1773 
1774   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1775     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1776     emit_d8(cbuf, op >> 8 );
1777     emit_d8(cbuf, op & 255);
1778   %}
1779 
1780   // emulate a CMOV with a conditional branch around a MOV
1781   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1782     // Invert sense of branch from sense of CMOV
1783     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1784     emit_d8( cbuf, $brOffs$$constant );
1785   %}
1786 
1787   enc_class enc_PartialSubtypeCheck( ) %{
1788     Register Redi = as_Register(EDI_enc); // result register
1789     Register Reax = as_Register(EAX_enc); // super class
1790     Register Recx = as_Register(ECX_enc); // killed
1791     Register Resi = as_Register(ESI_enc); // sub class
1792     Label miss;
1793 
1794     MacroAssembler _masm(&cbuf);
1795     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1796                                      NULL, &miss,
1797                                      /*set_cond_codes:*/ true);
1798     if ($primary) {
1799       __ xorptr(Redi, Redi);
1800     }
1801     __ bind(miss);
1802   %}
1803 
1804   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1805     MacroAssembler masm(&cbuf);
1806     int start = masm.offset();
1807     if (UseSSE >= 2) {
1808       if (VerifyFPU) {
1809         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1810       }
1811     } else {
1812       // External c_calling_convention expects the FPU stack to be 'clean'.
1813       // Compiled code leaves it dirty.  Do cleanup now.
1814       masm.empty_FPU_stack();
1815     }
1816     if (sizeof_FFree_Float_Stack_All == -1) {
1817       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1818     } else {
1819       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1820     }
1821   %}
1822 
1823   enc_class Verify_FPU_For_Leaf %{
1824     if( VerifyFPU ) {
1825       MacroAssembler masm(&cbuf);
1826       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1827     }
1828   %}
1829 
1830   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1831     // This is the instruction starting address for relocation info.
1832     cbuf.set_insts_mark();
1833     $$$emit8$primary;
1834     // CALL directly to the runtime
1835     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1836                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1837 
1838     if (UseSSE >= 2) {
1839       MacroAssembler _masm(&cbuf);
1840       BasicType rt = tf()->return_type();
1841 
1842       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1843         // A C runtime call where the return value is unused.  In SSE2+
1844         // mode the result needs to be removed from the FPU stack.  It's
1845         // likely that this function call could be removed by the
1846         // optimizer if the C function is a pure function.
1847         __ ffree(0);
1848       } else if (rt == T_FLOAT) {
1849         __ lea(rsp, Address(rsp, -4));
1850         __ fstp_s(Address(rsp, 0));
1851         __ movflt(xmm0, Address(rsp, 0));
1852         __ lea(rsp, Address(rsp,  4));
1853       } else if (rt == T_DOUBLE) {
1854         __ lea(rsp, Address(rsp, -8));
1855         __ fstp_d(Address(rsp, 0));
1856         __ movdbl(xmm0, Address(rsp, 0));
1857         __ lea(rsp, Address(rsp,  8));
1858       }
1859     }
1860   %}
1861 
1862   enc_class pre_call_resets %{
    // If the method sets the FPU control word, restore it here
1864     debug_only(int off0 = cbuf.insts_size());
1865     if (ra_->C->in_24_bit_fp_mode()) {
1866       MacroAssembler _masm(&cbuf);
1867       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1868     }
1869     // Clear upper bits of YMM registers when current compiled code uses
1870     // wide vectors to avoid AVX <-> SSE transition penalty during call.
1871     MacroAssembler _masm(&cbuf);
1872     __ vzeroupper();
1873     debug_only(int off1 = cbuf.insts_size());
1874     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1875   %}
1876 
1877   enc_class post_call_FPU %{
    // If the method sets the FPU control word, do it here also
1879     if (Compile::current()->in_24_bit_fp_mode()) {
1880       MacroAssembler masm(&cbuf);
1881       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1882     }
1883   %}
1884 
1885   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1886     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1887     // who we intended to call.
1888     cbuf.set_insts_mark();
1889     $$$emit8$primary;
1890 
1891     if (!_method) {
1892       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1893                      runtime_call_Relocation::spec(),
1894                      RELOC_IMM32);
1895     } else {
1896       int method_index = resolved_method_index(cbuf);
1897       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1898                                                   : static_call_Relocation::spec(method_index);
1899       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1900                      rspec, RELOC_DISP32);
1901       // Emit stubs for static call.
1902       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1903       if (stub == NULL) {
1904         ciEnv::current()->record_failure("CodeCache is full");
1905         return;
1906       }
1907     }
1908   %}
1909 
1910   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1911     MacroAssembler _masm(&cbuf);
1912     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1913   %}
1914 
1915   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1916     int disp = in_bytes(Method::from_compiled_offset());
1917     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1918 
1919     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1920     cbuf.set_insts_mark();
1921     $$$emit8$primary;
1922     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1923     emit_d8(cbuf, disp);             // Displacement
1924 
1925   %}
1926 
1927 //   Following encoding is no longer used, but may be restored if calling
1928 //   convention changes significantly.
1929 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1930 //
1931 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1932 //     // int ic_reg     = Matcher::inline_cache_reg();
1933 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1934 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1935 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1936 //
1937 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1938 //     // // so we load it immediately before the call
1939 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1940 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1941 //
1942 //     // xor rbp,ebp
1943 //     emit_opcode(cbuf, 0x33);
1944 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1945 //
1946 //     // CALL to interpreter.
1947 //     cbuf.set_insts_mark();
1948 //     $$$emit8$primary;
1949 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1950 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1951 //   %}
1952 
1953   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1954     $$$emit8$primary;
1955     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1956     $$$emit8$shift$$constant;
1957   %}
1958 
1959   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1960     // Load immediate does not have a zero or sign extended version
1961     // for 8-bit immediates
1962     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1963     $$$emit32$src$$constant;
1964   %}
1965 
1966   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1967     // Load immediate does not have a zero or sign extended version
1968     // for 8-bit immediates
1969     emit_opcode(cbuf, $primary + $dst$$reg);
1970     $$$emit32$src$$constant;
1971   %}
1972 
1973   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1974     // Load immediate does not have a zero or sign extended version
1975     // for 8-bit immediates
1976     int dst_enc = $dst$$reg;
1977     int src_con = $src$$constant & 0x0FFFFFFFFL;
1978     if (src_con == 0) {
1979       // xor dst, dst
1980       emit_opcode(cbuf, 0x33);
1981       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1982     } else {
1983       emit_opcode(cbuf, $primary + dst_enc);
1984       emit_d32(cbuf, src_con);
1985     }
1986   %}
1987 
1988   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
1989     // Load immediate does not have a zero or sign extended version
1990     // for 8-bit immediates
1991     int dst_enc = $dst$$reg + 2;
1992     int src_con = ((julong)($src$$constant)) >> 32;
1993     if (src_con == 0) {
1994       // xor dst, dst
1995       emit_opcode(cbuf, 0x33);
1996       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1997     } else {
1998       emit_opcode(cbuf, $primary + dst_enc);
1999       emit_d32(cbuf, src_con);
2000     }
2001   %}
2002 
2003 
2004   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2005   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2006     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2007   %}
2008 
2009   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2010     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2011   %}
2012 
2013   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2014     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2015   %}
2016 
2017   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2018     $$$emit8$primary;
2019     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2020   %}
2021 
2022   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2023     $$$emit8$secondary;
2024     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2025   %}
2026 
2027   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2028     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2029   %}
2030 
2031   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2032     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2033   %}
2034 
2035   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2036     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2037   %}
2038 
2039   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2040     // Output immediate
2041     $$$emit32$src$$constant;
2042   %}
2043 
2044   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2045     // Output Float immediate bits
2046     jfloat jf = $src$$constant;
2047     int    jf_as_bits = jint_cast( jf );
2048     emit_d32(cbuf, jf_as_bits);
2049   %}
2050 
2051   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2052     // Output Float immediate bits
2053     jfloat jf = $src$$constant;
2054     int    jf_as_bits = jint_cast( jf );
2055     emit_d32(cbuf, jf_as_bits);
2056   %}
2057 
2058   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2059     // Output immediate
2060     $$$emit16$src$$constant;
2061   %}
2062 
2063   enc_class Con_d32(immI src) %{
2064     emit_d32(cbuf,$src$$constant);
2065   %}
2066 
2067   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2068     // Output immediate memory reference
2069     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2070     emit_d32(cbuf, 0x00);
2071   %}
2072 
2073   enc_class lock_prefix( ) %{
2074     emit_opcode(cbuf,0xF0);         // [Lock]
2075   %}
2076 
2077   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
2082   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2083 
2084     // XCHG  rbx,ecx
2085     emit_opcode(cbuf,0x87);
2086     emit_opcode(cbuf,0xD9);
2087     // [Lock]
2088     emit_opcode(cbuf,0xF0);
2089     // CMPXCHG8 [Eptr]
2090     emit_opcode(cbuf,0x0F);
2091     emit_opcode(cbuf,0xC7);
2092     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2093     // XCHG  rbx,ecx
2094     emit_opcode(cbuf,0x87);
2095     emit_opcode(cbuf,0xD9);
2096   %}
2097 
2098   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2099     // [Lock]
2100     emit_opcode(cbuf,0xF0);
2101 
2102     // CMPXCHG [Eptr]
2103     emit_opcode(cbuf,0x0F);
2104     emit_opcode(cbuf,0xB1);
2105     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2106   %}
2107 
2108   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
2109     // [Lock]
2110     emit_opcode(cbuf,0xF0);
2111 
2112     // CMPXCHGB [Eptr]
2113     emit_opcode(cbuf,0x0F);
2114     emit_opcode(cbuf,0xB0);
2115     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2116   %}
2117 
2118   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
2119     // [Lock]
2120     emit_opcode(cbuf,0xF0);
2121 
2122     // 16-bit mode
2123     emit_opcode(cbuf, 0x66);
2124 
2125     // CMPXCHGW [Eptr]
2126     emit_opcode(cbuf,0x0F);
2127     emit_opcode(cbuf,0xB1);
2128     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2129   %}
2130 
2131   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2132     int res_encoding = $res$$reg;
2133 
2134     // MOV  res,0
2135     emit_opcode( cbuf, 0xB8 + res_encoding);
2136     emit_d32( cbuf, 0 );
2137     // JNE,s  fail
2138     emit_opcode(cbuf,0x75);
2139     emit_d8(cbuf, 5 );
2140     // MOV  res,1
2141     emit_opcode( cbuf, 0xB8 + res_encoding);
2142     emit_d32( cbuf, 1 );
2143     // fail:
2144   %}
2145 
2146   enc_class set_instruction_start( ) %{
2147     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2148   %}
2149 
2150   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2151     int reg_encoding = $ereg$$reg;
2152     int base  = $mem$$base;
2153     int index = $mem$$index;
2154     int scale = $mem$$scale;
2155     int displace = $mem$$disp;
2156     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2157     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2158   %}
2159 
2160   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2161     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2162     int base  = $mem$$base;
2163     int index = $mem$$index;
2164     int scale = $mem$$scale;
2165     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2166     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2167     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2168   %}
2169 
2170   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2171     int r1, r2;
2172     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2173     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2174     emit_opcode(cbuf,0x0F);
2175     emit_opcode(cbuf,$tertiary);
2176     emit_rm(cbuf, 0x3, r1, r2);
2177     emit_d8(cbuf,$cnt$$constant);
2178     emit_d8(cbuf,$primary);
2179     emit_rm(cbuf, 0x3, $secondary, r1);
2180     emit_d8(cbuf,$cnt$$constant);
2181   %}
2182 
2183   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2184     emit_opcode( cbuf, 0x8B ); // Move
2185     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2186     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2187       emit_d8(cbuf,$primary);
2188       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2189       emit_d8(cbuf,$cnt$$constant-32);
2190     }
2191     emit_d8(cbuf,$primary);
2192     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2193     emit_d8(cbuf,31);
2194   %}
2195 
2196   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2197     int r1, r2;
2198     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2199     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2200 
2201     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2202     emit_rm(cbuf, 0x3, r1, r2);
2203     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2204       emit_opcode(cbuf,$primary);
2205       emit_rm(cbuf, 0x3, $secondary, r1);
2206       emit_d8(cbuf,$cnt$$constant-32);
2207     }
2208     emit_opcode(cbuf,0x33);  // XOR r2,r2
2209     emit_rm(cbuf, 0x3, r2, r2);
2210   %}
2211 
2212   // Clone of RegMem but accepts an extra parameter to access each
2213   // half of a double in memory; it never needs relocation info.
2214   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2215     emit_opcode(cbuf,$opcode$$constant);
2216     int reg_encoding = $rm_reg$$reg;
2217     int base     = $mem$$base;
2218     int index    = $mem$$index;
2219     int scale    = $mem$$scale;
2220     int displace = $mem$$disp + $disp_for_half$$constant;
2221     relocInfo::relocType disp_reloc = relocInfo::none;
2222     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2223   %}
2224 
2225   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2226   //
2227   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2228   // and it never needs relocation information.
2229   // Frequently used to move data between FPU's Stack Top and memory.
2230   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2231     int rm_byte_opcode = $rm_opcode$$constant;
2232     int base     = $mem$$base;
2233     int index    = $mem$$index;
2234     int scale    = $mem$$scale;
2235     int displace = $mem$$disp;
2236     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2237     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2238   %}
2239 
2240   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2241     int rm_byte_opcode = $rm_opcode$$constant;
2242     int base     = $mem$$base;
2243     int index    = $mem$$index;
2244     int scale    = $mem$$scale;
2245     int displace = $mem$$disp;
2246     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2247     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2248   %}
2249 
2250   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2251     int reg_encoding = $dst$$reg;
2252     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2253     int index        = 0x04;            // 0x04 indicates no index
2254     int scale        = 0x00;            // 0x00 indicates no scale
2255     int displace     = $src1$$constant; // 0x00 indicates no displacement
2256     relocInfo::relocType disp_reloc = relocInfo::none;
2257     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2258   %}
2259 
2260   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2261     // Compare dst,src
2262     emit_opcode(cbuf,0x3B);
2263     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2264     // jmp dst < src around move
2265     emit_opcode(cbuf,0x7C);
2266     emit_d8(cbuf,2);
2267     // move dst,src
2268     emit_opcode(cbuf,0x8B);
2269     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2270   %}
2271 
2272   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2273     // Compare dst,src
2274     emit_opcode(cbuf,0x3B);
2275     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2276     // jmp dst > src around move
2277     emit_opcode(cbuf,0x7F);
2278     emit_d8(cbuf,2);
2279     // move dst,src
2280     emit_opcode(cbuf,0x8B);
2281     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2282   %}
2283 
2284   enc_class enc_FPR_store(memory mem, regDPR src) %{
2285     // If src is FPR1, we can just FST to store it.
2286     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2287     int reg_encoding = 0x2; // Just store
2288     int base  = $mem$$base;
2289     int index = $mem$$index;
2290     int scale = $mem$$scale;
2291     int displace = $mem$$disp;
2292     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2293     if( $src$$reg != FPR1L_enc ) {
2294       reg_encoding = 0x3;  // Store & pop
2295       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2296       emit_d8( cbuf, 0xC0-1+$src$$reg );
2297     }
2298     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2299     emit_opcode(cbuf,$primary);
2300     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2301   %}
2302 
2303   enc_class neg_reg(rRegI dst) %{
2304     // NEG $dst
2305     emit_opcode(cbuf,0xF7);
2306     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2307   %}
2308 
2309   enc_class setLT_reg(eCXRegI dst) %{
2310     // SETLT $dst
2311     emit_opcode(cbuf,0x0F);
2312     emit_opcode(cbuf,0x9C);
2313     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2314   %}
2315 
2316   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
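    // Branch-free conditional add (note on the idiom): SUB sets CF on an
    // unsigned borrow, SBB tmp,tmp then materializes 0 or -1 from that carry,
    // AND masks $y with it, and the final ADD applies $y only when the
    // subtraction borrowed.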
2317     int tmpReg = $tmp$$reg;
2318 
2319     // SUB $p,$q
2320     emit_opcode(cbuf,0x2B);
2321     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2322     // SBB $tmp,$tmp
2323     emit_opcode(cbuf,0x1B);
2324     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2325     // AND $tmp,$y
2326     emit_opcode(cbuf,0x23);
2327     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2328     // ADD $p,$tmp
2329     emit_opcode(cbuf,0x03);
2330     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2331   %}
2332 
2333   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
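    // Note: SHLD/SHL mask the CL count to 0-31, so shifts of 32-63 are handled
    // by first moving $dst.lo into $dst.hi and clearing $dst.lo; the masked
    // shift that follows finishes the job.  The right-shift variants below use
    // the mirror-image sequence.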
2334     // TEST shift,32
2335     emit_opcode(cbuf,0xF7);
2336     emit_rm(cbuf, 0x3, 0, ECX_enc);
2337     emit_d32(cbuf,0x20);
2338     // JEQ,s small
2339     emit_opcode(cbuf, 0x74);
2340     emit_d8(cbuf, 0x04);
2341     // MOV    $dst.hi,$dst.lo
2342     emit_opcode( cbuf, 0x8B );
2343     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2344     // CLR    $dst.lo
2345     emit_opcode(cbuf, 0x33);
2346     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2347 // small:
2348     // SHLD   $dst.hi,$dst.lo,$shift
2349     emit_opcode(cbuf,0x0F);
2350     emit_opcode(cbuf,0xA5);
2351     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL    $dst.lo,$shift
2353     emit_opcode(cbuf,0xD3);
2354     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2355   %}
2356 
2357   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2358     // TEST shift,32
2359     emit_opcode(cbuf,0xF7);
2360     emit_rm(cbuf, 0x3, 0, ECX_enc);
2361     emit_d32(cbuf,0x20);
2362     // JEQ,s small
2363     emit_opcode(cbuf, 0x74);
2364     emit_d8(cbuf, 0x04);
2365     // MOV    $dst.lo,$dst.hi
2366     emit_opcode( cbuf, 0x8B );
2367     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2368     // CLR    $dst.hi
2369     emit_opcode(cbuf, 0x33);
2370     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2371 // small:
2372     // SHRD   $dst.lo,$dst.hi,$shift
2373     emit_opcode(cbuf,0x0F);
2374     emit_opcode(cbuf,0xAD);
2375     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
2377     emit_opcode(cbuf,0xD3);
2378     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2379   %}
2380 
2381   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2382     // TEST shift,32
2383     emit_opcode(cbuf,0xF7);
2384     emit_rm(cbuf, 0x3, 0, ECX_enc);
2385     emit_d32(cbuf,0x20);
2386     // JEQ,s small
2387     emit_opcode(cbuf, 0x74);
2388     emit_d8(cbuf, 0x05);
2389     // MOV    $dst.lo,$dst.hi
2390     emit_opcode( cbuf, 0x8B );
2391     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2392     // SAR    $dst.hi,31
2393     emit_opcode(cbuf, 0xC1);
2394     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2395     emit_d8(cbuf, 0x1F );
2396 // small:
2397     // SHRD   $dst.lo,$dst.hi,$shift
2398     emit_opcode(cbuf,0x0F);
2399     emit_opcode(cbuf,0xAD);
2400     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
2402     emit_opcode(cbuf,0xD3);
2403     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2404   %}
2405 
2406 
2407   // ----------------- Encodings for floating point unit -----------------
2408   // May leave result in FPU-TOS or FPU reg depending on opcodes
2409   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2410     $$$emit8$primary;
2411     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2412   %}
2413 
2414   // Pop argument in FPR0 with FSTP ST(0)
2415   enc_class PopFPU() %{
2416     emit_opcode( cbuf, 0xDD );
2417     emit_d8( cbuf, 0xD8 );
2418   %}
2419 
2420   // !!!!! equivalent to Pop_Reg_F
2421   enc_class Pop_Reg_DPR( regDPR dst ) %{
2422     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2423     emit_d8( cbuf, 0xD8+$dst$$reg );
2424   %}
2425 
2426   enc_class Push_Reg_DPR( regDPR dst ) %{
2427     emit_opcode( cbuf, 0xD9 );
2428     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2429   %}
2430 
2431   enc_class strictfp_bias1( regDPR dst ) %{
2432     emit_opcode( cbuf, 0xDB );           // FLD m80real
2433     emit_opcode( cbuf, 0x2D );
2434     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2435     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2436     emit_opcode( cbuf, 0xC8+$dst$$reg );
2437   %}
2438 
2439   enc_class strictfp_bias2( regDPR dst ) %{
2440     emit_opcode( cbuf, 0xDB );           // FLD m80real
2441     emit_opcode( cbuf, 0x2D );
2442     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2443     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2444     emit_opcode( cbuf, 0xC8+$dst$$reg );
2445   %}
2446 
2447   // Special case for moving an integer register to a stack slot.
2448   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2449     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2450   %}
2451 
2452   // Special case for moving a register to a stack slot.
2453   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2454     // Opcode already emitted
2455     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2456     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2457     emit_d32(cbuf, $dst$$disp);   // Displacement
2458   %}
2459 
2460   // Push the integer in stackSlot 'src' onto FP-stack
2461   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2462     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2463   %}
2464 
2465   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2466   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2467     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2468   %}
2469 
2470   // Same as Pop_Mem_F except for opcode
2471   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2472   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2473     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2474   %}
2475 
2476   enc_class Pop_Reg_FPR( regFPR dst ) %{
2477     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2478     emit_d8( cbuf, 0xD8+$dst$$reg );
2479   %}
2480 
2481   enc_class Push_Reg_FPR( regFPR dst ) %{
2482     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2483     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2484   %}
2485 
2486   // Push FPU's float to a stack-slot, and pop FPU-stack
2487   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2488     int pop = 0x02;
2489     if ($src$$reg != FPR1L_enc) {
2490       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2491       emit_d8( cbuf, 0xC0-1+$src$$reg );
2492       pop = 0x03;
2493     }
2494     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2495   %}
2496 
2497   // Push FPU's double to a stack-slot, and pop FPU-stack
2498   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2499     int pop = 0x02;
2500     if ($src$$reg != FPR1L_enc) {
2501       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2502       emit_d8( cbuf, 0xC0-1+$src$$reg );
2503       pop = 0x03;
2504     }
2505     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2506   %}
2507 
2508   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2509   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2510     int pop = 0xD0 - 1; // -1 since we skip FLD
2511     if ($src$$reg != FPR1L_enc) {
2512       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2513       emit_d8( cbuf, 0xC0-1+$src$$reg );
2514       pop = 0xD8;
2515     }
2516     emit_opcode( cbuf, 0xDD );
2517     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2518   %}
2519 
2520 
2521   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2522     // load dst in FPR0
2523     emit_opcode( cbuf, 0xD9 );
2524     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2525     if ($src$$reg != FPR1L_enc) {
2526       // fincstp
2527       emit_opcode (cbuf, 0xD9);
2528       emit_opcode (cbuf, 0xF7);
2529       // swap src with FPR1:
2530       // FXCH FPR1 with src
2531       emit_opcode(cbuf, 0xD9);
2532       emit_d8(cbuf, 0xC8-1+$src$$reg );
2533       // fdecstp
2534       emit_opcode (cbuf, 0xD9);
2535       emit_opcode (cbuf, 0xF6);
2536     }
2537   %}
2538 
2539   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2540     MacroAssembler _masm(&cbuf);
2541     __ subptr(rsp, 8);
2542     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2543     __ fld_d(Address(rsp, 0));
2544     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2545     __ fld_d(Address(rsp, 0));
2546   %}
2547 
2548   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2549     MacroAssembler _masm(&cbuf);
2550     __ subptr(rsp, 4);
2551     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2552     __ fld_s(Address(rsp, 0));
2553     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2554     __ fld_s(Address(rsp, 0));
2555   %}
2556 
2557   enc_class Push_ResultD(regD dst) %{
2558     MacroAssembler _masm(&cbuf);
2559     __ fstp_d(Address(rsp, 0));
2560     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2561     __ addptr(rsp, 8);
2562   %}
2563 
2564   enc_class Push_ResultF(regF dst, immI d8) %{
2565     MacroAssembler _masm(&cbuf);
2566     __ fstp_s(Address(rsp, 0));
2567     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2568     __ addptr(rsp, $d8$$constant);
2569   %}
2570 
2571   enc_class Push_SrcD(regD src) %{
2572     MacroAssembler _masm(&cbuf);
2573     __ subptr(rsp, 8);
2574     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2575     __ fld_d(Address(rsp, 0));
2576   %}
2577 
2578   enc_class push_stack_temp_qword() %{
2579     MacroAssembler _masm(&cbuf);
2580     __ subptr(rsp, 8);
2581   %}
2582 
2583   enc_class pop_stack_temp_qword() %{
2584     MacroAssembler _masm(&cbuf);
2585     __ addptr(rsp, 8);
2586   %}
2587 
2588   enc_class push_xmm_to_fpr1(regD src) %{
2589     MacroAssembler _masm(&cbuf);
2590     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2591     __ fld_d(Address(rsp, 0));
2592   %}
2593 
2594   enc_class Push_Result_Mod_DPR( regDPR src) %{
2595     if ($src$$reg != FPR1L_enc) {
2596       // fincstp
2597       emit_opcode (cbuf, 0xD9);
2598       emit_opcode (cbuf, 0xF7);
2599       // FXCH FPR1 with src
2600       emit_opcode(cbuf, 0xD9);
2601       emit_d8(cbuf, 0xC8-1+$src$$reg );
2602       // fdecstp
2603       emit_opcode (cbuf, 0xD9);
2604       emit_opcode (cbuf, 0xF6);
2605     }
2606     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2607     // // FSTP   FPR$dst$$reg
2608     // emit_opcode( cbuf, 0xDD );
2609     // emit_d8( cbuf, 0xD8+$dst$$reg );
2610   %}
2611 
2612   enc_class fnstsw_sahf_skip_parity() %{
2613     // fnstsw ax
2614     emit_opcode( cbuf, 0xDF );
2615     emit_opcode( cbuf, 0xE0 );
2616     // sahf
2617     emit_opcode( cbuf, 0x9E );
2618     // jnp  ::skip
2619     emit_opcode( cbuf, 0x7B );
2620     emit_opcode( cbuf, 0x05 );
2621   %}
2622 
2623   enc_class emitModDPR() %{
2624     // fprem must be iterative
2625     // :: loop
2626     // fprem
2627     emit_opcode( cbuf, 0xD9 );
2628     emit_opcode( cbuf, 0xF8 );
2629     // wait
2630     emit_opcode( cbuf, 0x9b );
2631     // fnstsw ax
2632     emit_opcode( cbuf, 0xDF );
2633     emit_opcode( cbuf, 0xE0 );
2634     // sahf
2635     emit_opcode( cbuf, 0x9E );
2636     // jp  ::loop
2637     emit_opcode( cbuf, 0x0F );
2638     emit_opcode( cbuf, 0x8A );
2639     emit_opcode( cbuf, 0xF4 );
2640     emit_opcode( cbuf, 0xFF );
2641     emit_opcode( cbuf, 0xFF );
2642     emit_opcode( cbuf, 0xFF );
2643   %}
2644 
2645   enc_class fpu_flags() %{
2646     // fnstsw_ax
2647     emit_opcode( cbuf, 0xDF);
2648     emit_opcode( cbuf, 0xE0);
2649     // test ax,0x0400
2650     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2651     emit_opcode( cbuf, 0xA9 );
2652     emit_d16   ( cbuf, 0x0400 );
2653     // // // This sequence works, but stalls for 12-16 cycles on PPro
2654     // // test rax,0x0400
2655     // emit_opcode( cbuf, 0xA9 );
2656     // emit_d32   ( cbuf, 0x00000400 );
2657     //
2658     // jz exit (no unordered comparison)
2659     emit_opcode( cbuf, 0x74 );
2660     emit_d8    ( cbuf, 0x02 );
2661     // mov ah,1 - treat as LT case (set carry flag)
2662     emit_opcode( cbuf, 0xB4 );
2663     emit_d8    ( cbuf, 0x01 );
2664     // sahf
2665     emit_opcode( cbuf, 0x9E);
2666   %}
2667 
2668   enc_class cmpF_P6_fixup() %{
2669     // Fixup the integer flags in case comparison involved a NaN
2670     //
2671     // JNP exit (no unordered comparison, P-flag is set by NaN)
2672     emit_opcode( cbuf, 0x7B );
2673     emit_d8    ( cbuf, 0x03 );
2674     // MOV AH,1 - treat as LT case (set carry flag)
2675     emit_opcode( cbuf, 0xB4 );
2676     emit_d8    ( cbuf, 0x01 );
2677     // SAHF
2678     emit_opcode( cbuf, 0x9E);
2679     // NOP     // target for branch to avoid branch to branch
2680     emit_opcode( cbuf, 0x90);
2681   %}
2682 
2683 //     fnstsw_ax();
2684 //     sahf();
2685 //     movl(dst, nan_result);
2686 //     jcc(Assembler::parity, exit);
2687 //     movl(dst, less_result);
2688 //     jcc(Assembler::below, exit);
2689 //     movl(dst, equal_result);
2690 //     jcc(Assembler::equal, exit);
2691 //     movl(dst, greater_result);
2692 
2693 // less_result     =  1;
2694 // greater_result  = -1;
2695 // equal_result    = 0;
2696 // nan_result      = -1;
2697 
2698   enc_class CmpF_Result(rRegI dst) %{
2699     // fnstsw_ax();
2700     emit_opcode( cbuf, 0xDF);
2701     emit_opcode( cbuf, 0xE0);
2702     // sahf
2703     emit_opcode( cbuf, 0x9E);
2704     // movl(dst, nan_result);
2705     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2706     emit_d32( cbuf, -1 );
2707     // jcc(Assembler::parity, exit);
2708     emit_opcode( cbuf, 0x7A );
2709     emit_d8    ( cbuf, 0x13 );
2710     // movl(dst, less_result);
2711     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2712     emit_d32( cbuf, -1 );
2713     // jcc(Assembler::below, exit);
2714     emit_opcode( cbuf, 0x72 );
2715     emit_d8    ( cbuf, 0x0C );
2716     // movl(dst, equal_result);
2717     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2718     emit_d32( cbuf, 0 );
2719     // jcc(Assembler::equal, exit);
2720     emit_opcode( cbuf, 0x74 );
2721     emit_d8    ( cbuf, 0x05 );
2722     // movl(dst, greater_result);
2723     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2724     emit_d32( cbuf, 1 );
2725   %}
2726 
2727 
2728   // Compare the longs and set flags
2729   // BROKEN!  Do Not use as-is
2730   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2731     // CMP    $src1.hi,$src2.hi
2732     emit_opcode( cbuf, 0x3B );
2733     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2734     // JNE,s  done
2735     emit_opcode(cbuf,0x75);
2736     emit_d8(cbuf, 2 );
2737     // CMP    $src1.lo,$src2.lo
2738     emit_opcode( cbuf, 0x3B );
2739     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2740 // done:
2741   %}
2742 
2743   enc_class convert_int_long( regL dst, rRegI src ) %{
2744     // mov $dst.lo,$src
2745     int dst_encoding = $dst$$reg;
2746     int src_encoding = $src$$reg;
2747     encode_Copy( cbuf, dst_encoding  , src_encoding );
2748     // mov $dst.hi,$src
2749     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2750     // sar $dst.hi,31
2751     emit_opcode( cbuf, 0xC1 );
2752     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2753     emit_d8(cbuf, 0x1F );
2754   %}
2755 
2756   enc_class convert_long_double( eRegL src ) %{
2757     // push $src.hi
2758     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2759     // push $src.lo
2760     emit_opcode(cbuf, 0x50+$src$$reg  );
2761     // fild 64-bits at [SP]
2762     emit_opcode(cbuf,0xdf);
2763     emit_d8(cbuf, 0x6C);
2764     emit_d8(cbuf, 0x24);
2765     emit_d8(cbuf, 0x00);
2766     // pop stack
2767     emit_opcode(cbuf, 0x83); // add  SP, #8
2768     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2769     emit_d8(cbuf, 0x8);
2770   %}
2771 
2772   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2773     // IMUL   EDX:EAX,$src1
2774     emit_opcode( cbuf, 0xF7 );
2775     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2776     // SAR    EDX,$cnt-32
2777     int shift_count = ((int)$cnt$$constant) - 32;
2778     if (shift_count > 0) {
2779       emit_opcode(cbuf, 0xC1);
2780       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2781       emit_d8(cbuf, shift_count);
2782     }
2783   %}
2784 
  // Same as convert_long_double above, but without the trailing ADD ESP, 8
2786   enc_class convert_long_double2( eRegL src ) %{
2787     // push $src.hi
2788     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2789     // push $src.lo
2790     emit_opcode(cbuf, 0x50+$src$$reg  );
2791     // fild 64-bits at [SP]
2792     emit_opcode(cbuf,0xdf);
2793     emit_d8(cbuf, 0x6C);
2794     emit_d8(cbuf, 0x24);
2795     emit_d8(cbuf, 0x00);
2796   %}
2797 
2798   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2799     // Basic idea: long = (long)int * (long)int
2800     // IMUL EDX:EAX, src
2801     emit_opcode( cbuf, 0xF7 );
2802     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2803   %}
2804 
2805   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2806     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2807     // MUL EDX:EAX, src
2808     emit_opcode( cbuf, 0xF7 );
2809     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2810   %}
2811 
2812   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2813     // Basic idea: lo(result) = lo(x_lo * y_lo)
2814     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2815     // MOV    $tmp,$src.lo
2816     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2817     // IMUL   $tmp,EDX
2818     emit_opcode( cbuf, 0x0F );
2819     emit_opcode( cbuf, 0xAF );
2820     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2821     // MOV    EDX,$src.hi
2822     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2823     // IMUL   EDX,EAX
2824     emit_opcode( cbuf, 0x0F );
2825     emit_opcode( cbuf, 0xAF );
2826     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2827     // ADD    $tmp,EDX
2828     emit_opcode( cbuf, 0x03 );
2829     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2830     // MUL   EDX:EAX,$src.lo
2831     emit_opcode( cbuf, 0xF7 );
2832     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
2834     emit_opcode( cbuf, 0x03 );
2835     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2836   %}
2837 
2838   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2839     // Basic idea: lo(result) = lo(src * y_lo)
2840     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2841     // IMUL   $tmp,EDX,$src
2842     emit_opcode( cbuf, 0x6B );
2843     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2844     emit_d8( cbuf, (int)$src$$constant );
2845     // MOV    EDX,$src
2846     emit_opcode(cbuf, 0xB8 + EDX_enc);
2847     emit_d32( cbuf, (int)$src$$constant );
2848     // MUL   EDX:EAX,EDX
2849     emit_opcode( cbuf, 0xF7 );
2850     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
2852     emit_opcode( cbuf, 0x03 );
2853     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2854   %}
2855 
2856   enc_class long_div( eRegL src1, eRegL src2 ) %{
2857     // PUSH src1.hi
2858     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2859     // PUSH src1.lo
2860     emit_opcode(cbuf,               0x50+$src1$$reg  );
2861     // PUSH src2.hi
2862     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2863     // PUSH src2.lo
2864     emit_opcode(cbuf,               0x50+$src2$$reg  );
2865     // CALL directly to the runtime
2866     cbuf.set_insts_mark();
2867     emit_opcode(cbuf,0xE8);       // Call into runtime
2868     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2869     // Restore stack
2870     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2871     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2872     emit_d8(cbuf, 4*4);
2873   %}
2874 
2875   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2876     // PUSH src1.hi
2877     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2878     // PUSH src1.lo
2879     emit_opcode(cbuf,               0x50+$src1$$reg  );
2880     // PUSH src2.hi
2881     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2882     // PUSH src2.lo
2883     emit_opcode(cbuf,               0x50+$src2$$reg  );
2884     // CALL directly to the runtime
2885     cbuf.set_insts_mark();
2886     emit_opcode(cbuf,0xE8);       // Call into runtime
2887     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2888     // Restore stack
2889     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2890     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2891     emit_d8(cbuf, 4*4);
2892   %}
2893 
2894   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2895     // MOV   $tmp,$src.lo
2896     emit_opcode(cbuf, 0x8B);
2897     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2898     // OR    $tmp,$src.hi
2899     emit_opcode(cbuf, 0x0B);
2900     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2901   %}
2902 
2903   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2904     // CMP    $src1.lo,$src2.lo
2905     emit_opcode( cbuf, 0x3B );
2906     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2907     // JNE,s  skip
2908     emit_cc(cbuf, 0x70, 0x5);
2909     emit_d8(cbuf,2);
2910     // CMP    $src1.hi,$src2.hi
2911     emit_opcode( cbuf, 0x3B );
2912     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2913   %}
2914 
2915   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2916     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2917     emit_opcode( cbuf, 0x3B );
2918     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2919     // MOV    $tmp,$src1.hi
2920     emit_opcode( cbuf, 0x8B );
2921     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2922     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2923     emit_opcode( cbuf, 0x1B );
2924     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2925   %}
2926 
2927   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2928     // XOR    $tmp,$tmp
2929     emit_opcode(cbuf,0x33);  // XOR
2930     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2931     // CMP    $tmp,$src.lo
2932     emit_opcode( cbuf, 0x3B );
2933     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2934     // SBB    $tmp,$src.hi
2935     emit_opcode( cbuf, 0x1B );
2936     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2937   %}
2938 
2939  // Sniff, sniff... smells like Gnu Superoptimizer
2940   enc_class neg_long( eRegL dst ) %{
2941     emit_opcode(cbuf,0xF7);    // NEG hi
2942     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2943     emit_opcode(cbuf,0xF7);    // NEG lo
2944     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2945     emit_opcode(cbuf,0x83);    // SBB hi,0
2946     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2947     emit_d8    (cbuf,0 );
2948   %}
2949 
2950   enc_class enc_pop_rdx() %{
2951     emit_opcode(cbuf,0x5A);
2952   %}
2953 
2954   enc_class enc_rethrow() %{
2955     cbuf.set_insts_mark();
2956     emit_opcode(cbuf, 0xE9);        // jmp    entry
2957     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2958                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2959   %}
2960 
2961 
2962   // Convert a double to an int.  Java semantics require we do complex
2963   // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  If the store produces the x87 'integer
  // indefinite' value (0x80000000), the corner case is patched up by a
  // call into the runtime, as encoded below.
2967   enc_class DPR2I_encoding( regDPR src ) %{
2968     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2969     // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
2971     // However, I2C adapters and other float-stack manglers leave pending
2972     // invalid-op exceptions hanging.  We would have to clear them before
2973     // enabling them and that is more expensive than just testing for the
2974     // invalid value Intel stores down in the corner cases.
2975     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2976     emit_opcode(cbuf,0x2D);
2977     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2978     // Allocate a word
2979     emit_opcode(cbuf,0x83);            // SUB ESP,4
2980     emit_opcode(cbuf,0xEC);
2981     emit_d8(cbuf,0x04);
2982     // Encoding assumes a double has been pushed into FPR0.
2983     // Store down the double as an int, popping the FPU stack
2984     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2985     emit_opcode(cbuf,0x1C);
2986     emit_d8(cbuf,0x24);
2987     // Restore the rounding mode; mask the exception
2988     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2989     emit_opcode(cbuf,0x2D);
2990     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2991         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2992         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2993 
2994     // Load the converted int; adjust CPU stack
2995     emit_opcode(cbuf,0x58);       // POP EAX
2996     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2997     emit_d32   (cbuf,0x80000000); //         0x80000000
2998     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2999     emit_d8    (cbuf,0x07);       // Size of slow_call
3000     // Push src onto stack slow-path
3001     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3002     emit_d8    (cbuf,0xC0-1+$src$$reg );
3003     // CALL directly to the runtime
3004     cbuf.set_insts_mark();
3005     emit_opcode(cbuf,0xE8);       // Call into runtime
3006     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3007     // Carry on here...
3008   %}
3009 
3010   enc_class DPR2L_encoding( regDPR src ) %{
3011     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3012     emit_opcode(cbuf,0x2D);
3013     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes)
3015     emit_opcode(cbuf,0x83);            // SUB ESP,8
3016     emit_opcode(cbuf,0xEC);
3017     emit_d8(cbuf,0x08);
3018     // Encoding assumes a double has been pushed into FPR0.
3019     // Store down the double as a long, popping the FPU stack
3020     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3021     emit_opcode(cbuf,0x3C);
3022     emit_d8(cbuf,0x24);
3023     // Restore the rounding mode; mask the exception
3024     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3025     emit_opcode(cbuf,0x2D);
3026     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3027         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3028         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3029 
    // Load the converted long; adjust CPU stack
3031     emit_opcode(cbuf,0x58);       // POP EAX
3032     emit_opcode(cbuf,0x5A);       // POP EDX
3033     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3034     emit_d8    (cbuf,0xFA);       // rdx
3035     emit_d32   (cbuf,0x80000000); //         0x80000000
3036     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3037     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3038     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // ModRM: EAX,EAX
3040     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3041     emit_d8    (cbuf,0x07);       // Size of slow_call
3042     // Push src onto stack slow-path
3043     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3044     emit_d8    (cbuf,0xC0-1+$src$$reg );
3045     // CALL directly to the runtime
3046     cbuf.set_insts_mark();
3047     emit_opcode(cbuf,0xE8);       // Call into runtime
3048     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3049     // Carry on here...
3050   %}
3051 
3052   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3053     // Operand was loaded from memory into fp ST (stack top)
3054     // FMUL   ST,$src  /* D8 C8+i */
3055     emit_opcode(cbuf, 0xD8);
3056     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3057   %}
3058 
3059   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
3061     emit_opcode(cbuf, 0xD8);
3062     emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // Could use FADDP  src2,fpST  /* DE C0+i */
3064   %}
3065 
3066   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3067     // FADDP  src2,ST  /* DE C0+i */
3068     emit_opcode(cbuf, 0xDE);
3069     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3070   %}
3071 
3072   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3073     // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
3081   %}
3082 
3083   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3084     // Operand was loaded from memory into fp ST (stack top)
3085     // FADD   ST,$src  /* D8 C0+i */
3086     emit_opcode(cbuf, 0xD8);
3087     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3088 
    // FMUL  ST,src2  /* D8 C8+i */
3090     emit_opcode(cbuf, 0xD8);
3091     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3092   %}
3093 
3094 
3095   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3096     // Operand was loaded from memory into fp ST (stack top)
3097     // FADD   ST,$src  /* D8 C0+i */
3098     emit_opcode(cbuf, 0xD8);
3099     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3100 
3101     // FMULP  src2,ST  /* DE C8+i */
3102     emit_opcode(cbuf, 0xDE);
3103     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3104   %}
3105 
3106   // Atomically load the volatile long
3107   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3108     emit_opcode(cbuf,0xDF);
3109     int rm_byte_opcode = 0x05;
3110     int base     = $mem$$base;
3111     int index    = $mem$$index;
3112     int scale    = $mem$$scale;
3113     int displace = $mem$$disp;
3114     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3115     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3116     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3117   %}
3118 
3119   // Volatile Store Long.  Must be atomic, so move it into
3120   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3121   // target address before the store (for null-ptr checks)
3122   // so the memory operand is used twice in the encoding.
3123   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3124     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3125     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3126     emit_opcode(cbuf,0xDF);
3127     int rm_byte_opcode = 0x07;
3128     int base     = $mem$$base;
3129     int index    = $mem$$index;
3130     int scale    = $mem$$scale;
3131     int displace = $mem$$disp;
3132     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3133     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3134   %}
3135 
3136 %}
3137 
3138 
3139 //----------FRAME--------------------------------------------------------------
3140 // Definition of frame structure and management information.
3141 //
3142 //  S T A C K   L A Y O U T    Allocators stack-slot number
3143 //                             |   (to get allocators register number
3144 //  G  Owned by    |        |  v    add OptoReg::stack0())
3145 //  r   CALLER     |        |
3146 //  o     |        +--------+      pad to even-align allocators stack-slot
3147 //  w     V        |  pad0  |        numbers; owned by CALLER
3148 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3149 //  h     ^        |   in   |  5
3150 //        |        |  args  |  4   Holes in incoming args owned by SELF
3151 //  |     |        |        |  3
3152 //  |     |        +--------+
3153 //  V     |        | old out|      Empty on Intel, window on Sparc
3154 //        |    old |preserve|      Must be even aligned.
3155 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3156 //        |        |   in   |  3   area for Intel ret address
3157 //     Owned by    |preserve|      Empty on Sparc.
3158 //       SELF      +--------+
3159 //        |        |  pad2  |  2   pad to align old SP
3160 //        |        +--------+  1
3161 //        |        | locks  |  0
3162 //        |        +--------+----> OptoReg::stack0(), even aligned
3163 //        |        |  pad1  | 11   pad to align new SP
3164 //        |        +--------+
3165 //        |        |        | 10
3166 //        |        | spills |  9   spills
3167 //        V        |        |  8   (pad0 slot for callee)
3168 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3169 //        ^        |  out   |  7
3170 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3171 //     Owned by    +--------+
3172 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3173 //        |    new |preserve|      Must be even-aligned.
3174 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3175 //        |        |        |
3176 //
3177 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3178 //         known from SELF's arguments and the Java calling convention.
3179 //         Region 6-7 is determined per call site.
3180 // Note 2: If the calling convention leaves holes in the incoming argument
3181 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3183 //         incoming area, as the Java calling convention is completely under
3184 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3186 //         varargs C calling conventions.
3187 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3188 //         even aligned with pad0 as needed.
3189 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3190 //         region 6-11 is even aligned; it may be padded out more so that
3191 //         the region from SP to FP meets the minimum stack alignment.
3192 
3193 frame %{
3194   // What direction does stack grow in (assumed to be same for C & Java)
3195   stack_direction(TOWARDS_LOW);
3196 
3197   // These three registers define part of the calling convention
3198   // between compiled code and the interpreter.
3199   inline_cache_reg(EAX);                // Inline Cache Register
3200   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3201 
3202   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3203   cisc_spilling_operand_name(indOffset32);
3204 
3205   // Number of stack slots consumed by locking an object
3206   sync_stack_slots(1);
3207 
3208   // Compiled code's Frame Pointer
3209   frame_pointer(ESP);
3210   // Interpreter stores its frame pointer in a register which is
3211   // stored to the stack by I2CAdaptors.
3212   // I2CAdaptors convert from interpreted java to compiled java.
3213   interpreter_frame_pointer(EBP);
3214 
3215   // Stack alignment requirement
3216   // Alignment size in bytes (128-bit -> 16 bytes)
3217   stack_alignment(StackAlignmentInBytes);
3218 
3219   // Number of stack slots between incoming argument block and the start of
3220   // a new frame.  The PROLOG must add this many slots to the stack.  The
3221   // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp (must save rbp).
3223   in_preserve_stack_slots(2+VerifyStackAtCalls);
3224 
3225   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3226   // for calls to C.  Supports the var-args backing area for register parms.
3227   varargs_C_out_slots_killed(0);
3228 
3229   // The after-PROLOG location of the return address.  Location of
3230   // return address specifies a type (REG or STACK) and a number
3231   // representing the register number (i.e. - use a register name) or
3232   // stack slot.
3233   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3234   // Otherwise, it is above the locks and verification slot and alignment word
3235   return_addr(STACK - 1 +
3236               align_up((Compile::current()->in_preserve_stack_slots() +
3237                         Compile::current()->fixed_slots()),
3238                        stack_alignment_in_slots()));
3239 
3240   // Body of function which returns an integer array locating
3241   // arguments either in registers or in stack slots.  Passed an array
3242   // of ideal registers called "sig" and a "length" count.  Stack-slot
3243   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3244   // arguments for a CALLEE.  Incoming stack arguments are
3245   // automatically biased by the preserve_stack_slots field above.
3246   calling_convention %{
    // No difference between incoming and outgoing, so just pass false
3248     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3249   %}
3250 
3251 
3252   // Body of function which returns an integer array locating
3253   // arguments either in registers or in stack slots.  Passed an array
3254   // of ideal registers called "sig" and a "length" count.  Stack-slot
3255   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3256   // arguments for a CALLEE.  Incoming stack arguments are
3257   // automatically biased by the preserve_stack_slots field above.
3258   c_calling_convention %{
3259     // This is obviously always outgoing
3260     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3261   %}
3262 
3263   // Location of C & interpreter return values
3264   c_return_value %{
3265     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3266     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3267     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3268 
3269     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3270     // that C functions return float and double results in XMM0.
3271     if( ideal_reg == Op_RegD && UseSSE>=2 )
3272       return OptoRegPair(XMM0b_num,XMM0_num);
3273     if( ideal_reg == Op_RegF && UseSSE>=2 )
3274       return OptoRegPair(OptoReg::Bad,XMM0_num);
3275 
3276     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3277   %}
3278 
3279   // Location of return values
3280   return_value %{
3281     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3282     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3283     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3284     if( ideal_reg == Op_RegD && UseSSE>=2 )
3285       return OptoRegPair(XMM0b_num,XMM0_num);
3286     if( ideal_reg == Op_RegF && UseSSE>=1 )
3287       return OptoRegPair(OptoReg::Bad,XMM0_num);
3288     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3289   %}
3290 
3291 %}
3292 
3293 //----------ATTRIBUTES---------------------------------------------------------
3294 //----------Operand Attributes-------------------------------------------------
3295 op_attrib op_cost(0);        // Required cost attribute
3296 
3297 //----------Instruction Attributes---------------------------------------------
3298 ins_attrib ins_cost(100);       // Required cost attribute
3299 ins_attrib ins_size(8);         // Required size attribute (in bits)
3300 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3301                                 // non-matching short branch variant of some
                                // long branch?
3303 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3304                                 // specifies the alignment that some part of the instruction (not
3305                                 // necessarily the start) requires.  If > 1, a compute_padding()
3306                                 // function must be provided for the instruction
3307 
3308 //----------OPERANDS-----------------------------------------------------------
3309 // Operand definitions must precede instruction definitions for correct parsing
3310 // in the ADLC because operands constitute user defined types which are used in
3311 // instruction definitions.
3312 
3313 //----------Simple Operands----------------------------------------------------
3314 // Immediate Operands
3315 // Integer Immediate
3316 operand immI() %{
3317   match(ConI);
3318 
3319   op_cost(10);
3320   format %{ %}
3321   interface(CONST_INTER);
3322 %}
3323 
3324 // Constant for test vs zero
3325 operand immI_0() %{
3326   predicate(n->get_int() == 0);
3327   match(ConI);
3328 
3329   op_cost(0);
3330   format %{ %}
3331   interface(CONST_INTER);
3332 %}
3333 
3334 // Constant for increment
3335 operand immI_1() %{
3336   predicate(n->get_int() == 1);
3337   match(ConI);
3338 
3339   op_cost(0);
3340   format %{ %}
3341   interface(CONST_INTER);
3342 %}
3343 
3344 // Constant for decrement
3345 operand immI_M1() %{
3346   predicate(n->get_int() == -1);
3347   match(ConI);
3348 
3349   op_cost(0);
3350   format %{ %}
3351   interface(CONST_INTER);
3352 %}
3353 
3354 // Valid scale values for addressing modes
3355 operand immI2() %{
3356   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3357   match(ConI);
3358 
3359   format %{ %}
3360   interface(CONST_INTER);
3361 %}
3362 
3363 operand immI8() %{
3364   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3365   match(ConI);
3366 
3367   op_cost(5);
3368   format %{ %}
3369   interface(CONST_INTER);
3370 %}
3371 
3372 operand immU8() %{
3373   predicate((0 <= n->get_int()) && (n->get_int() <= 255));
3374   match(ConI);
3375 
3376   op_cost(5);
3377   format %{ %}
3378   interface(CONST_INTER);
3379 %}
3380 
3381 operand immI16() %{
3382   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3383   match(ConI);
3384 
3385   op_cost(10);
3386   format %{ %}
3387   interface(CONST_INTER);
3388 %}
3389 
3390 // Int Immediate non-negative
3391 operand immU31()
3392 %{
3393   predicate(n->get_int() >= 0);
3394   match(ConI);
3395 
3396   op_cost(0);
3397   format %{ %}
3398   interface(CONST_INTER);
3399 %}
3400 
3401 // Constant for long shifts
3402 operand immI_32() %{
3403   predicate( n->get_int() == 32 );
3404   match(ConI);
3405 
3406   op_cost(0);
3407   format %{ %}
3408   interface(CONST_INTER);
3409 %}
3410 
3411 operand immI_1_31() %{
3412   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3413   match(ConI);
3414 
3415   op_cost(0);
3416   format %{ %}
3417   interface(CONST_INTER);
3418 %}
3419 
3420 operand immI_32_63() %{
3421   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3422   match(ConI);
3423   op_cost(0);
3424 
3425   format %{ %}
3426   interface(CONST_INTER);
3427 %}
3428 
3429 operand immI_2() %{
3430   predicate( n->get_int() == 2 );
3431   match(ConI);
3432 
3433   op_cost(0);
3434   format %{ %}
3435   interface(CONST_INTER);
3436 %}
3437 
3438 operand immI_3() %{
3439   predicate( n->get_int() == 3 );
3440   match(ConI);
3441 
3442   op_cost(0);
3443   format %{ %}
3444   interface(CONST_INTER);
3445 %}
3446 
3447 operand immI_4()
3448 %{
3449   predicate(n->get_int() == 4);
3450   match(ConI);
3451 
3452   op_cost(0);
3453   format %{ %}
3454   interface(CONST_INTER);
3455 %}
3456 
3457 operand immI_8()
3458 %{
3459   predicate(n->get_int() == 8);
3460   match(ConI);
3461 
3462   op_cost(0);
3463   format %{ %}
3464   interface(CONST_INTER);
3465 %}
3466 
3467 // Pointer Immediate
3468 operand immP() %{
3469   match(ConP);
3470 
3471   op_cost(10);
3472   format %{ %}
3473   interface(CONST_INTER);
3474 %}
3475 
3476 // NULL Pointer Immediate
3477 operand immP0() %{
3478   predicate( n->get_ptr() == 0 );
3479   match(ConP);
3480   op_cost(0);
3481 
3482   format %{ %}
3483   interface(CONST_INTER);
3484 %}
3485 
3486 // Long Immediate
3487 operand immL() %{
3488   match(ConL);
3489 
3490   op_cost(20);
3491   format %{ %}
3492   interface(CONST_INTER);
3493 %}
3494 
3495 // Long Immediate zero
3496 operand immL0() %{
3497   predicate( n->get_long() == 0L );
3498   match(ConL);
3499   op_cost(0);
3500 
3501   format %{ %}
3502   interface(CONST_INTER);
3503 %}
3504 
// Long Immediate minus one
3506 operand immL_M1() %{
3507   predicate( n->get_long() == -1L );
3508   match(ConL);
3509   op_cost(0);
3510 
3511   format %{ %}
3512   interface(CONST_INTER);
3513 %}
3514 
3515 // Long immediate from 0 to 127.
3516 // Used for a shorter form of long mul by 10.
3517 operand immL_127() %{
3518   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3519   match(ConL);
3520   op_cost(0);
3521 
3522   format %{ %}
3523   interface(CONST_INTER);
3524 %}
3525 
3526 // Long Immediate: low 32-bit mask
3527 operand immL_32bits() %{
3528   predicate(n->get_long() == 0xFFFFFFFFL);
3529   match(ConL);
3530   op_cost(0);
3531 
3532   format %{ %}
3533   interface(CONST_INTER);
3534 %}
3535 
// Long Immediate: fits in signed 32 bits
3537 operand immL32() %{
3538   predicate(n->get_long() == (int)(n->get_long()));
3539   match(ConL);
3540   op_cost(20);
3541 
3542   format %{ %}
3543   interface(CONST_INTER);
3544 %}
3545 
// Double Immediate zero
3547 operand immDPR0() %{
3548   // Do additional (and counter-intuitive) test against NaN to work around VC++
3549   // bug that generates code such that NaNs compare equal to 0.0
3550   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3551   match(ConD);
3552 
3553   op_cost(5);
3554   format %{ %}
3555   interface(CONST_INTER);
3556 %}
3557 
3558 // Double Immediate one
3559 operand immDPR1() %{
3560   predicate( UseSSE<=1 && n->getd() == 1.0 );
3561   match(ConD);
3562 
3563   op_cost(5);
3564   format %{ %}
3565   interface(CONST_INTER);
3566 %}
3567 
3568 // Double Immediate
3569 operand immDPR() %{
3570   predicate(UseSSE<=1);
3571   match(ConD);
3572 
3573   op_cost(5);
3574   format %{ %}
3575   interface(CONST_INTER);
3576 %}
3577 
3578 operand immD() %{
3579   predicate(UseSSE>=2);
3580   match(ConD);
3581 
3582   op_cost(5);
3583   format %{ %}
3584   interface(CONST_INTER);
3585 %}
3586 
3587 // Double Immediate zero
3588 operand immD0() %{
3589   // Do additional (and counter-intuitive) test against NaN to work around VC++
3590   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3591   // compare equal to -0.0.
3592   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3593   match(ConD);
3594 
3595   format %{ %}
3596   interface(CONST_INTER);
3597 %}
3598 
3599 // Float Immediate zero
3600 operand immFPR0() %{
3601   predicate(UseSSE == 0 && n->getf() == 0.0F);
3602   match(ConF);
3603 
3604   op_cost(5);
3605   format %{ %}
3606   interface(CONST_INTER);
3607 %}
3608 
3609 // Float Immediate one
3610 operand immFPR1() %{
3611   predicate(UseSSE == 0 && n->getf() == 1.0F);
3612   match(ConF);
3613 
3614   op_cost(5);
3615   format %{ %}
3616   interface(CONST_INTER);
3617 %}
3618 
3619 // Float Immediate
3620 operand immFPR() %{
3621   predicate( UseSSE == 0 );
3622   match(ConF);
3623 
3624   op_cost(5);
3625   format %{ %}
3626   interface(CONST_INTER);
3627 %}
3628 
3629 // Float Immediate
3630 operand immF() %{
3631   predicate(UseSSE >= 1);
3632   match(ConF);
3633 
3634   op_cost(5);
3635   format %{ %}
3636   interface(CONST_INTER);
3637 %}
3638 
3639 // Float Immediate zero.  Zero and not -0.0
3640 operand immF0() %{
3641   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3642   match(ConF);
3643 
3644   op_cost(5);
3645   format %{ %}
3646   interface(CONST_INTER);
3647 %}
3648 
3649 // Immediates for special shifts (sign extend)
3650 
3651 // Constants for increment
3652 operand immI_16() %{
3653   predicate( n->get_int() == 16 );
3654   match(ConI);
3655 
3656   format %{ %}
3657   interface(CONST_INTER);
3658 %}
3659 
3660 operand immI_24() %{
3661   predicate( n->get_int() == 24 );
3662   match(ConI);
3663 
3664   format %{ %}
3665   interface(CONST_INTER);
3666 %}
3667 
3668 // Constant for byte-wide masking
3669 operand immI_255() %{
3670   predicate( n->get_int() == 255 );
3671   match(ConI);
3672 
3673   format %{ %}
3674   interface(CONST_INTER);
3675 %}
3676 
3677 // Constant for short-wide masking
3678 operand immI_65535() %{
3679   predicate(n->get_int() == 65535);
3680   match(ConI);
3681 
3682   format %{ %}
3683   interface(CONST_INTER);
3684 %}
3685 
3686 // Register Operands
3687 // Integer Register
3688 operand rRegI() %{
3689   constraint(ALLOC_IN_RC(int_reg));
3690   match(RegI);
3691   match(xRegI);
3692   match(eAXRegI);
3693   match(eBXRegI);
3694   match(eCXRegI);
3695   match(eDXRegI);
3696   match(eDIRegI);
3697   match(eSIRegI);
3698 
3699   format %{ %}
3700   interface(REG_INTER);
3701 %}
3702 
3703 // Subset of Integer Register
3704 operand xRegI(rRegI reg) %{
3705   constraint(ALLOC_IN_RC(int_x_reg));
3706   match(reg);
3707   match(eAXRegI);
3708   match(eBXRegI);
3709   match(eCXRegI);
3710   match(eDXRegI);
3711 
3712   format %{ %}
3713   interface(REG_INTER);
3714 %}
3715 
3716 // Special Registers
3717 operand eAXRegI(xRegI reg) %{
3718   constraint(ALLOC_IN_RC(eax_reg));
3719   match(reg);
3720   match(rRegI);
3721 
3722   format %{ "EAX" %}
3723   interface(REG_INTER);
3724 %}
3725 
3726 // Special Registers
3727 operand eBXRegI(xRegI reg) %{
3728   constraint(ALLOC_IN_RC(ebx_reg));
3729   match(reg);
3730   match(rRegI);
3731 
3732   format %{ "EBX" %}
3733   interface(REG_INTER);
3734 %}
3735 
3736 operand eCXRegI(xRegI reg) %{
3737   constraint(ALLOC_IN_RC(ecx_reg));
3738   match(reg);
3739   match(rRegI);
3740 
3741   format %{ "ECX" %}
3742   interface(REG_INTER);
3743 %}
3744 
3745 operand eDXRegI(xRegI reg) %{
3746   constraint(ALLOC_IN_RC(edx_reg));
3747   match(reg);
3748   match(rRegI);
3749 
3750   format %{ "EDX" %}
3751   interface(REG_INTER);
3752 %}
3753 
3754 operand eDIRegI(xRegI reg) %{
3755   constraint(ALLOC_IN_RC(edi_reg));
3756   match(reg);
3757   match(rRegI);
3758 
3759   format %{ "EDI" %}
3760   interface(REG_INTER);
3761 %}
3762 
3763 operand naxRegI() %{
3764   constraint(ALLOC_IN_RC(nax_reg));
3765   match(RegI);
3766   match(eCXRegI);
3767   match(eDXRegI);
3768   match(eSIRegI);
3769   match(eDIRegI);
3770 
3771   format %{ %}
3772   interface(REG_INTER);
3773 %}
3774 
3775 operand nadxRegI() %{
3776   constraint(ALLOC_IN_RC(nadx_reg));
3777   match(RegI);
3778   match(eBXRegI);
3779   match(eCXRegI);
3780   match(eSIRegI);
3781   match(eDIRegI);
3782 
3783   format %{ %}
3784   interface(REG_INTER);
3785 %}
3786 
3787 operand ncxRegI() %{
3788   constraint(ALLOC_IN_RC(ncx_reg));
3789   match(RegI);
3790   match(eAXRegI);
3791   match(eDXRegI);
3792   match(eSIRegI);
3793   match(eDIRegI);
3794 
3795   format %{ %}
3796   interface(REG_INTER);
3797 %}
3798 
// This operand was used by cmpFastUnlock, but conflicted with the 'object' reg.
3801 operand eSIRegI(xRegI reg) %{
3802    constraint(ALLOC_IN_RC(esi_reg));
3803    match(reg);
3804    match(rRegI);
3805 
3806    format %{ "ESI" %}
3807    interface(REG_INTER);
3808 %}
3809 
3810 // Pointer Register
3811 operand anyRegP() %{
3812   constraint(ALLOC_IN_RC(any_reg));
3813   match(RegP);
3814   match(eAXRegP);
3815   match(eBXRegP);
3816   match(eCXRegP);
3817   match(eDIRegP);
3818   match(eRegP);
3819 
3820   format %{ %}
3821   interface(REG_INTER);
3822 %}
3823 
3824 operand eRegP() %{
3825   constraint(ALLOC_IN_RC(int_reg));
3826   match(RegP);
3827   match(eAXRegP);
3828   match(eBXRegP);
3829   match(eCXRegP);
3830   match(eDIRegP);
3831 
3832   format %{ %}
3833   interface(REG_INTER);
3834 %}
3835 
3836 operand rRegP() %{
3837   constraint(ALLOC_IN_RC(int_reg));
3838   match(RegP);
3839   match(eAXRegP);
3840   match(eBXRegP);
3841   match(eCXRegP);
3842   match(eDIRegP);
3843 
3844   format %{ %}
3845   interface(REG_INTER);
3846 %}
3847 
3848 // On windows95, EBP is not safe to use for implicit null tests.
3849 operand eRegP_no_EBP() %{
3850   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3851   match(RegP);
3852   match(eAXRegP);
3853   match(eBXRegP);
3854   match(eCXRegP);
3855   match(eDIRegP);
3856 
3857   op_cost(100);
3858   format %{ %}
3859   interface(REG_INTER);
3860 %}
3861 
3862 operand naxRegP() %{
3863   constraint(ALLOC_IN_RC(nax_reg));
3864   match(RegP);
3865   match(eBXRegP);
3866   match(eDXRegP);
3867   match(eCXRegP);
3868   match(eSIRegP);
3869   match(eDIRegP);
3870 
3871   format %{ %}
3872   interface(REG_INTER);
3873 %}
3874 
3875 operand nabxRegP() %{
3876   constraint(ALLOC_IN_RC(nabx_reg));
3877   match(RegP);
3878   match(eCXRegP);
3879   match(eDXRegP);
3880   match(eSIRegP);
3881   match(eDIRegP);
3882 
3883   format %{ %}
3884   interface(REG_INTER);
3885 %}
3886 
3887 operand pRegP() %{
3888   constraint(ALLOC_IN_RC(p_reg));
3889   match(RegP);
3890   match(eBXRegP);
3891   match(eDXRegP);
3892   match(eSIRegP);
3893   match(eDIRegP);
3894 
3895   format %{ %}
3896   interface(REG_INTER);
3897 %}
3898 
3899 // Special Registers
3900 // Return a pointer value
3901 operand eAXRegP(eRegP reg) %{
3902   constraint(ALLOC_IN_RC(eax_reg));
3903   match(reg);
3904   format %{ "EAX" %}
3905   interface(REG_INTER);
3906 %}
3907 
3908 // Used in AtomicAdd
3909 operand eBXRegP(eRegP reg) %{
3910   constraint(ALLOC_IN_RC(ebx_reg));
3911   match(reg);
3912   format %{ "EBX" %}
3913   interface(REG_INTER);
3914 %}
3915 
3916 // Tail-call (interprocedural jump) to interpreter
3917 operand eCXRegP(eRegP reg) %{
3918   constraint(ALLOC_IN_RC(ecx_reg));
3919   match(reg);
3920   format %{ "ECX" %}
3921   interface(REG_INTER);
3922 %}
3923 
3924 operand eDXRegP(eRegP reg) %{
3925   constraint(ALLOC_IN_RC(edx_reg));
3926   match(reg);
3927   format %{ "EDX" %}
3928   interface(REG_INTER);
3929 %}
3930 
3931 operand eSIRegP(eRegP reg) %{
3932   constraint(ALLOC_IN_RC(esi_reg));
3933   match(reg);
3934   format %{ "ESI" %}
3935   interface(REG_INTER);
3936 %}
3937 
3938 // Used in rep stosw
3939 operand eDIRegP(eRegP reg) %{
3940   constraint(ALLOC_IN_RC(edi_reg));
3941   match(reg);
3942   format %{ "EDI" %}
3943   interface(REG_INTER);
3944 %}
3945 
3946 operand eRegL() %{
3947   constraint(ALLOC_IN_RC(long_reg));
3948   match(RegL);
3949   match(eADXRegL);
3950 
3951   format %{ %}
3952   interface(REG_INTER);
3953 %}
3954 
3955 operand eADXRegL( eRegL reg ) %{
3956   constraint(ALLOC_IN_RC(eadx_reg));
3957   match(reg);
3958 
3959   format %{ "EDX:EAX" %}
3960   interface(REG_INTER);
3961 %}
3962 
3963 operand eBCXRegL( eRegL reg ) %{
3964   constraint(ALLOC_IN_RC(ebcx_reg));
3965   match(reg);
3966 
3967   format %{ "EBX:ECX" %}
3968   interface(REG_INTER);
3969 %}
3970 
3971 // Special case for integer high multiply
3972 operand eADXRegL_low_only() %{
3973   constraint(ALLOC_IN_RC(eadx_reg));
3974   match(RegL);
3975 
3976   format %{ "EAX" %}
3977   interface(REG_INTER);
3978 %}
3979 
3980 // Flags register, used as output of compare instructions
3981 operand rFlagsReg() %{
3982   constraint(ALLOC_IN_RC(int_flags));
3983   match(RegFlags);
3984 
3985   format %{ "EFLAGS" %}
3986   interface(REG_INTER);
3987 %}
3988 
3989 // Flags register, used as output of compare instructions
3990 operand eFlagsReg() %{
3991   constraint(ALLOC_IN_RC(int_flags));
3992   match(RegFlags);
3993 
3994   format %{ "EFLAGS" %}
3995   interface(REG_INTER);
3996 %}
3997 
3998 // Flags register, used as output of FLOATING POINT compare instructions
3999 operand eFlagsRegU() %{
4000   constraint(ALLOC_IN_RC(int_flags));
4001   match(RegFlags);
4002 
4003   format %{ "EFLAGS_U" %}
4004   interface(REG_INTER);
4005 %}
4006 
4007 operand eFlagsRegUCF() %{
4008   constraint(ALLOC_IN_RC(int_flags));
4009   match(RegFlags);
4010   predicate(false);
4011 
4012   format %{ "EFLAGS_U_CF" %}
4013   interface(REG_INTER);
4014 %}
4015 
4016 // Condition Code Register used by long compare
4017 operand flagsReg_long_LTGE() %{
4018   constraint(ALLOC_IN_RC(int_flags));
4019   match(RegFlags);
4020   format %{ "FLAGS_LTGE" %}
4021   interface(REG_INTER);
4022 %}
4023 operand flagsReg_long_EQNE() %{
4024   constraint(ALLOC_IN_RC(int_flags));
4025   match(RegFlags);
4026   format %{ "FLAGS_EQNE" %}
4027   interface(REG_INTER);
4028 %}
4029 operand flagsReg_long_LEGT() %{
4030   constraint(ALLOC_IN_RC(int_flags));
4031   match(RegFlags);
4032   format %{ "FLAGS_LEGT" %}
4033   interface(REG_INTER);
4034 %}
4035 
4036 // Condition Code Register used by unsigned long compare
4037 operand flagsReg_ulong_LTGE() %{
4038   constraint(ALLOC_IN_RC(int_flags));
4039   match(RegFlags);
4040   format %{ "FLAGS_U_LTGE" %}
4041   interface(REG_INTER);
4042 %}
4043 operand flagsReg_ulong_EQNE() %{
4044   constraint(ALLOC_IN_RC(int_flags));
4045   match(RegFlags);
4046   format %{ "FLAGS_U_EQNE" %}
4047   interface(REG_INTER);
4048 %}
4049 operand flagsReg_ulong_LEGT() %{
4050   constraint(ALLOC_IN_RC(int_flags));
4051   match(RegFlags);
4052   format %{ "FLAGS_U_LEGT" %}
4053   interface(REG_INTER);
4054 %}
4055 
// FPU Double register operands
4057 operand regDPR() %{
4058   predicate( UseSSE < 2 );
4059   constraint(ALLOC_IN_RC(fp_dbl_reg));
4060   match(RegD);
4061   match(regDPR1);
4062   match(regDPR2);
4063   format %{ %}
4064   interface(REG_INTER);
4065 %}
4066 
4067 operand regDPR1(regDPR reg) %{
4068   predicate( UseSSE < 2 );
4069   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4070   match(reg);
4071   format %{ "FPR1" %}
4072   interface(REG_INTER);
4073 %}
4074 
4075 operand regDPR2(regDPR reg) %{
4076   predicate( UseSSE < 2 );
4077   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4078   match(reg);
4079   format %{ "FPR2" %}
4080   interface(REG_INTER);
4081 %}
4082 
4083 operand regnotDPR1(regDPR reg) %{
4084   predicate( UseSSE < 2 );
4085   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4086   match(reg);
4087   format %{ %}
4088   interface(REG_INTER);
4089 %}
4090 
4091 // Float register operands
4092 operand regFPR() %{
4093   predicate( UseSSE < 2 );
4094   constraint(ALLOC_IN_RC(fp_flt_reg));
4095   match(RegF);
4096   match(regFPR1);
4097   format %{ %}
4098   interface(REG_INTER);
4099 %}
4100 
4101 // Float register operands
4102 operand regFPR1(regFPR reg) %{
4103   predicate( UseSSE < 2 );
4104   constraint(ALLOC_IN_RC(fp_flt_reg0));
4105   match(reg);
4106   format %{ "FPR1" %}
4107   interface(REG_INTER);
4108 %}
4109 
4110 // XMM Float register operands
4111 operand regF() %{
4112   predicate( UseSSE>=1 );
4113   constraint(ALLOC_IN_RC(float_reg_legacy));
4114   match(RegF);
4115   format %{ %}
4116   interface(REG_INTER);
4117 %}
4118 
4119 operand legRegF() %{
4120   predicate( UseSSE>=1 );
4121   constraint(ALLOC_IN_RC(float_reg_legacy));
4122   match(RegF);
4123   format %{ %}
4124   interface(REG_INTER);
4125 %}
4126 
4127 // Float register operands
4128 operand vlRegF() %{
4129    constraint(ALLOC_IN_RC(float_reg_vl));
4130    match(RegF);
4131 
4132    format %{ %}
4133    interface(REG_INTER);
4134 %}
4135 
4136 // XMM Double register operands
4137 operand regD() %{
4138   predicate( UseSSE>=2 );
4139   constraint(ALLOC_IN_RC(double_reg_legacy));
4140   match(RegD);
4141   format %{ %}
4142   interface(REG_INTER);
4143 %}
4144 
4145 // Double register operands
4146 operand legRegD() %{
4147   predicate( UseSSE>=2 );
4148   constraint(ALLOC_IN_RC(double_reg_legacy));
4149   match(RegD);
4150   format %{ %}
4151   interface(REG_INTER);
4152 %}
4153 
4154 operand vlRegD() %{
4155    constraint(ALLOC_IN_RC(double_reg_vl));
4156    match(RegD);
4157 
4158    format %{ %}
4159    interface(REG_INTER);
4160 %}
4161 
4162 //----------Memory Operands----------------------------------------------------
4163 // Direct Memory Operand
4164 operand direct(immP addr) %{
4165   match(addr);
4166 
4167   format %{ "[$addr]" %}
4168   interface(MEMORY_INTER) %{
4169     base(0xFFFFFFFF);
4170     index(0x4);
4171     scale(0x0);
4172     disp($addr);
4173   %}
4174 %}
4175 
4176 // Indirect Memory Operand
4177 operand indirect(eRegP reg) %{
4178   constraint(ALLOC_IN_RC(int_reg));
4179   match(reg);
4180 
4181   format %{ "[$reg]" %}
4182   interface(MEMORY_INTER) %{
4183     base($reg);
4184     index(0x4);
4185     scale(0x0);
4186     disp(0x0);
4187   %}
4188 %}
4189 
4190 // Indirect Memory Plus Short Offset Operand
4191 operand indOffset8(eRegP reg, immI8 off) %{
4192   match(AddP reg off);
4193 
4194   format %{ "[$reg + $off]" %}
4195   interface(MEMORY_INTER) %{
4196     base($reg);
4197     index(0x4);
4198     scale(0x0);
4199     disp($off);
4200   %}
4201 %}
4202 
4203 // Indirect Memory Plus Long Offset Operand
4204 operand indOffset32(eRegP reg, immI off) %{
4205   match(AddP reg off);
4206 
4207   format %{ "[$reg + $off]" %}
4208   interface(MEMORY_INTER) %{
4209     base($reg);
4210     index(0x4);
4211     scale(0x0);
4212     disp($off);
4213   %}
4214 %}
4215 
4216 // Indirect Memory Plus Long Offset Operand
4217 operand indOffset32X(rRegI reg, immP off) %{
4218   match(AddP off reg);
4219 
4220   format %{ "[$reg + $off]" %}
4221   interface(MEMORY_INTER) %{
4222     base($reg);
4223     index(0x4);
4224     scale(0x0);
4225     disp($off);
4226   %}
4227 %}
4228 
4229 // Indirect Memory Plus Index Register Plus Offset Operand
4230 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4231   match(AddP (AddP reg ireg) off);
4232 
4233   op_cost(10);
4234   format %{"[$reg + $off + $ireg]" %}
4235   interface(MEMORY_INTER) %{
4236     base($reg);
4237     index($ireg);
4238     scale(0x0);
4239     disp($off);
4240   %}
4241 %}
4242 
4243 // Indirect Memory Plus Index Register Plus Offset Operand
4244 operand indIndex(eRegP reg, rRegI ireg) %{
4245   match(AddP reg ireg);
4246 
4247   op_cost(10);
4248   format %{"[$reg + $ireg]" %}
4249   interface(MEMORY_INTER) %{
4250     base($reg);
4251     index($ireg);
4252     scale(0x0);
4253     disp(0x0);
4254   %}
4255 %}
4256 
4257 // // -------------------------------------------------------------------------
4258 // // 486 architecture doesn't support "scale * index + offset" with out a base
4259 // // -------------------------------------------------------------------------
4260 // // Scaled Memory Operands
4261 // // Indirect Memory Times Scale Plus Offset Operand
4262 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4263 //   match(AddP off (LShiftI ireg scale));
4264 //
4265 //   op_cost(10);
4266 //   format %{"[$off + $ireg << $scale]" %}
4267 //   interface(MEMORY_INTER) %{
4268 //     base(0x4);
4269 //     index($ireg);
4270 //     scale($scale);
4271 //     disp($off);
4272 //   %}
4273 // %}
4274 
4275 // Indirect Memory Times Scale Plus Index Register
4276 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4277   match(AddP reg (LShiftI ireg scale));
4278 
4279   op_cost(10);
4280   format %{"[$reg + $ireg << $scale]" %}
4281   interface(MEMORY_INTER) %{
4282     base($reg);
4283     index($ireg);
4284     scale($scale);
4285     disp(0x0);
4286   %}
4287 %}
4288 
4289 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4290 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4291   match(AddP (AddP reg (LShiftI ireg scale)) off);
4292 
4293   op_cost(10);
4294   format %{"[$reg + $off + $ireg << $scale]" %}
4295   interface(MEMORY_INTER) %{
4296     base($reg);
4297     index($ireg);
4298     scale($scale);
4299     disp($off);
4300   %}
4301 %}
4302 
4303 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
4305 // the first word of the long.  If the load-long destination overlaps with
4306 // registers used in the addressing expression, the 2nd half will be loaded
4307 // from a clobbered address.  Fix this by requiring that load-long use
4308 // address registers that do not overlap with the load-long target.
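// For example, if the destination pair were EDX:EAX and the address were
// [EAX + 8], loading the low word into EAX would clobber the base register
// before the high word at [EAX + 12] could be fetched; the operands below
// sidestep this by forcing the address into ESI.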
4309 
4310 // load-long support
4311 operand load_long_RegP() %{
4312   constraint(ALLOC_IN_RC(esi_reg));
4313   match(RegP);
4314   match(eSIRegP);
4315   op_cost(100);
4316   format %{  %}
4317   interface(REG_INTER);
4318 %}
4319 
4320 // Indirect Memory Operand Long
4321 operand load_long_indirect(load_long_RegP reg) %{
4322   constraint(ALLOC_IN_RC(esi_reg));
4323   match(reg);
4324 
4325   format %{ "[$reg]" %}
4326   interface(MEMORY_INTER) %{
4327     base($reg);
4328     index(0x4);
4329     scale(0x0);
4330     disp(0x0);
4331   %}
4332 %}
4333 
4334 // Indirect Memory Plus Long Offset Operand
4335 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4336   match(AddP reg off);
4337 
4338   format %{ "[$reg + $off]" %}
4339   interface(MEMORY_INTER) %{
4340     base($reg);
4341     index(0x4);
4342     scale(0x0);
4343     disp($off);
4344   %}
4345 %}
4346 
4347 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4348 
4349 
4350 //----------Special Memory Operands--------------------------------------------
4351 // Stack Slot Operand - This operand is used for loading and storing temporary
4352 //                      values on the stack where a match requires a value to
4353 //                      flow through memory.
4354 operand stackSlotP(sRegP reg) %{
4355   constraint(ALLOC_IN_RC(stack_slots));
4356   // No match rule because this operand is only generated in matching
4357   format %{ "[$reg]" %}
4358   interface(MEMORY_INTER) %{
4359     base(0x4);   // ESP
4360     index(0x4);  // No Index
4361     scale(0x0);  // No Scale
4362     disp($reg);  // Stack Offset
4363   %}
4364 %}
4365 
4366 operand stackSlotI(sRegI reg) %{
4367   constraint(ALLOC_IN_RC(stack_slots));
4368   // No match rule because this operand is only generated in matching
4369   format %{ "[$reg]" %}
4370   interface(MEMORY_INTER) %{
4371     base(0x4);   // ESP
4372     index(0x4);  // No Index
4373     scale(0x0);  // No Scale
4374     disp($reg);  // Stack Offset
4375   %}
4376 %}
4377 
4378 operand stackSlotF(sRegF reg) %{
4379   constraint(ALLOC_IN_RC(stack_slots));
4380   // No match rule because this operand is only generated in matching
4381   format %{ "[$reg]" %}
4382   interface(MEMORY_INTER) %{
4383     base(0x4);   // ESP
4384     index(0x4);  // No Index
4385     scale(0x0);  // No Scale
4386     disp($reg);  // Stack Offset
4387   %}
4388 %}
4389 
4390 operand stackSlotD(sRegD reg) %{
4391   constraint(ALLOC_IN_RC(stack_slots));
4392   // No match rule because this operand is only generated in matching
4393   format %{ "[$reg]" %}
4394   interface(MEMORY_INTER) %{
4395     base(0x4);   // ESP
4396     index(0x4);  // No Index
4397     scale(0x0);  // No Scale
4398     disp($reg);  // Stack Offset
4399   %}
4400 %}
4401 
4402 operand stackSlotL(sRegL reg) %{
4403   constraint(ALLOC_IN_RC(stack_slots));
4404   // No match rule because this operand is only generated in matching
4405   format %{ "[$reg]" %}
4406   interface(MEMORY_INTER) %{
4407     base(0x4);   // ESP
4408     index(0x4);  // No Index
4409     scale(0x0);  // No Scale
4410     disp($reg);  // Stack Offset
4411   %}
4412 %}
4413 
4414 //----------Memory Operands - Win95 Implicit Null Variants----------------
4415 // Indirect Memory Operand
4416 operand indirect_win95_safe(eRegP_no_EBP reg)
4417 %{
4418   constraint(ALLOC_IN_RC(int_reg));
4419   match(reg);
4420 
4421   op_cost(100);
4422   format %{ "[$reg]" %}
4423   interface(MEMORY_INTER) %{
4424     base($reg);
4425     index(0x4);
4426     scale(0x0);
4427     disp(0x0);
4428   %}
4429 %}
4430 
4431 // Indirect Memory Plus Short Offset Operand
4432 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4433 %{
4434   match(AddP reg off);
4435 
4436   op_cost(100);
4437   format %{ "[$reg + $off]" %}
4438   interface(MEMORY_INTER) %{
4439     base($reg);
4440     index(0x4);
4441     scale(0x0);
4442     disp($off);
4443   %}
4444 %}
4445 
4446 // Indirect Memory Plus Long Offset Operand
4447 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4448 %{
4449   match(AddP reg off);
4450 
4451   op_cost(100);
4452   format %{ "[$reg + $off]" %}
4453   interface(MEMORY_INTER) %{
4454     base($reg);
4455     index(0x4);
4456     scale(0x0);
4457     disp($off);
4458   %}
4459 %}
4460 
4461 // Indirect Memory Plus Index Register Plus Offset Operand
4462 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4463 %{
4464   match(AddP (AddP reg ireg) off);
4465 
4466   op_cost(100);
4467   format %{"[$reg + $off + $ireg]" %}
4468   interface(MEMORY_INTER) %{
4469     base($reg);
4470     index($ireg);
4471     scale(0x0);
4472     disp($off);
4473   %}
4474 %}
4475 
4476 // Indirect Memory Times Scale Plus Index Register
4477 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4478 %{
4479   match(AddP reg (LShiftI ireg scale));
4480 
4481   op_cost(100);
4482   format %{"[$reg + $ireg << $scale]" %}
4483   interface(MEMORY_INTER) %{
4484     base($reg);
4485     index($ireg);
4486     scale($scale);
4487     disp(0x0);
4488   %}
4489 %}
4490 
4491 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4492 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4493 %{
4494   match(AddP (AddP reg (LShiftI ireg scale)) off);
4495 
4496   op_cost(100);
4497   format %{"[$reg + $off + $ireg << $scale]" %}
4498   interface(MEMORY_INTER) %{
4499     base($reg);
4500     index($ireg);
4501     scale($scale);
4502     disp($off);
4503   %}
4504 %}
4505 
4506 //----------Conditional Branch Operands----------------------------------------
4507 // Comparison Op  - This is the operation of the comparison, and is limited to
4508 //                  the following set of codes:
4509 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4510 //
4511 // Other attributes of the comparison, such as unsignedness, are specified
4512 // by the comparison instruction that sets a condition code flags register.
4513 // That result is represented by a flags operand whose subtype is appropriate
4514 // to the unsignedness (etc.) of the comparison.
4515 //
4516 // Later, the instruction which matches both the Comparison Op (a Bool) and
4517 // the flags (produced by the Cmp) specifies the coding of the comparison op
4518 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4519 
// Comparison Code
4521 operand cmpOp() %{
4522   match(Bool);
4523 
4524   format %{ "" %}
4525   interface(COND_INTER) %{
4526     equal(0x4, "e");
4527     not_equal(0x5, "ne");
4528     less(0xC, "l");
4529     greater_equal(0xD, "ge");
4530     less_equal(0xE, "le");
4531     greater(0xF, "g");
4532     overflow(0x0, "o");
4533     no_overflow(0x1, "no");
4534   %}
4535 %}
4536 
4537 // Comparison Code, unsigned compare.  Used by FP also, with
4538 // C2 (unordered) turned into GT or LT already.  The other bits
4539 // C0 and C3 are turned into Carry & Zero flags.
4540 operand cmpOpU() %{
4541   match(Bool);
4542 
4543   format %{ "" %}
4544   interface(COND_INTER) %{
4545     equal(0x4, "e");
4546     not_equal(0x5, "ne");
4547     less(0x2, "b");
4548     greater_equal(0x3, "nb");
4549     less_equal(0x6, "be");
4550     greater(0x7, "nbe");
4551     overflow(0x0, "o");
4552     no_overflow(0x1, "no");
4553   %}
4554 %}
4555 
4556 // Floating comparisons that don't require any fixup for the unordered case
4557 operand cmpOpUCF() %{
4558   match(Bool);
4559   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4560             n->as_Bool()->_test._test == BoolTest::ge ||
4561             n->as_Bool()->_test._test == BoolTest::le ||
4562             n->as_Bool()->_test._test == BoolTest::gt);
4563   format %{ "" %}
4564   interface(COND_INTER) %{
4565     equal(0x4, "e");
4566     not_equal(0x5, "ne");
4567     less(0x2, "b");
4568     greater_equal(0x3, "nb");
4569     less_equal(0x6, "be");
4570     greater(0x7, "nbe");
4571     overflow(0x0, "o");
4572     no_overflow(0x1, "no");
4573   %}
4574 %}
4575 
4576 
4577 // Floating comparisons that can be fixed up with extra conditional jumps
4578 operand cmpOpUCF2() %{
4579   match(Bool);
4580   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4581             n->as_Bool()->_test._test == BoolTest::eq);
4582   format %{ "" %}
4583   interface(COND_INTER) %{
4584     equal(0x4, "e");
4585     not_equal(0x5, "ne");
4586     less(0x2, "b");
4587     greater_equal(0x3, "nb");
4588     less_equal(0x6, "be");
4589     greater(0x7, "nbe");
4590     overflow(0x0, "o");
4591     no_overflow(0x1, "no");
4592   %}
4593 %}
4594 
4595 // Comparison Code for FP conditional move
4596 operand cmpOp_fcmov() %{
4597   match(Bool);
4598 
4599   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4600             n->as_Bool()->_test._test != BoolTest::no_overflow);
4601   format %{ "" %}
4602   interface(COND_INTER) %{
4603     equal        (0x0C8);
4604     not_equal    (0x1C8);
4605     less         (0x0C0);
4606     greater_equal(0x1C0);
4607     less_equal   (0x0D0);
4608     greater      (0x1D0);
4609     overflow(0x0, "o"); // not really supported by the instruction
4610     no_overflow(0x1, "no"); // not really supported by the instruction
4611   %}
4612 %}
4613 
4614 // Comparison Code used in long compares
4615 operand cmpOp_commute() %{
4616   match(Bool);
4617 
4618   format %{ "" %}
4619   interface(COND_INTER) %{
4620     equal(0x4, "e");
4621     not_equal(0x5, "ne");
4622     less(0xF, "g");
4623     greater_equal(0xE, "le");
4624     less_equal(0xD, "ge");
4625     greater(0xC, "l");
4626     overflow(0x0, "o");
4627     no_overflow(0x1, "no");
4628   %}
4629 %}
4630 
4631 // Comparison Code used in unsigned long compares
4632 operand cmpOpU_commute() %{
4633   match(Bool);
4634 
4635   format %{ "" %}
4636   interface(COND_INTER) %{
4637     equal(0x4, "e");
4638     not_equal(0x5, "ne");
4639     less(0x7, "nbe");
4640     greater_equal(0x6, "be");
4641     less_equal(0x3, "nb");
4642     greater(0x2, "b");
4643     overflow(0x0, "o");
4644     no_overflow(0x1, "no");
4645   %}
4646 %}
4647 
4648 //----------OPERAND CLASSES----------------------------------------------------
4649 // Operand Classes are groups of operands that are used to simplify
4650 // instruction definitions by not requiring the AD writer to specify separate
4651 // instructions for every form of operand when the instruction accepts
4652 // multiple operand types with the same basic encoding and format.  The classic
4653 // case of this is memory operands.
4654 
4655 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4656                indIndex, indIndexScale, indIndexScaleOffset);
4657 
4658 // Long memory operations are encoded as two instructions and a +4 offset for
4659 // the high word.  This means some kind of offset is always required and you cannot
4660 // use an oop as the offset (as is done for static globals); see the sketch following the opclass below.
4661 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4662                     indIndex, indIndexScale, indIndexScaleOffset);
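
// A sketch of that split (the same pattern the long load/store rules below use
// in their ins_encode blocks): the low word is addressed at $disp and the high
// word at $disp + 4.
//
//   Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale,
//                                      $mem$$disp,     relocInfo::none);
//   Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale,
//                                      $mem$$disp + 4, relocInfo::none);
//   __ movl($dst$$Register,                Amemlo);
//   __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);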
4663 
4664 
4665 //----------PIPELINE-----------------------------------------------------------
4666 // Rules which define the behavior of the target architecture's pipeline.
4667 pipeline %{
4668 
4669 //----------ATTRIBUTES---------------------------------------------------------
4670 attributes %{
4671   variable_size_instructions;        // Instructions are variable-sized
4672   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4673   instruction_unit_size = 1;         // An instruction is 1 byte long
4674   instruction_fetch_unit_size = 16;  // The processor fetches one line
4675   instruction_fetch_units = 1;       // of 16 bytes
4676 
4677   // List of nop instructions
4678   nops( MachNop );
4679 %}
4680 
4681 //----------RESOURCES----------------------------------------------------------
4682 // Resources are the functional units available to the machine
4683 
4684 // Generic P2/P3 pipeline
4685 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4686 // 3 instructions decoded per cycle.
4687 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4688 // 2 ALU op, only ALU0 handles mul/div instructions.
4689 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4690            MS0, MS1, MEM = MS0 | MS1,
4691            BR, FPU,
4692            ALU0, ALU1, ALU = ALU0 | ALU1 );
4693 
4694 //----------PIPELINE DESCRIPTION-----------------------------------------------
4695 // Pipeline Description specifies the stages in the machine's pipeline
4696 
4697 // Generic P2/P3 pipeline
4698 pipe_desc(S0, S1, S2, S3, S4, S5);
4699 
4700 //----------PIPELINE CLASSES---------------------------------------------------
4701 // Pipeline Classes describe the stages in which input and output are
4702 // referenced by the hardware pipeline.
4703 
4704 // Naming convention: ialu or fpu
4705 // Then: _reg
4706 // Then: _reg if there is a 2nd register
4707 // Then: _long if it's a pair of instructions implementing a long operation
4708 // Then: _fat if it requires the big decoder
4709 //   Or: _mem if it requires the big decoder and a memory unit.
4710 
4711 // Integer ALU reg operation
4712 pipe_class ialu_reg(rRegI dst) %{
4713     single_instruction;
4714     dst    : S4(write);
4715     dst    : S3(read);
4716     DECODE : S0;        // any decoder
4717     ALU    : S3;        // any alu
4718 %}
4719 
4720 // Long ALU reg operation
4721 pipe_class ialu_reg_long(eRegL dst) %{
4722     instruction_count(2);
4723     dst    : S4(write);
4724     dst    : S3(read);
4725     DECODE : S0(2);     // any 2 decoders
4726     ALU    : S3(2);     // both alus
4727 %}
4728 
4729 // Integer ALU reg operation using big decoder
4730 pipe_class ialu_reg_fat(rRegI dst) %{
4731     single_instruction;
4732     dst    : S4(write);
4733     dst    : S3(read);
4734     D0     : S0;        // big decoder only
4735     ALU    : S3;        // any alu
4736 %}
4737 
4738 // Long ALU reg operation using big decoder
4739 pipe_class ialu_reg_long_fat(eRegL dst) %{
4740     instruction_count(2);
4741     dst    : S4(write);
4742     dst    : S3(read);
4743     D0     : S0(2);     // big decoder only; twice
4744     ALU    : S3(2);     // any 2 alus
4745 %}
4746 
4747 // Integer ALU reg-reg operation
4748 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4749     single_instruction;
4750     dst    : S4(write);
4751     src    : S3(read);
4752     DECODE : S0;        // any decoder
4753     ALU    : S3;        // any alu
4754 %}
4755 
4756 // Long ALU reg-reg operation
4757 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4758     instruction_count(2);
4759     dst    : S4(write);
4760     src    : S3(read);
4761     DECODE : S0(2);     // any 2 decoders
4762     ALU    : S3(2);     // both alus
4763 %}
4764 
4765 // Integer ALU reg-reg operation using big decoder
4766 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4767     single_instruction;
4768     dst    : S4(write);
4769     src    : S3(read);
4770     D0     : S0;        // big decoder only
4771     ALU    : S3;        // any alu
4772 %}
4773 
4774 // Long ALU reg-reg operation using big decoder
4775 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4776     instruction_count(2);
4777     dst    : S4(write);
4778     src    : S3(read);
4779     D0     : S0(2);     // big decoder only; twice
4780     ALU    : S3(2);     // both alus
4781 %}
4782 
4783 // Integer ALU reg-mem operation
4784 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4785     single_instruction;
4786     dst    : S5(write);
4787     mem    : S3(read);
4788     D0     : S0;        // big decoder only
4789     ALU    : S4;        // any alu
4790     MEM    : S3;        // any mem
4791 %}
4792 
4793 // Long ALU reg-mem operation
4794 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4795     instruction_count(2);
4796     dst    : S5(write);
4797     mem    : S3(read);
4798     D0     : S0(2);     // big decoder only; twice
4799     ALU    : S4(2);     // any 2 alus
4800     MEM    : S3(2);     // both mems
4801 %}
4802 
4803 // Integer mem operation (prefetch)
4804 pipe_class ialu_mem(memory mem)
4805 %{
4806     single_instruction;
4807     mem    : S3(read);
4808     D0     : S0;        // big decoder only
4809     MEM    : S3;        // any mem
4810 %}
4811 
4812 // Integer Store to Memory
4813 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4814     single_instruction;
4815     mem    : S3(read);
4816     src    : S5(read);
4817     D0     : S0;        // big decoder only
4818     ALU    : S4;        // any alu
4819     MEM    : S3;
4820 %}
4821 
4822 // Long Store to Memory
4823 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4824     instruction_count(2);
4825     mem    : S3(read);
4826     src    : S5(read);
4827     D0     : S0(2);     // big decoder only; twice
4828     ALU    : S4(2);     // any 2 alus
4829     MEM    : S3(2);     // Both mems
4830 %}
4831 
4832 // Integer Store to Memory
4833 pipe_class ialu_mem_imm(memory mem) %{
4834     single_instruction;
4835     mem    : S3(read);
4836     D0     : S0;        // big decoder only
4837     ALU    : S4;        // any alu
4838     MEM    : S3;
4839 %}
4840 
4841 // Integer ALU0 reg-reg operation
4842 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4843     single_instruction;
4844     dst    : S4(write);
4845     src    : S3(read);
4846     D0     : S0;        // Big decoder only
4847     ALU0   : S3;        // only alu0
4848 %}
4849 
4850 // Integer ALU0 reg-mem operation
4851 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4852     single_instruction;
4853     dst    : S5(write);
4854     mem    : S3(read);
4855     D0     : S0;        // big decoder only
4856     ALU0   : S4;        // ALU0 only
4857     MEM    : S3;        // any mem
4858 %}
4859 
4860 // Integer ALU reg-reg operation
4861 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4862     single_instruction;
4863     cr     : S4(write);
4864     src1   : S3(read);
4865     src2   : S3(read);
4866     DECODE : S0;        // any decoder
4867     ALU    : S3;        // any alu
4868 %}
4869 
4870 // Integer ALU reg-imm operation
4871 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4872     single_instruction;
4873     cr     : S4(write);
4874     src1   : S3(read);
4875     DECODE : S0;        // any decoder
4876     ALU    : S3;        // any alu
4877 %}
4878 
4879 // Integer ALU reg-mem operation
4880 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4881     single_instruction;
4882     cr     : S4(write);
4883     src1   : S3(read);
4884     src2   : S3(read);
4885     D0     : S0;        // big decoder only
4886     ALU    : S4;        // any alu
4887     MEM    : S3;
4888 %}
4889 
4890 // Conditional move reg-reg
4891 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4892     instruction_count(4);
4893     y      : S4(read);
4894     q      : S3(read);
4895     p      : S3(read);
4896     DECODE : S0(4);     // any decoder
4897 %}
4898 
4899 // Conditional move reg-reg
4900 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4901     single_instruction;
4902     dst    : S4(write);
4903     src    : S3(read);
4904     cr     : S3(read);
4905     DECODE : S0;        // any decoder
4906 %}
4907 
4908 // Conditional move reg-mem
4909 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4910     single_instruction;
4911     dst    : S4(write);
4912     src    : S3(read);
4913     cr     : S3(read);
4914     DECODE : S0;        // any decoder
4915     MEM    : S3;
4916 %}
4917 
4918 // Conditional move reg-reg long
4919 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4920     single_instruction;
4921     dst    : S4(write);
4922     src    : S3(read);
4923     cr     : S3(read);
4924     DECODE : S0(2);     // any 2 decoders
4925 %}
4926 
4927 // Conditional move double reg-reg
4928 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4929     single_instruction;
4930     dst    : S4(write);
4931     src    : S3(read);
4932     cr     : S3(read);
4933     DECODE : S0;        // any decoder
4934 %}
4935 
4936 // Float reg-reg operation
4937 pipe_class fpu_reg(regDPR dst) %{
4938     instruction_count(2);
4939     dst    : S3(read);
4940     DECODE : S0(2);     // any 2 decoders
4941     FPU    : S3;
4942 %}
4943 
4944 // Float reg-reg operation
4945 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4946     instruction_count(2);
4947     dst    : S4(write);
4948     src    : S3(read);
4949     DECODE : S0(2);     // any 2 decoders
4950     FPU    : S3;
4951 %}
4952 
4953 // Float reg-reg operation
4954 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4955     instruction_count(3);
4956     dst    : S4(write);
4957     src1   : S3(read);
4958     src2   : S3(read);
4959     DECODE : S0(3);     // any 3 decoders
4960     FPU    : S3(2);
4961 %}
4962 
4963 // Float reg-reg operation
4964 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4965     instruction_count(4);
4966     dst    : S4(write);
4967     src1   : S3(read);
4968     src2   : S3(read);
4969     src3   : S3(read);
4970     DECODE : S0(4);     // any 4 decoders
4971     FPU    : S3(2);
4972 %}
4973 
4974 // Float reg-reg operation
4975 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4976     instruction_count(4);
4977     dst    : S4(write);
4978     src1   : S3(read);
4979     src2   : S3(read);
4980     src3   : S3(read);
4981     DECODE : S1(3);     // any 3 decoders
4982     D0     : S0;        // Big decoder only
4983     FPU    : S3(2);
4984     MEM    : S3;
4985 %}
4986 
4987 // Float reg-mem operation
4988 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4989     instruction_count(2);
4990     dst    : S5(write);
4991     mem    : S3(read);
4992     D0     : S0;        // big decoder only
4993     DECODE : S1;        // any decoder for FPU POP
4994     FPU    : S4;
4995     MEM    : S3;        // any mem
4996 %}
4997 
4998 // Float reg-mem operation
4999 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
5000     instruction_count(3);
5001     dst    : S5(write);
5002     src1   : S3(read);
5003     mem    : S3(read);
5004     D0     : S0;        // big decoder only
5005     DECODE : S1(2);     // any decoder for FPU POP
5006     FPU    : S4;
5007     MEM    : S3;        // any mem
5008 %}
5009 
5010 // Float mem-reg operation
5011 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
5012     instruction_count(2);
5013     src    : S5(read);
5014     mem    : S3(read);
5015     DECODE : S0;        // any decoder for FPU PUSH
5016     D0     : S1;        // big decoder only
5017     FPU    : S4;
5018     MEM    : S3;        // any mem
5019 %}
5020 
5021 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5022     instruction_count(3);
5023     src1   : S3(read);
5024     src2   : S3(read);
5025     mem    : S3(read);
5026     DECODE : S0(2);     // any decoder for FPU PUSH
5027     D0     : S1;        // big decoder only
5028     FPU    : S4;
5029     MEM    : S3;        // any mem
5030 %}
5031 
5032 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5033     instruction_count(3);
5034     src1   : S3(read);
5035     src2   : S3(read);
5036     mem    : S4(read);
5037     DECODE : S0;        // any decoder for FPU PUSH
5038     D0     : S0(2);     // big decoder only
5039     FPU    : S4;
5040     MEM    : S3(2);     // any mem
5041 %}
5042 
5043 pipe_class fpu_mem_mem(memory dst, memory src1) %{
5044     instruction_count(2);
5045     src1   : S3(read);
5046     dst    : S4(read);
5047     D0     : S0(2);     // big decoder only
5048     MEM    : S3(2);     // any mem
5049 %}
5050 
5051 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5052     instruction_count(3);
5053     src1   : S3(read);
5054     src2   : S3(read);
5055     dst    : S4(read);
5056     D0     : S0(3);     // big decoder only
5057     FPU    : S4;
5058     MEM    : S3(3);     // any mem
5059 %}
5060 
5061 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5062     instruction_count(3);
5063     src1   : S4(read);
5064     mem    : S4(read);
5065     DECODE : S0;        // any decoder for FPU PUSH
5066     D0     : S0(2);     // big decoder only
5067     FPU    : S4;
5068     MEM    : S3(2);     // any mem
5069 %}
5070 
5071 // Float load constant
5072 pipe_class fpu_reg_con(regDPR dst) %{
5073     instruction_count(2);
5074     dst    : S5(write);
5075     D0     : S0;        // big decoder only for the load
5076     DECODE : S1;        // any decoder for FPU POP
5077     FPU    : S4;
5078     MEM    : S3;        // any mem
5079 %}
5080 
5081 // Float load constant
5082 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5083     instruction_count(3);
5084     dst    : S5(write);
5085     src    : S3(read);
5086     D0     : S0;        // big decoder only for the load
5087     DECODE : S1(2);     // any decoder for FPU POP
5088     FPU    : S4;
5089     MEM    : S3;        // any mem
5090 %}
5091 
5092 // UnConditional branch
5093 pipe_class pipe_jmp( label labl ) %{
5094     single_instruction;
5095     BR   : S3;
5096 %}
5097 
5098 // Conditional branch
5099 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5100     single_instruction;
5101     cr    : S1(read);
5102     BR    : S3;
5103 %}
5104 
5105 // Allocation idiom
5106 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5107     instruction_count(1); force_serialization;
5108     fixed_latency(6);
5109     heap_ptr : S3(read);
5110     DECODE   : S0(3);
5111     D0       : S2;
5112     MEM      : S3;
5113     ALU      : S3(2);
5114     dst      : S5(write);
5115     BR       : S5;
5116 %}
5117 
5118 // Generic big/slow expanded idiom
5119 pipe_class pipe_slow(  ) %{
5120     instruction_count(10); multiple_bundles; force_serialization;
5121     fixed_latency(100);
5122     D0  : S0(2);
5123     MEM : S3(2);
5124 %}
5125 
5126 // The real do-nothing guy
5127 pipe_class empty( ) %{
5128     instruction_count(0);
5129 %}
5130 
5131 // Define the class for the Nop node
5132 define %{
5133    MachNop = empty;
5134 %}
5135 
5136 %}
5137 
5138 //----------INSTRUCTIONS-------------------------------------------------------
5139 //
5140 // match      -- States which machine-independent subtree may be replaced
5141 //               by this instruction.
5142 // ins_cost   -- The estimated cost of this instruction is used by instruction
5143 //               selection to identify a minimum cost tree of machine
5144 //               instructions that matches a tree of machine-independent
5145 //               instructions.
5146 // format     -- A string providing the disassembly for this instruction.
5147 //               The value of an instruction's operand may be inserted
5148 //               by referring to it with a '$' prefix.
5149 // opcode     -- Three instruction opcodes may be provided.  These are referred
5150 //               to within an encode class as $primary, $secondary, and $tertiary
5151 //               respectively.  The primary opcode is commonly used to
5152 //               indicate the type of machine instruction, while secondary
5153 //               and tertiary are often used for prefix options or addressing
5154 //               modes.
5155 // ins_encode -- A list of encode classes with parameters. The encode class
5156 //               name must have been defined in an 'enc_class' specification
5157 //               in the encode section of the architecture description.
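//
// As a worked illustration (a sketch only, not one of the definitions that
// follow), a simple register-from-memory load ties these pieces together:
//
//   instruct loadI_sketch(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));          // replaces an ideal LoadI subtree
//     ins_cost(125);                       // cost used during instruction selection
//     format %{ "MOV    $dst,$mem" %}      // disassembly string, '$' operands substituted
//     opcode(0x8B);                        // $primary opcode
//     ins_encode( OpcP, RegMem(dst,mem) ); // encode classes from the encode section
//     ins_pipe( ialu_reg_mem );            // pipeline class defined above
//   %}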
5158 
5159 //----------BSWAP-Instruction--------------------------------------------------
5160 instruct bytes_reverse_int(rRegI dst) %{
5161   match(Set dst (ReverseBytesI dst));
5162 
5163   format %{ "BSWAP  $dst" %}
5164   opcode(0x0F, 0xC8);
5165   ins_encode( OpcP, OpcSReg(dst) );
5166   ins_pipe( ialu_reg );
5167 %}
5168 
5169 instruct bytes_reverse_long(eRegL dst) %{
5170   match(Set dst (ReverseBytesL dst));
5171 
5172   format %{ "BSWAP  $dst.lo\n\t"
5173             "BSWAP  $dst.hi\n\t"
5174             "XCHG   $dst.lo $dst.hi" %}
5175 
5176   ins_cost(125);
5177   ins_encode( bswap_long_bytes(dst) );
5178   ins_pipe( ialu_reg_reg);
5179 %}
5180 
5181 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5182   match(Set dst (ReverseBytesUS dst));
5183   effect(KILL cr);
5184 
5185   format %{ "BSWAP  $dst\n\t"
5186             "SHR    $dst,16\n\t" %}
5187   ins_encode %{
5188     __ bswapl($dst$$Register);
5189     __ shrl($dst$$Register, 16);
5190   %}
5191   ins_pipe( ialu_reg );
5192 %}
5193 
5194 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5195   match(Set dst (ReverseBytesS dst));
5196   effect(KILL cr);
5197 
5198   format %{ "BSWAP  $dst\n\t"
5199             "SAR    $dst,16\n\t" %}
5200   ins_encode %{
5201     __ bswapl($dst$$Register);
5202     __ sarl($dst$$Register, 16);
5203   %}
5204   ins_pipe( ialu_reg );
5205 %}
5206 
5207 
5208 //---------- Zeros Count Instructions ------------------------------------------
5209 
5210 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5211   predicate(UseCountLeadingZerosInstruction);
5212   match(Set dst (CountLeadingZerosI src));
5213   effect(KILL cr);
5214 
5215   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5216   ins_encode %{
5217     __ lzcntl($dst$$Register, $src$$Register);
5218   %}
5219   ins_pipe(ialu_reg);
5220 %}
5221 
5222 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5223   predicate(!UseCountLeadingZerosInstruction);
5224   match(Set dst (CountLeadingZerosI src));
5225   effect(KILL cr);
5226 
5227   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5228             "JNZ    skip\n\t"
5229             "MOV    $dst, -1\n"
5230       "skip:\n\t"
5231             "NEG    $dst\n\t"
5232             "ADD    $dst, 31" %}
5233   ins_encode %{
5234     Register Rdst = $dst$$Register;
5235     Register Rsrc = $src$$Register;
5236     Label skip;
5237     __ bsrl(Rdst, Rsrc);
5238     __ jccb(Assembler::notZero, skip);
5239     __ movl(Rdst, -1);
5240     __ bind(skip);
5241     __ negl(Rdst);
5242     __ addl(Rdst, BitsPerInt - 1);
5243   %}
5244   ins_pipe(ialu_reg);
5245 %}
5246 
5247 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5248   predicate(UseCountLeadingZerosInstruction);
5249   match(Set dst (CountLeadingZerosL src));
5250   effect(TEMP dst, KILL cr);
5251 
5252   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5253             "JNC    done\n\t"
5254             "LZCNT  $dst, $src.lo\n\t"
5255             "ADD    $dst, 32\n"
5256       "done:" %}
5257   ins_encode %{
5258     Register Rdst = $dst$$Register;
5259     Register Rsrc = $src$$Register;
5260     Label done;
5261     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5262     __ jccb(Assembler::carryClear, done);
5263     __ lzcntl(Rdst, Rsrc);
5264     __ addl(Rdst, BitsPerInt);
5265     __ bind(done);
5266   %}
5267   ins_pipe(ialu_reg);
5268 %}
5269 
5270 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5271   predicate(!UseCountLeadingZerosInstruction);
5272   match(Set dst (CountLeadingZerosL src));
5273   effect(TEMP dst, KILL cr);
5274 
5275   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5276             "JZ     msw_is_zero\n\t"
5277             "ADD    $dst, 32\n\t"
5278             "JMP    not_zero\n"
5279       "msw_is_zero:\n\t"
5280             "BSR    $dst, $src.lo\n\t"
5281             "JNZ    not_zero\n\t"
5282             "MOV    $dst, -1\n"
5283       "not_zero:\n\t"
5284             "NEG    $dst\n\t"
5285             "ADD    $dst, 63\n" %}
5286  ins_encode %{
5287     Register Rdst = $dst$$Register;
5288     Register Rsrc = $src$$Register;
5289     Label msw_is_zero;
5290     Label not_zero;
5291     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5292     __ jccb(Assembler::zero, msw_is_zero);
5293     __ addl(Rdst, BitsPerInt);
5294     __ jmpb(not_zero);
5295     __ bind(msw_is_zero);
5296     __ bsrl(Rdst, Rsrc);
5297     __ jccb(Assembler::notZero, not_zero);
5298     __ movl(Rdst, -1);
5299     __ bind(not_zero);
5300     __ negl(Rdst);
5301     __ addl(Rdst, BitsPerLong - 1);
5302   %}
5303   ins_pipe(ialu_reg);
5304 %}
5305 
5306 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5307   predicate(UseCountTrailingZerosInstruction);
5308   match(Set dst (CountTrailingZerosI src));
5309   effect(KILL cr);
5310 
5311   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5312   ins_encode %{
5313     __ tzcntl($dst$$Register, $src$$Register);
5314   %}
5315   ins_pipe(ialu_reg);
5316 %}
5317 
5318 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5319   predicate(!UseCountTrailingZerosInstruction);
5320   match(Set dst (CountTrailingZerosI src));
5321   effect(KILL cr);
5322 
5323   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5324             "JNZ    done\n\t"
5325             "MOV    $dst, 32\n"
5326       "done:" %}
5327   ins_encode %{
5328     Register Rdst = $dst$$Register;
5329     Label done;
5330     __ bsfl(Rdst, $src$$Register);
5331     __ jccb(Assembler::notZero, done);
5332     __ movl(Rdst, BitsPerInt);
5333     __ bind(done);
5334   %}
5335   ins_pipe(ialu_reg);
5336 %}
5337 
5338 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5339   predicate(UseCountTrailingZerosInstruction);
5340   match(Set dst (CountTrailingZerosL src));
5341   effect(TEMP dst, KILL cr);
5342 
5343   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5344             "JNC    done\n\t"
5345             "TZCNT  $dst, $src.hi\n\t"
5346             "ADD    $dst, 32\n"
5347       "done:" %}
5348   ins_encode %{
5349     Register Rdst = $dst$$Register;
5350     Register Rsrc = $src$$Register;
5351     Label done;
5352     __ tzcntl(Rdst, Rsrc);
5353     __ jccb(Assembler::carryClear, done);
5354     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5355     __ addl(Rdst, BitsPerInt);
5356     __ bind(done);
5357   %}
5358   ins_pipe(ialu_reg);
5359 %}
5360 
5361 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5362   predicate(!UseCountTrailingZerosInstruction);
5363   match(Set dst (CountTrailingZerosL src));
5364   effect(TEMP dst, KILL cr);
5365 
5366   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5367             "JNZ    done\n\t"
5368             "BSF    $dst, $src.hi\n\t"
5369             "JNZ    msw_not_zero\n\t"
5370             "MOV    $dst, 32\n"
5371       "msw_not_zero:\n\t"
5372             "ADD    $dst, 32\n"
5373       "done:" %}
5374   ins_encode %{
5375     Register Rdst = $dst$$Register;
5376     Register Rsrc = $src$$Register;
5377     Label msw_not_zero;
5378     Label done;
5379     __ bsfl(Rdst, Rsrc);
5380     __ jccb(Assembler::notZero, done);
5381     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5382     __ jccb(Assembler::notZero, msw_not_zero);
5383     __ movl(Rdst, BitsPerInt);
5384     __ bind(msw_not_zero);
5385     __ addl(Rdst, BitsPerInt);
5386     __ bind(done);
5387   %}
5388   ins_pipe(ialu_reg);
5389 %}
5390 
5391 
5392 //---------- Population Count Instructions -------------------------------------
5393 
5394 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5395   predicate(UsePopCountInstruction);
5396   match(Set dst (PopCountI src));
5397   effect(KILL cr);
5398 
5399   format %{ "POPCNT $dst, $src" %}
5400   ins_encode %{
5401     __ popcntl($dst$$Register, $src$$Register);
5402   %}
5403   ins_pipe(ialu_reg);
5404 %}
5405 
5406 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5407   predicate(UsePopCountInstruction);
5408   match(Set dst (PopCountI (LoadI mem)));
5409   effect(KILL cr);
5410 
5411   format %{ "POPCNT $dst, $mem" %}
5412   ins_encode %{
5413     __ popcntl($dst$$Register, $mem$$Address);
5414   %}
5415   ins_pipe(ialu_reg);
5416 %}
5417 
5418 // Note: Long.bitCount(long) returns an int.
5419 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5420   predicate(UsePopCountInstruction);
5421   match(Set dst (PopCountL src));
5422   effect(KILL cr, TEMP tmp, TEMP dst);
5423 
5424   format %{ "POPCNT $dst, $src.lo\n\t"
5425             "POPCNT $tmp, $src.hi\n\t"
5426             "ADD    $dst, $tmp" %}
5427   ins_encode %{
5428     __ popcntl($dst$$Register, $src$$Register);
5429     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5430     __ addl($dst$$Register, $tmp$$Register);
5431   %}
5432   ins_pipe(ialu_reg);
5433 %}
5434 
5435 // Note: Long.bitCount(long) returns an int.
5436 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5437   predicate(UsePopCountInstruction);
5438   match(Set dst (PopCountL (LoadL mem)));
5439   effect(KILL cr, TEMP tmp, TEMP dst);
5440 
5441   format %{ "POPCNT $dst, $mem\n\t"
5442             "POPCNT $tmp, $mem+4\n\t"
5443             "ADD    $dst, $tmp" %}
5444   ins_encode %{
5445     //__ popcntl($dst$$Register, $mem$$Address$$first);
5446     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5447     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5448     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5449     __ addl($dst$$Register, $tmp$$Register);
5450   %}
5451   ins_pipe(ialu_reg);
5452 %}
5453 
5454 
5455 //----------Load/Store/Move Instructions---------------------------------------
5456 //----------Load Instructions--------------------------------------------------
5457 // Load Byte (8bit signed)
5458 instruct loadB(xRegI dst, memory mem) %{
5459   match(Set dst (LoadB mem));
5460 
5461   ins_cost(125);
5462   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5463 
5464   ins_encode %{
5465     __ movsbl($dst$$Register, $mem$$Address);
5466   %}
5467 
5468   ins_pipe(ialu_reg_mem);
5469 %}
5470 
5471 // Load Byte (8bit signed) into Long Register
5472 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5473   match(Set dst (ConvI2L (LoadB mem)));
5474   effect(KILL cr);
5475 
5476   ins_cost(375);
5477   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5478             "MOV    $dst.hi,$dst.lo\n\t"
5479             "SAR    $dst.hi,7" %}
5480 
5481   ins_encode %{
5482     __ movsbl($dst$$Register, $mem$$Address);
5483     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5484     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSBs are already sign-extended.
5485   %}
5486 
5487   ins_pipe(ialu_reg_mem);
5488 %}
5489 
5490 // Load Unsigned Byte (8bit UNsigned)
5491 instruct loadUB(xRegI dst, memory mem) %{
5492   match(Set dst (LoadUB mem));
5493 
5494   ins_cost(125);
5495   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5496 
5497   ins_encode %{
5498     __ movzbl($dst$$Register, $mem$$Address);
5499   %}
5500 
5501   ins_pipe(ialu_reg_mem);
5502 %}
5503 
5504 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5505 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5506   match(Set dst (ConvI2L (LoadUB mem)));
5507   effect(KILL cr);
5508 
5509   ins_cost(250);
5510   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5511             "XOR    $dst.hi,$dst.hi" %}
5512 
5513   ins_encode %{
5514     Register Rdst = $dst$$Register;
5515     __ movzbl(Rdst, $mem$$Address);
5516     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5517   %}
5518 
5519   ins_pipe(ialu_reg_mem);
5520 %}
5521 
5522 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5523 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5524   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5525   effect(KILL cr);
5526 
5527   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5528             "XOR    $dst.hi,$dst.hi\n\t"
5529             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5530   ins_encode %{
5531     Register Rdst = $dst$$Register;
5532     __ movzbl(Rdst, $mem$$Address);
5533     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5534     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5535   %}
5536   ins_pipe(ialu_reg_mem);
5537 %}
5538 
5539 // Load Short (16bit signed)
5540 instruct loadS(rRegI dst, memory mem) %{
5541   match(Set dst (LoadS mem));
5542 
5543   ins_cost(125);
5544   format %{ "MOVSX  $dst,$mem\t# short" %}
5545 
5546   ins_encode %{
5547     __ movswl($dst$$Register, $mem$$Address);
5548   %}
5549 
5550   ins_pipe(ialu_reg_mem);
5551 %}
5552 
5553 // Load Short (16 bit signed) to Byte (8 bit signed)
5554 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5555   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5556 
5557   ins_cost(125);
5558   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5559   ins_encode %{
5560     __ movsbl($dst$$Register, $mem$$Address);
5561   %}
5562   ins_pipe(ialu_reg_mem);
5563 %}
5564 
5565 // Load Short (16bit signed) into Long Register
5566 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5567   match(Set dst (ConvI2L (LoadS mem)));
5568   effect(KILL cr);
5569 
5570   ins_cost(375);
5571   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5572             "MOV    $dst.hi,$dst.lo\n\t"
5573             "SAR    $dst.hi,15" %}
5574 
5575   ins_encode %{
5576     __ movswl($dst$$Register, $mem$$Address);
5577     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5578     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSBs are already sign-extended.
5579   %}
5580 
5581   ins_pipe(ialu_reg_mem);
5582 %}
5583 
5584 // Load Unsigned Short/Char (16bit unsigned)
5585 instruct loadUS(rRegI dst, memory mem) %{
5586   match(Set dst (LoadUS mem));
5587 
5588   ins_cost(125);
5589   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5590 
5591   ins_encode %{
5592     __ movzwl($dst$$Register, $mem$$Address);
5593   %}
5594 
5595   ins_pipe(ialu_reg_mem);
5596 %}
5597 
5598 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5599 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5600   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5601 
5602   ins_cost(125);
5603   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5604   ins_encode %{
5605     __ movsbl($dst$$Register, $mem$$Address);
5606   %}
5607   ins_pipe(ialu_reg_mem);
5608 %}
5609 
5610 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5611 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5612   match(Set dst (ConvI2L (LoadUS mem)));
5613   effect(KILL cr);
5614 
5615   ins_cost(250);
5616   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5617             "XOR    $dst.hi,$dst.hi" %}
5618 
5619   ins_encode %{
5620     __ movzwl($dst$$Register, $mem$$Address);
5621     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5622   %}
5623 
5624   ins_pipe(ialu_reg_mem);
5625 %}
5626 
5627 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5628 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5629   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5630   effect(KILL cr);
5631 
5632   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5633             "XOR    $dst.hi,$dst.hi" %}
5634   ins_encode %{
5635     Register Rdst = $dst$$Register;
5636     __ movzbl(Rdst, $mem$$Address);
5637     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5638   %}
5639   ins_pipe(ialu_reg_mem);
5640 %}
5641 
5642 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5643 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5644   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5645   effect(KILL cr);
5646 
5647   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5648             "XOR    $dst.hi,$dst.hi\n\t"
5649             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5650   ins_encode %{
5651     Register Rdst = $dst$$Register;
5652     __ movzwl(Rdst, $mem$$Address);
5653     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5654     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5655   %}
5656   ins_pipe(ialu_reg_mem);
5657 %}
5658 
5659 // Load Integer
5660 instruct loadI(rRegI dst, memory mem) %{
5661   match(Set dst (LoadI mem));
5662 
5663   ins_cost(125);
5664   format %{ "MOV    $dst,$mem\t# int" %}
5665 
5666   ins_encode %{
5667     __ movl($dst$$Register, $mem$$Address);
5668   %}
5669 
5670   ins_pipe(ialu_reg_mem);
5671 %}
5672 
5673 // Load Integer (32 bit signed) to Byte (8 bit signed)
5674 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5675   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5676 
5677   ins_cost(125);
5678   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5679   ins_encode %{
5680     __ movsbl($dst$$Register, $mem$$Address);
5681   %}
5682   ins_pipe(ialu_reg_mem);
5683 %}
5684 
5685 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5686 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5687   match(Set dst (AndI (LoadI mem) mask));
5688 
5689   ins_cost(125);
5690   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5691   ins_encode %{
5692     __ movzbl($dst$$Register, $mem$$Address);
5693   %}
5694   ins_pipe(ialu_reg_mem);
5695 %}
5696 
5697 // Load Integer (32 bit signed) to Short (16 bit signed)
5698 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5699   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5700 
5701   ins_cost(125);
5702   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5703   ins_encode %{
5704     __ movswl($dst$$Register, $mem$$Address);
5705   %}
5706   ins_pipe(ialu_reg_mem);
5707 %}
5708 
5709 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5710 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5711   match(Set dst (AndI (LoadI mem) mask));
5712 
5713   ins_cost(125);
5714   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5715   ins_encode %{
5716     __ movzwl($dst$$Register, $mem$$Address);
5717   %}
5718   ins_pipe(ialu_reg_mem);
5719 %}
5720 
5721 // Load Integer into Long Register
5722 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5723   match(Set dst (ConvI2L (LoadI mem)));
5724   effect(KILL cr);
5725 
5726   ins_cost(375);
5727   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5728             "MOV    $dst.hi,$dst.lo\n\t"
5729             "SAR    $dst.hi,31" %}
5730 
5731   ins_encode %{
5732     __ movl($dst$$Register, $mem$$Address);
5733     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5734     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5735   %}
5736 
5737   ins_pipe(ialu_reg_mem);
5738 %}
5739 
5740 // Load Integer with mask 0xFF into Long Register
5741 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5742   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5743   effect(KILL cr);
5744 
5745   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5746             "XOR    $dst.hi,$dst.hi" %}
5747   ins_encode %{
5748     Register Rdst = $dst$$Register;
5749     __ movzbl(Rdst, $mem$$Address);
5750     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5751   %}
5752   ins_pipe(ialu_reg_mem);
5753 %}
5754 
5755 // Load Integer with mask 0xFFFF into Long Register
5756 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5757   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5758   effect(KILL cr);
5759 
5760   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5761             "XOR    $dst.hi,$dst.hi" %}
5762   ins_encode %{
5763     Register Rdst = $dst$$Register;
5764     __ movzwl(Rdst, $mem$$Address);
5765     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5766   %}
5767   ins_pipe(ialu_reg_mem);
5768 %}
5769 
5770 // Load Integer with 31-bit mask into Long Register
5771 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5772   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5773   effect(KILL cr);
5774 
5775   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5776             "XOR    $dst.hi,$dst.hi\n\t"
5777             "AND    $dst.lo,$mask" %}
5778   ins_encode %{
5779     Register Rdst = $dst$$Register;
5780     __ movl(Rdst, $mem$$Address);
5781     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5782     __ andl(Rdst, $mask$$constant);
5783   %}
5784   ins_pipe(ialu_reg_mem);
5785 %}
5786 
5787 // Load Unsigned Integer into Long Register
5788 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5789   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5790   effect(KILL cr);
5791 
5792   ins_cost(250);
5793   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5794             "XOR    $dst.hi,$dst.hi" %}
5795 
5796   ins_encode %{
5797     __ movl($dst$$Register, $mem$$Address);
5798     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5799   %}
5800 
5801   ins_pipe(ialu_reg_mem);
5802 %}
5803 
5804 // Load Long.  Cannot clobber address while loading, so restrict address
5805 // register to ESI
5806 instruct loadL(eRegL dst, load_long_memory mem) %{
5807   predicate(!((LoadLNode*)n)->require_atomic_access());
5808   match(Set dst (LoadL mem));
5809 
5810   ins_cost(250);
5811   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5812             "MOV    $dst.hi,$mem+4" %}
5813 
5814   ins_encode %{
5815     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5816     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5817     __ movl($dst$$Register, Amemlo);
5818     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5819   %}
5820 
5821   ins_pipe(ialu_reg_long_mem);
5822 %}
5823 
5824 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5825 // then store it down to the stack and reload on the int
5826 // side.
5827 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5828   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5829   match(Set dst (LoadL mem));
5830 
5831   ins_cost(200);
5832   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5833             "FISTp  $dst" %}
5834   ins_encode(enc_loadL_volatile(mem,dst));
5835   ins_pipe( fpu_reg_mem );
5836 %}
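
// The enc_loadL_volatile encoding class is defined in the encode section of
// this file.  As a rough sketch based on the format string above (not the
// actual encoding), it amounts to a 64-bit FPU integer load followed by a
// 64-bit integer store into the destination stack slot:
//
//   __ fild_d($mem$$Address);              // atomically load 64 bits onto the FPU stack
//   __ fistp_d(Address(rsp, $dst$$disp));  // pop and store 64 bits to the stack slot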
5837 
5838 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5839   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5840   match(Set dst (LoadL mem));
5841   effect(TEMP tmp);
5842   ins_cost(180);
5843   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5844             "MOVSD  $dst,$tmp" %}
5845   ins_encode %{
5846     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5847     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5848   %}
5849   ins_pipe( pipe_slow );
5850 %}
5851 
5852 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5853   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5854   match(Set dst (LoadL mem));
5855   effect(TEMP tmp);
5856   ins_cost(160);
5857   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5858             "MOVD   $dst.lo,$tmp\n\t"
5859             "PSRLQ  $tmp,32\n\t"
5860             "MOVD   $dst.hi,$tmp" %}
5861   ins_encode %{
5862     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5863     __ movdl($dst$$Register, $tmp$$XMMRegister);
5864     __ psrlq($tmp$$XMMRegister, 32);
5865     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5866   %}
5867   ins_pipe( pipe_slow );
5868 %}
5869 
5870 // Load Range
5871 instruct loadRange(rRegI dst, memory mem) %{
5872   match(Set dst (LoadRange mem));
5873 
5874   ins_cost(125);
5875   format %{ "MOV    $dst,$mem" %}
5876   opcode(0x8B);
5877   ins_encode( OpcP, RegMem(dst,mem));
5878   ins_pipe( ialu_reg_mem );
5879 %}
5880 
5881 
5882 // Load Pointer
5883 instruct loadP(eRegP dst, memory mem) %{
5884   match(Set dst (LoadP mem));
5885 
5886   ins_cost(125);
5887   format %{ "MOV    $dst,$mem" %}
5888   opcode(0x8B);
5889   ins_encode( OpcP, RegMem(dst,mem));
5890   ins_pipe( ialu_reg_mem );
5891 %}
5892 
5893 // Load Klass Pointer
5894 instruct loadKlass(eRegP dst, memory mem) %{
5895   match(Set dst (LoadKlass mem));
5896 
5897   ins_cost(125);
5898   format %{ "MOV    $dst,$mem" %}
5899   opcode(0x8B);
5900   ins_encode( OpcP, RegMem(dst,mem));
5901   ins_pipe( ialu_reg_mem );
5902 %}
5903 
5904 // Move Float to a legacy-set XMM register
5905 instruct MoveF2LEG(legRegF dst, regF src) %{
5906   match(Set dst src);
5907   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
5908   ins_encode %{
5909     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
5910   %}
5911   ins_pipe( fpu_reg_reg );
5912 %}
5913 
5914 // Move Float from a legacy-set XMM register
5915 instruct MoveLEG2F(regF dst, legRegF src) %{
5916   match(Set dst src);
5917   format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
5918   ins_encode %{
5919     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
5920   %}
5921   ins_pipe( fpu_reg_reg );
5922 %}
5923 
5924 // Move Double to a legacy-set XMM register
5925 instruct MoveD2LEG(legRegD dst, regD src) %{
5926   match(Set dst src);
5927   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
5928   ins_encode %{
5929     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
5930   %}
5931   ins_pipe( fpu_reg_reg );
5932 %}
5933 
5934 // Move Double from a legacy-set XMM register
5935 instruct MoveLEG2D(regD dst, legRegD src) %{
5936   match(Set dst src);
5937   format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
5938   ins_encode %{
5939     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
5940   %}
5941   ins_pipe( fpu_reg_reg );
5942 %}
5943 
5944 // Load Double
5945 instruct loadDPR(regDPR dst, memory mem) %{
5946   predicate(UseSSE<=1);
5947   match(Set dst (LoadD mem));
5948 
5949   ins_cost(150);
5950   format %{ "FLD_D  ST,$mem\n\t"
5951             "FSTP   $dst" %}
5952   opcode(0xDD);               /* DD /0 */
5953   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5954               Pop_Reg_DPR(dst) );
5955   ins_pipe( fpu_reg_mem );
5956 %}
5957 
5958 // Load Double to XMM
5959 instruct loadD(regD dst, memory mem) %{
5960   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5961   match(Set dst (LoadD mem));
5962   ins_cost(145);
5963   format %{ "MOVSD  $dst,$mem" %}
5964   ins_encode %{
5965     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5966   %}
5967   ins_pipe( pipe_slow );
5968 %}
5969 
5970 instruct loadD_partial(regD dst, memory mem) %{
5971   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5972   match(Set dst (LoadD mem));
5973   ins_cost(145);
5974   format %{ "MOVLPD $dst,$mem" %}
5975   ins_encode %{
5976     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5977   %}
5978   ins_pipe( pipe_slow );
5979 %}
5980 
5981 // Load to XMM register (single-precision floating point)
5982 // MOVSS instruction
5983 instruct loadF(regF dst, memory mem) %{
5984   predicate(UseSSE>=1);
5985   match(Set dst (LoadF mem));
5986   ins_cost(145);
5987   format %{ "MOVSS  $dst,$mem" %}
5988   ins_encode %{
5989     __ movflt ($dst$$XMMRegister, $mem$$Address);
5990   %}
5991   ins_pipe( pipe_slow );
5992 %}
5993 
5994 // Load Float
5995 instruct loadFPR(regFPR dst, memory mem) %{
5996   predicate(UseSSE==0);
5997   match(Set dst (LoadF mem));
5998 
5999   ins_cost(150);
6000   format %{ "FLD_S  ST,$mem\n\t"
6001             "FSTP   $dst" %}
6002   opcode(0xD9);               /* D9 /0 */
6003   ins_encode( OpcP, RMopc_Mem(0x00,mem),
6004               Pop_Reg_FPR(dst) );
6005   ins_pipe( fpu_reg_mem );
6006 %}
6007 
6008 // Load Effective Address
6009 instruct leaP8(eRegP dst, indOffset8 mem) %{
6010   match(Set dst mem);
6011 
6012   ins_cost(110);
6013   format %{ "LEA    $dst,$mem" %}
6014   opcode(0x8D);
6015   ins_encode( OpcP, RegMem(dst,mem));
6016   ins_pipe( ialu_reg_reg_fat );
6017 %}
6018 
6019 instruct leaP32(eRegP dst, indOffset32 mem) %{
6020   match(Set dst mem);
6021 
6022   ins_cost(110);
6023   format %{ "LEA    $dst,$mem" %}
6024   opcode(0x8D);
6025   ins_encode( OpcP, RegMem(dst,mem));
6026   ins_pipe( ialu_reg_reg_fat );
6027 %}
6028 
6029 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
6030   match(Set dst mem);
6031 
6032   ins_cost(110);
6033   format %{ "LEA    $dst,$mem" %}
6034   opcode(0x8D);
6035   ins_encode( OpcP, RegMem(dst,mem));
6036   ins_pipe( ialu_reg_reg_fat );
6037 %}
6038 
6039 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
6040   match(Set dst mem);
6041 
6042   ins_cost(110);
6043   format %{ "LEA    $dst,$mem" %}
6044   opcode(0x8D);
6045   ins_encode( OpcP, RegMem(dst,mem));
6046   ins_pipe( ialu_reg_reg_fat );
6047 %}
6048 
6049 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
6050   match(Set dst mem);
6051 
6052   ins_cost(110);
6053   format %{ "LEA    $dst,$mem" %}
6054   opcode(0x8D);
6055   ins_encode( OpcP, RegMem(dst,mem));
6056   ins_pipe( ialu_reg_reg_fat );
6057 %}
6058 
6059 // Load Constant
6060 instruct loadConI(rRegI dst, immI src) %{
6061   match(Set dst src);
6062 
6063   format %{ "MOV    $dst,$src" %}
6064   ins_encode( LdImmI(dst, src) );
6065   ins_pipe( ialu_reg_fat );
6066 %}
6067 
6068 // Load Constant zero
6069 instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
6070   match(Set dst src);
6071   effect(KILL cr);
6072 
6073   ins_cost(50);
6074   format %{ "XOR    $dst,$dst" %}
6075   opcode(0x33);  /* + rd */
6076   ins_encode( OpcP, RegReg( dst, dst ) );
6077   ins_pipe( ialu_reg );
6078 %}
6079 
6080 instruct loadConP(eRegP dst, immP src) %{
6081   match(Set dst src);
6082 
6083   format %{ "MOV    $dst,$src" %}
6084   opcode(0xB8);  /* + rd */
6085   ins_encode( LdImmP(dst, src) );
6086   ins_pipe( ialu_reg_fat );
6087 %}
6088 
6089 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
6090   match(Set dst src);
6091   effect(KILL cr);
6092   ins_cost(200);
6093   format %{ "MOV    $dst.lo,$src.lo\n\t"
6094             "MOV    $dst.hi,$src.hi" %}
6095   opcode(0xB8);
6096   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6097   ins_pipe( ialu_reg_long_fat );
6098 %}
6099 
6100 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6101   match(Set dst src);
6102   effect(KILL cr);
6103   ins_cost(150);
6104   format %{ "XOR    $dst.lo,$dst.lo\n\t"
6105             "XOR    $dst.hi,$dst.hi" %}
6106   opcode(0x33,0x33);
6107   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6108   ins_pipe( ialu_reg_long );
6109 %}
6110 
6111 // The instruction usage is guarded by predicate in operand immFPR().
6112 instruct loadConFPR(regFPR dst, immFPR con) %{
6113   match(Set dst con);
6114   ins_cost(125);
6115   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6116             "FSTP   $dst" %}
6117   ins_encode %{
6118     __ fld_s($constantaddress($con));
6119     __ fstp_d($dst$$reg);
6120   %}
6121   ins_pipe(fpu_reg_con);
6122 %}
6123 
6124 // The instruction usage is guarded by predicate in operand immFPR0().
6125 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6126   match(Set dst con);
6127   ins_cost(125);
6128   format %{ "FLDZ   ST\n\t"
6129             "FSTP   $dst" %}
6130   ins_encode %{
6131     __ fldz();
6132     __ fstp_d($dst$$reg);
6133   %}
6134   ins_pipe(fpu_reg_con);
6135 %}
6136 
6137 // The instruction usage is guarded by predicate in operand immFPR1().
6138 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6139   match(Set dst con);
6140   ins_cost(125);
6141   format %{ "FLD1   ST\n\t"
6142             "FSTP   $dst" %}
6143   ins_encode %{
6144     __ fld1();
6145     __ fstp_d($dst$$reg);
6146   %}
6147   ins_pipe(fpu_reg_con);
6148 %}
6149 
6150 // The instruction usage is guarded by predicate in operand immF().
6151 instruct loadConF(regF dst, immF con) %{
6152   match(Set dst con);
6153   ins_cost(125);
6154   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6155   ins_encode %{
6156     __ movflt($dst$$XMMRegister, $constantaddress($con));
6157   %}
6158   ins_pipe(pipe_slow);
6159 %}
6160 
6161 // The instruction usage is guarded by predicate in operand immF0().
6162 instruct loadConF0(regF dst, immF0 src) %{
6163   match(Set dst src);
6164   ins_cost(100);
6165   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6166   ins_encode %{
6167     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6168   %}
6169   ins_pipe(pipe_slow);
6170 %}
6171 
6172 // The instruction usage is guarded by predicate in operand immDPR().
6173 instruct loadConDPR(regDPR dst, immDPR con) %{
6174   match(Set dst con);
6175   ins_cost(125);
6176 
6177   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6178             "FSTP   $dst" %}
6179   ins_encode %{
6180     __ fld_d($constantaddress($con));
6181     __ fstp_d($dst$$reg);
6182   %}
6183   ins_pipe(fpu_reg_con);
6184 %}
6185 
6186 // The instruction usage is guarded by predicate in operand immDPR0().
6187 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6188   match(Set dst con);
6189   ins_cost(125);
6190 
6191   format %{ "FLDZ   ST\n\t"
6192             "FSTP   $dst" %}
6193   ins_encode %{
6194     __ fldz();
6195     __ fstp_d($dst$$reg);
6196   %}
6197   ins_pipe(fpu_reg_con);
6198 %}
6199 
6200 // The instruction usage is guarded by predicate in operand immDPR1().
6201 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6202   match(Set dst con);
6203   ins_cost(125);
6204 
6205   format %{ "FLD1   ST\n\t"
6206             "FSTP   $dst" %}
6207   ins_encode %{
6208     __ fld1();
6209     __ fstp_d($dst$$reg);
6210   %}
6211   ins_pipe(fpu_reg_con);
6212 %}
6213 
6214 // The instruction usage is guarded by predicate in operand immD().
6215 instruct loadConD(regD dst, immD con) %{
6216   match(Set dst con);
6217   ins_cost(125);
6218   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6219   ins_encode %{
6220     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6221   %}
6222   ins_pipe(pipe_slow);
6223 %}
6224 
6225 // The instruction usage is guarded by predicate in operand immD0().
6226 instruct loadConD0(regD dst, immD0 src) %{
6227   match(Set dst src);
6228   ins_cost(100);
6229   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6230   ins_encode %{
6231     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6232   %}
6233   ins_pipe( pipe_slow );
6234 %}
6235 
6236 // Load Stack Slot
6237 instruct loadSSI(rRegI dst, stackSlotI src) %{
6238   match(Set dst src);
6239   ins_cost(125);
6240 
6241   format %{ "MOV    $dst,$src" %}
6242   opcode(0x8B);
6243   ins_encode( OpcP, RegMem(dst,src));
6244   ins_pipe( ialu_reg_mem );
6245 %}
6246 
6247 instruct loadSSL(eRegL dst, stackSlotL src) %{
6248   match(Set dst src);
6249 
6250   ins_cost(200);
6251   format %{ "MOV    $dst,$src.lo\n\t"
6252             "MOV    $dst+4,$src.hi" %}
6253   opcode(0x8B, 0x8B);
6254   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6255   ins_pipe( ialu_mem_long_reg );
6256 %}
6257 
6258 // Load Stack Slot
6259 instruct loadSSP(eRegP dst, stackSlotP src) %{
6260   match(Set dst src);
6261   ins_cost(125);
6262 
6263   format %{ "MOV    $dst,$src" %}
6264   opcode(0x8B);
6265   ins_encode( OpcP, RegMem(dst,src));
6266   ins_pipe( ialu_reg_mem );
6267 %}
6268 
6269 // Load Stack Slot
6270 instruct loadSSF(regFPR dst, stackSlotF src) %{
6271   match(Set dst src);
6272   ins_cost(125);
6273 
6274   format %{ "FLD_S  $src\n\t"
6275             "FSTP   $dst" %}
6276   opcode(0xD9);               /* D9 /0, FLD m32real */
6277   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6278               Pop_Reg_FPR(dst) );
6279   ins_pipe( fpu_reg_mem );
6280 %}
6281 
6282 // Load Stack Slot
6283 instruct loadSSD(regDPR dst, stackSlotD src) %{
6284   match(Set dst src);
6285   ins_cost(125);
6286 
6287   format %{ "FLD_D  $src\n\t"
6288             "FSTP   $dst" %}
6289   opcode(0xDD);               /* DD /0, FLD m64real */
6290   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6291               Pop_Reg_DPR(dst) );
6292   ins_pipe( fpu_reg_mem );
6293 %}
6294 
6295 // Prefetch instructions for allocation.
6296 // Must be safe to execute with invalid address (cannot fault).
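     // The AllocatePrefetchInstr flag selects among the variants below:
     //   0 => PREFETCHNTA, 1 => PREFETCHT0, 2 => PREFETCHT2 (all need UseSSE >= 1),
     //   3 => PREFETCHW; with UseSSE == 0 and AllocatePrefetchInstr != 3 no
     //   prefetch is emitted at all.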
6297 
6298 instruct prefetchAlloc0( memory mem ) %{
6299   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6300   match(PrefetchAllocation mem);
6301   ins_cost(0);
6302   size(0);
6303   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6304   ins_encode();
6305   ins_pipe(empty);
6306 %}
6307 
6308 instruct prefetchAlloc( memory mem ) %{
6309   predicate(AllocatePrefetchInstr==3);
6310   match( PrefetchAllocation mem );
6311   ins_cost(100);
6312 
6313   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6314   ins_encode %{
6315     __ prefetchw($mem$$Address);
6316   %}
6317   ins_pipe(ialu_mem);
6318 %}
6319 
6320 instruct prefetchAllocNTA( memory mem ) %{
6321   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6322   match(PrefetchAllocation mem);
6323   ins_cost(100);
6324 
6325   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6326   ins_encode %{
6327     __ prefetchnta($mem$$Address);
6328   %}
6329   ins_pipe(ialu_mem);
6330 %}
6331 
6332 instruct prefetchAllocT0( memory mem ) %{
6333   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6334   match(PrefetchAllocation mem);
6335   ins_cost(100);
6336 
6337   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6338   ins_encode %{
6339     __ prefetcht0($mem$$Address);
6340   %}
6341   ins_pipe(ialu_mem);
6342 %}
6343 
6344 instruct prefetchAllocT2( memory mem ) %{
6345   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6346   match(PrefetchAllocation mem);
6347   ins_cost(100);
6348 
6349   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6350   ins_encode %{
6351     __ prefetcht2($mem$$Address);
6352   %}
6353   ins_pipe(ialu_mem);
6354 %}
6355 
6356 //----------Store Instructions-------------------------------------------------
6357 
6358 // Store Byte
6359 instruct storeB(memory mem, xRegI src) %{
6360   match(Set mem (StoreB mem src));
6361 
6362   ins_cost(125);
6363   format %{ "MOV8   $mem,$src" %}
6364   opcode(0x88);
6365   ins_encode( OpcP, RegMem( src, mem ) );
6366   ins_pipe( ialu_mem_reg );
6367 %}
6368 
6369 // Store Char/Short
6370 instruct storeC(memory mem, rRegI src) %{
6371   match(Set mem (StoreC mem src));
6372 
6373   ins_cost(125);
6374   format %{ "MOV16  $mem,$src" %}
6375   opcode(0x89, 0x66);
6376   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6377   ins_pipe( ialu_mem_reg );
6378 %}
6379 
6380 // Store Integer
6381 instruct storeI(memory mem, rRegI src) %{
6382   match(Set mem (StoreI mem src));
6383 
6384   ins_cost(125);
6385   format %{ "MOV    $mem,$src" %}
6386   opcode(0x89);
6387   ins_encode( OpcP, RegMem( src, mem ) );
6388   ins_pipe( ialu_mem_reg );
6389 %}
6390 
6391 // Store Long
6392 instruct storeL(long_memory mem, eRegL src) %{
6393   predicate(!((StoreLNode*)n)->require_atomic_access());
6394   match(Set mem (StoreL mem src));
6395 
6396   ins_cost(200);
6397   format %{ "MOV    $mem,$src.lo\n\t"
6398             "MOV    $mem+4,$src.hi" %}
6399   opcode(0x89, 0x89);
6400   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6401   ins_pipe( ialu_mem_long_reg );
6402 %}
6403 
6404 // Store Long to Integer
6405 instruct storeL2I(memory mem, eRegL src) %{
6406   match(Set mem (StoreI mem (ConvL2I src)));
6407 
6408   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6409   ins_encode %{
6410     __ movl($mem$$Address, $src$$Register);
6411   %}
6412   ins_pipe(ialu_mem_reg);
6413 %}
6414 
6415 // Volatile Store Long.  Must be atomic, so move it into
6416 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6417 // target address before the store (for null-ptr checks)
6418 // so the memory operand is used twice in the encoding.
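     // (Two separate 32-bit MOVs could be torn by a concurrent access, so the
     //  value travels through a register that is 64 bits wide: the x87 stack
     //  here, or an XMM register in the SSE2 variants below.)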
6419 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6420   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6421   match(Set mem (StoreL mem src));
6422   effect( KILL cr );
6423   ins_cost(400);
6424   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6425             "FILD   $src\n\t"
6426             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6427   opcode(0x3B);
6428   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6429   ins_pipe( fpu_reg_mem );
6430 %}
6431 
6432 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6433   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6434   match(Set mem (StoreL mem src));
6435   effect( TEMP tmp, KILL cr );
6436   ins_cost(380);
6437   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6438             "MOVSD  $tmp,$src\n\t"
6439             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6440   ins_encode %{
6441     __ cmpl(rax, $mem$$Address);
6442     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6443     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6444   %}
6445   ins_pipe( pipe_slow );
6446 %}
6447 
6448 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6449   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6450   match(Set mem (StoreL mem src));
6451   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6452   ins_cost(360);
6453   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6454             "MOVD   $tmp,$src.lo\n\t"
6455             "MOVD   $tmp2,$src.hi\n\t"
6456             "PUNPCKLDQ $tmp,$tmp2\n\t"
6457             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6458   ins_encode %{
6459     __ cmpl(rax, $mem$$Address);
6460     __ movdl($tmp$$XMMRegister, $src$$Register);
6461     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6462     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6463     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6464   %}
6465   ins_pipe( pipe_slow );
6466 %}
6467 
6468 // Store Pointer; for storing unknown oops and raw pointers
6469 instruct storeP(memory mem, anyRegP src) %{
6470   match(Set mem (StoreP mem src));
6471 
6472   ins_cost(125);
6473   format %{ "MOV    $mem,$src" %}
6474   opcode(0x89);
6475   ins_encode( OpcP, RegMem( src, mem ) );
6476   ins_pipe( ialu_mem_reg );
6477 %}
6478 
6479 // Store Integer Immediate
6480 instruct storeImmI(memory mem, immI src) %{
6481   match(Set mem (StoreI mem src));
6482 
6483   ins_cost(150);
6484   format %{ "MOV    $mem,$src" %}
6485   opcode(0xC7);               /* C7 /0 */
6486   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6487   ins_pipe( ialu_mem_imm );
6488 %}
6489 
6490 // Store Short/Char Immediate
6491 instruct storeImmI16(memory mem, immI16 src) %{
6492   predicate(UseStoreImmI16);
6493   match(Set mem (StoreC mem src));
6494 
6495   ins_cost(150);
6496   format %{ "MOV16  $mem,$src" %}
6497   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6498   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6499   ins_pipe( ialu_mem_imm );
6500 %}
6501 
6502 // Store Pointer Immediate; null pointers or constant oops that do not
6503 // need card-mark barriers.
6504 instruct storeImmP(memory mem, immP src) %{
6505   match(Set mem (StoreP mem src));
6506 
6507   ins_cost(150);
6508   format %{ "MOV    $mem,$src" %}
6509   opcode(0xC7);               /* C7 /0 */
6510   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6511   ins_pipe( ialu_mem_imm );
6512 %}
6513 
6514 // Store Byte Immediate
6515 instruct storeImmB(memory mem, immI8 src) %{
6516   match(Set mem (StoreB mem src));
6517 
6518   ins_cost(150);
6519   format %{ "MOV8   $mem,$src" %}
6520   opcode(0xC6);               /* C6 /0 */
6521   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6522   ins_pipe( ialu_mem_imm );
6523 %}
6524 
6525 // Store CMS card-mark Immediate
6526 instruct storeImmCM(memory mem, immI8 src) %{
6527   match(Set mem (StoreCM mem src));
6528 
6529   ins_cost(150);
6530   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6531   opcode(0xC6);               /* C6 /0 */
6532   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6533   ins_pipe( ialu_mem_imm );
6534 %}
6535 
6536 // Store Double
6537 instruct storeDPR( memory mem, regDPR1 src) %{
6538   predicate(UseSSE<=1);
6539   match(Set mem (StoreD mem src));
6540 
6541   ins_cost(100);
6542   format %{ "FST_D  $mem,$src" %}
6543   opcode(0xDD);       /* DD /2 */
6544   ins_encode( enc_FPR_store(mem,src) );
6545   ins_pipe( fpu_mem_reg );
6546 %}
6547 
6548 // Store double does rounding on x86
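     // (x87 registers hold 80-bit extended-precision values, so FST_S/FST_D
     //  round to 32/64-bit precision as part of the store; that store-time
     //  rounding is what the RoundDouble/RoundFloat rules here and below rely on.)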
6549 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6550   predicate(UseSSE<=1);
6551   match(Set mem (StoreD mem (RoundDouble src)));
6552 
6553   ins_cost(100);
6554   format %{ "FST_D  $mem,$src\t# round" %}
6555   opcode(0xDD);       /* DD /2 */
6556   ins_encode( enc_FPR_store(mem,src) );
6557   ins_pipe( fpu_mem_reg );
6558 %}
6559 
6560 // Store XMM register to memory (double-precision floating point)
6561 // MOVSD instruction
6562 instruct storeD(memory mem, regD src) %{
6563   predicate(UseSSE>=2);
6564   match(Set mem (StoreD mem src));
6565   ins_cost(95);
6566   format %{ "MOVSD  $mem,$src" %}
6567   ins_encode %{
6568     __ movdbl($mem$$Address, $src$$XMMRegister);
6569   %}
6570   ins_pipe( pipe_slow );
6571 %}
6572 
6573 // Load Double
6574 instruct MoveD2VL(vlRegD dst, regD src) %{
6575   match(Set dst src);
6576   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6577   ins_encode %{
6578     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6579   %}
6580   ins_pipe( fpu_reg_reg );
6581 %}
6582 
6583 // Load Double
6584 instruct MoveVL2D(regD dst, vlRegD src) %{
6585   match(Set dst src);
6586   format %{ "movsd $dst,$src\t! load double (8 bytes)" %}
6587   ins_encode %{
6588     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6589   %}
6590   ins_pipe( fpu_reg_reg );
6591 %}
6592 
6593 // Store XMM register to memory (single-precision floating point)
6594 // MOVSS instruction
6595 instruct storeF(memory mem, regF src) %{
6596   predicate(UseSSE>=1);
6597   match(Set mem (StoreF mem src));
6598   ins_cost(95);
6599   format %{ "MOVSS  $mem,$src" %}
6600   ins_encode %{
6601     __ movflt($mem$$Address, $src$$XMMRegister);
6602   %}
6603   ins_pipe( pipe_slow );
6604 %}
6605 
6606 // Load Float
6607 instruct MoveF2VL(vlRegF dst, regF src) %{
6608   match(Set dst src);
6609   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6610   ins_encode %{
6611     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6612   %}
6613   ins_pipe( fpu_reg_reg );
6614 %}
6615 
6616 // Load Float
6617 instruct MoveVL2F(regF dst, vlRegF src) %{
6618   match(Set dst src);
6619   format %{ "movss $dst,$src\t! load float (4 bytes)" %}
6620   ins_encode %{
6621     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6622   %}
6623   ins_pipe( fpu_reg_reg );
6624 %}
6625 
6626 // Store Float
6627 instruct storeFPR( memory mem, regFPR1 src) %{
6628   predicate(UseSSE==0);
6629   match(Set mem (StoreF mem src));
6630 
6631   ins_cost(100);
6632   format %{ "FST_S  $mem,$src" %}
6633   opcode(0xD9);       /* D9 /2 */
6634   ins_encode( enc_FPR_store(mem,src) );
6635   ins_pipe( fpu_mem_reg );
6636 %}
6637 
6638 // Store Float does rounding on x86
6639 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6640   predicate(UseSSE==0);
6641   match(Set mem (StoreF mem (RoundFloat src)));
6642 
6643   ins_cost(100);
6644   format %{ "FST_S  $mem,$src\t# round" %}
6645   opcode(0xD9);       /* D9 /2 */
6646   ins_encode( enc_FPR_store(mem,src) );
6647   ins_pipe( fpu_mem_reg );
6648 %}
6649 
6650 // Store Float does rounding on x86
6651 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6652   predicate(UseSSE<=1);
6653   match(Set mem (StoreF mem (ConvD2F src)));
6654 
6655   ins_cost(100);
6656   format %{ "FST_S  $mem,$src\t# D-round" %}
6657   opcode(0xD9);       /* D9 /2 */
6658   ins_encode( enc_FPR_store(mem,src) );
6659   ins_pipe( fpu_mem_reg );
6660 %}
6661 
6662 // Store immediate Float value (faster than a store from an FPU register)
6663 // The instruction usage is guarded by predicate in operand immFPR().
6664 instruct storeFPR_imm( memory mem, immFPR src) %{
6665   match(Set mem (StoreF mem src));
6666 
6667   ins_cost(50);
6668   format %{ "MOV    $mem,$src\t# store float" %}
6669   opcode(0xC7);               /* C7 /0 */
6670   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6671   ins_pipe( ialu_mem_imm );
6672 %}
6673 
6674 // Store immediate Float value (faster than a store from an XMM register)
6675 // The instruction usage is guarded by predicate in operand immF().
6676 instruct storeF_imm( memory mem, immF src) %{
6677   match(Set mem (StoreF mem src));
6678 
6679   ins_cost(50);
6680   format %{ "MOV    $mem,$src\t# store float" %}
6681   opcode(0xC7);               /* C7 /0 */
6682   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6683   ins_pipe( ialu_mem_imm );
6684 %}
6685 
6686 // Store Integer to stack slot
6687 instruct storeSSI(stackSlotI dst, rRegI src) %{
6688   match(Set dst src);
6689 
6690   ins_cost(100);
6691   format %{ "MOV    $dst,$src" %}
6692   opcode(0x89);
6693   ins_encode( OpcPRegSS( dst, src ) );
6694   ins_pipe( ialu_mem_reg );
6695 %}
6696 
6697 // Store Pointer to stack slot
6698 instruct storeSSP(stackSlotP dst, eRegP src) %{
6699   match(Set dst src);
6700 
6701   ins_cost(100);
6702   format %{ "MOV    $dst,$src" %}
6703   opcode(0x89);
6704   ins_encode( OpcPRegSS( dst, src ) );
6705   ins_pipe( ialu_mem_reg );
6706 %}
6707 
6708 // Store Long to stack slot
6709 instruct storeSSL(stackSlotL dst, eRegL src) %{
6710   match(Set dst src);
6711 
6712   ins_cost(200);
6713   format %{ "MOV    $dst,$src.lo\n\t"
6714             "MOV    $dst+4,$src.hi" %}
6715   opcode(0x89, 0x89);
6716   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6717   ins_pipe( ialu_mem_long_reg );
6718 %}
6719 
6720 //----------MemBar Instructions-----------------------------------------------
6721 // Memory barrier flavors
6722 
6723 instruct membar_acquire() %{
6724   match(MemBarAcquire);
6725   match(LoadFence);
6726   ins_cost(400);
6727 
6728   size(0);
6729   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6730   ins_encode();
6731   ins_pipe(empty);
6732 %}
6733 
6734 instruct membar_acquire_lock() %{
6735   match(MemBarAcquireLock);
6736   ins_cost(0);
6737 
6738   size(0);
6739   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6740   ins_encode( );
6741   ins_pipe(empty);
6742 %}
6743 
6744 instruct membar_release() %{
6745   match(MemBarRelease);
6746   match(StoreFence);
6747   ins_cost(400);
6748 
6749   size(0);
6750   format %{ "MEMBAR-release ! (empty encoding)" %}
6751   ins_encode( );
6752   ins_pipe(empty);
6753 %}
6754 
6755 instruct membar_release_lock() %{
6756   match(MemBarReleaseLock);
6757   ins_cost(0);
6758 
6759   size(0);
6760   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6761   ins_encode( );
6762   ins_pipe(empty);
6763 %}
6764 
6765 instruct membar_volatile(eFlagsReg cr) %{
6766   match(MemBarVolatile);
6767   effect(KILL cr);
6768   ins_cost(400);
6769 
6770   format %{
6771     $$template
6772     $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6773   %}
6774   ins_encode %{
6775     __ membar(Assembler::StoreLoad);
6776   %}
6777   ins_pipe(pipe_slow);
6778 %}
6779 
6780 instruct unnecessary_membar_volatile() %{
6781   match(MemBarVolatile);
6782   predicate(Matcher::post_store_load_barrier(n));
6783   ins_cost(0);
6784 
6785   size(0);
6786   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6787   ins_encode( );
6788   ins_pipe(empty);
6789 %}
6790 
6791 instruct membar_storestore() %{
6792   match(MemBarStoreStore);
6793   ins_cost(0);
6794 
6795   size(0);
6796   format %{ "MEMBAR-storestore (empty encoding)" %}
6797   ins_encode( );
6798   ins_pipe(empty);
6799 %}
6800 
6801 //----------Move Instructions--------------------------------------------------
6802 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6803   match(Set dst (CastX2P src));
6804   format %{ "# X2P  $dst, $src" %}
6805   ins_encode( /*empty encoding*/ );
6806   ins_cost(0);
6807   ins_pipe(empty);
6808 %}
6809 
6810 instruct castP2X(rRegI dst, eRegP src ) %{
6811   match(Set dst (CastP2X src));
6812   ins_cost(50);
6813   format %{ "MOV    $dst, $src\t# CastP2X" %}
6814   ins_encode( enc_Copy( dst, src) );
6815   ins_pipe( ialu_reg_reg );
6816 %}
6817 
6818 //----------Conditional Move---------------------------------------------------
6819 // Conditional move
6820 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6821   predicate(!VM_Version::supports_cmov() );
6822   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6823   ins_cost(200);
6824   format %{ "J$cop,us skip\t# signed cmove\n\t"
6825             "MOV    $dst,$src\n"
6826       "skip:" %}
6827   ins_encode %{
6828     Label Lskip;
6829     // Invert sense of branch from sense of CMOV
6830     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6831     __ movl($dst$$Register, $src$$Register);
6832     __ bind(Lskip);
6833   %}
6834   ins_pipe( pipe_cmov_reg );
6835 %}
6836 
6837 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6838   predicate(!VM_Version::supports_cmov() );
6839   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6840   ins_cost(200);
6841   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6842             "MOV    $dst,$src\n"
6843       "skip:" %}
6844   ins_encode %{
6845     Label Lskip;
6846     // Invert sense of branch from sense of CMOV
6847     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6848     __ movl($dst$$Register, $src$$Register);
6849     __ bind(Lskip);
6850   %}
6851   ins_pipe( pipe_cmov_reg );
6852 %}
6853 
6854 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6855   predicate(VM_Version::supports_cmov() );
6856   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6857   ins_cost(200);
6858   format %{ "CMOV$cop $dst,$src" %}
6859   opcode(0x0F,0x40);
6860   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6861   ins_pipe( pipe_cmov_reg );
6862 %}
6863 
6864 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6865   predicate(VM_Version::supports_cmov() );
6866   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6867   ins_cost(200);
6868   format %{ "CMOV$cop $dst,$src" %}
6869   opcode(0x0F,0x40);
6870   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6871   ins_pipe( pipe_cmov_reg );
6872 %}
6873 
6874 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6875   predicate(VM_Version::supports_cmov() );
6876   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6877   ins_cost(200);
6878   expand %{
6879     cmovI_regU(cop, cr, dst, src);
6880   %}
6881 %}
6882 
6883 // Conditional move
6884 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6885   predicate(VM_Version::supports_cmov() );
6886   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6887   ins_cost(250);
6888   format %{ "CMOV$cop $dst,$src" %}
6889   opcode(0x0F,0x40);
6890   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6891   ins_pipe( pipe_cmov_mem );
6892 %}
6893 
6894 // Conditional move
6895 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6896   predicate(VM_Version::supports_cmov() );
6897   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6898   ins_cost(250);
6899   format %{ "CMOV$cop $dst,$src" %}
6900   opcode(0x0F,0x40);
6901   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6902   ins_pipe( pipe_cmov_mem );
6903 %}
6904 
6905 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6906   predicate(VM_Version::supports_cmov() );
6907   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6908   ins_cost(250);
6909   expand %{
6910     cmovI_memU(cop, cr, dst, src);
6911   %}
6912 %}
6913 
6914 // Conditional move
6915 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6916   predicate(VM_Version::supports_cmov() );
6917   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6918   ins_cost(200);
6919   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6920   opcode(0x0F,0x40);
6921   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6922   ins_pipe( pipe_cmov_reg );
6923 %}
6924 
6925 // Conditional move (non-P6 version)
6926 // Note:  a CMoveP is generated for stubs and native wrappers
6927 //        regardless of whether we are on a P6, so we
6928 //        emulate a cmov here
6929 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6930   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6931   ins_cost(300);
6932   format %{ "Jn$cop   skip\n\t"
6933           "MOV    $dst,$src\t# pointer\n"
6934       "skip:" %}
6935   opcode(0x8b);
6936   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6937   ins_pipe( pipe_cmov_reg );
6938 %}
6939 
6940 // Conditional move
6941 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6942   predicate(VM_Version::supports_cmov() );
6943   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6944   ins_cost(200);
6945   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6946   opcode(0x0F,0x40);
6947   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6948   ins_pipe( pipe_cmov_reg );
6949 %}
6950 
6951 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6952   predicate(VM_Version::supports_cmov() );
6953   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6954   ins_cost(200);
6955   expand %{
6956     cmovP_regU(cop, cr, dst, src);
6957   %}
6958 %}
6959 
6960 // DISABLED: Requires the ADLC to emit a bottom_type call that
6961 // correctly meets the two pointer arguments; one is an incoming
6962 // register but the other is a memory operand.  ALSO appears to
6963 // be buggy with implicit null checks.
6964 //
6965 //// Conditional move
6966 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6967 //  predicate(VM_Version::supports_cmov() );
6968 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6969 //  ins_cost(250);
6970 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6971 //  opcode(0x0F,0x40);
6972 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6973 //  ins_pipe( pipe_cmov_mem );
6974 //%}
6975 //
6976 //// Conditional move
6977 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6978 //  predicate(VM_Version::supports_cmov() );
6979 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6980 //  ins_cost(250);
6981 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6982 //  opcode(0x0F,0x40);
6983 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6984 //  ins_pipe( pipe_cmov_mem );
6985 //%}
6986 
6987 // Conditional move
6988 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6989   predicate(UseSSE<=1);
6990   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6991   ins_cost(200);
6992   format %{ "FCMOV$cop $dst,$src\t# double" %}
6993   opcode(0xDA);
6994   ins_encode( enc_cmov_dpr(cop,src) );
6995   ins_pipe( pipe_cmovDPR_reg );
6996 %}
6997 
6998 // Conditional move
6999 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
7000   predicate(UseSSE==0);
7001   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7002   ins_cost(200);
7003   format %{ "FCMOV$cop $dst,$src\t# float" %}
7004   opcode(0xDA);
7005   ins_encode( enc_cmov_dpr(cop,src) );
7006   ins_pipe( pipe_cmovDPR_reg );
7007 %}
7008 
7009 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
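     // FCMOVcc exists only for the unsigned/parity conditions (B, E, BE, U and
     // their negations), so a signed compare is handled by branching around a
     // plain FP copy instead: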
7010 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
7011   predicate(UseSSE<=1);
7012   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7013   ins_cost(200);
7014   format %{ "Jn$cop   skip\n\t"
7015             "MOV    $dst,$src\t# double\n"
7016       "skip:" %}
7017   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7018   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
7019   ins_pipe( pipe_cmovDPR_reg );
7020 %}
7021 
7022 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7023 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
7024   predicate(UseSSE==0);
7025   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7026   ins_cost(200);
7027   format %{ "Jn$cop    skip\n\t"
7028             "MOV    $dst,$src\t# float\n"
7029       "skip:" %}
7030   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7031   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
7032   ins_pipe( pipe_cmovDPR_reg );
7033 %}
7034 
7035 // No CMOVE with SSE/SSE2
7036 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
7037   predicate (UseSSE>=1);
7038   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7039   ins_cost(200);
7040   format %{ "Jn$cop   skip\n\t"
7041             "MOVSS  $dst,$src\t# float\n"
7042       "skip:" %}
7043   ins_encode %{
7044     Label skip;
7045     // Invert sense of branch from sense of CMOV
7046     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7047     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7048     __ bind(skip);
7049   %}
7050   ins_pipe( pipe_slow );
7051 %}
7052 
7053 // No CMOVE with SSE/SSE2
7054 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
7055   predicate (UseSSE>=2);
7056   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7057   ins_cost(200);
7058   format %{ "Jn$cop   skip\n\t"
7059             "MOVSD  $dst,$src\t# double\n"
7060       "skip:" %}
7061   ins_encode %{
7062     Label skip;
7063     // Invert sense of branch from sense of CMOV
7064     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7065     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7066     __ bind(skip);
7067   %}
7068   ins_pipe( pipe_slow );
7069 %}
7070 
7071 // unsigned version
7072 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
7073   predicate (UseSSE>=1);
7074   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7075   ins_cost(200);
7076   format %{ "Jn$cop   skip\n\t"
7077             "MOVSS  $dst,$src\t# float\n"
7078       "skip:" %}
7079   ins_encode %{
7080     Label skip;
7081     // Invert sense of branch from sense of CMOV
7082     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7083     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7084     __ bind(skip);
7085   %}
7086   ins_pipe( pipe_slow );
7087 %}
7088 
7089 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
7090   predicate (UseSSE>=1);
7091   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7092   ins_cost(200);
7093   expand %{
7094     fcmovF_regU(cop, cr, dst, src);
7095   %}
7096 %}
7097 
7098 // unsigned version
7099 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7100   predicate (UseSSE>=2);
7101   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7102   ins_cost(200);
7103   format %{ "Jn$cop   skip\n\t"
7104             "MOVSD  $dst,$src\t# double\n"
7105       "skip:" %}
7106   ins_encode %{
7107     Label skip;
7108     // Invert sense of branch from sense of CMOV
7109     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7110     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7111     __ bind(skip);
7112   %}
7113   ins_pipe( pipe_slow );
7114 %}
7115 
7116 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7117   predicate (UseSSE>=2);
7118   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7119   ins_cost(200);
7120   expand %{
7121     fcmovD_regU(cop, cr, dst, src);
7122   %}
7123 %}
7124 
7125 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7126   predicate(VM_Version::supports_cmov() );
7127   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7128   ins_cost(200);
7129   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7130             "CMOV$cop $dst.hi,$src.hi" %}
7131   opcode(0x0F,0x40);
7132   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7133   ins_pipe( pipe_cmov_reg_long );
7134 %}
7135 
7136 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7137   predicate(VM_Version::supports_cmov() );
7138   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7139   ins_cost(200);
7140   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7141             "CMOV$cop $dst.hi,$src.hi" %}
7142   opcode(0x0F,0x40);
7143   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7144   ins_pipe( pipe_cmov_reg_long );
7145 %}
7146 
7147 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7148   predicate(VM_Version::supports_cmov() );
7149   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7150   ins_cost(200);
7151   expand %{
7152     cmovL_regU(cop, cr, dst, src);
7153   %}
7154 %}
7155 
7156 //----------Arithmetic Instructions--------------------------------------------
7157 //----------Addition Instructions----------------------------------------------
7158 
7159 // Integer Addition Instructions
7160 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7161   match(Set dst (AddI dst src));
7162   effect(KILL cr);
7163 
7164   size(2);
7165   format %{ "ADD    $dst,$src" %}
7166   opcode(0x03);
7167   ins_encode( OpcP, RegReg( dst, src) );
7168   ins_pipe( ialu_reg_reg );
7169 %}
7170 
7171 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7172   match(Set dst (AddI dst src));
7173   effect(KILL cr);
7174 
7175   format %{ "ADD    $dst,$src" %}
7176   opcode(0x81, 0x00); /* /0 id */
7177   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7178   ins_pipe( ialu_reg );
7179 %}
7180 
7181 instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
7182   predicate(UseIncDec);
7183   match(Set dst (AddI dst src));
7184   effect(KILL cr);
7185 
7186   size(1);
7187   format %{ "INC    $dst" %}
7188   opcode(0x40); /*  */
7189   ins_encode( Opc_plus( primary, dst ) );
7190   ins_pipe( ialu_reg );
7191 %}
7192 
7193 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7194   match(Set dst (AddI src0 src1));
7195   ins_cost(110);
7196 
7197   format %{ "LEA    $dst,[$src0 + $src1]" %}
7198   opcode(0x8D); /* 0x8D /r */
7199   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7200   ins_pipe( ialu_reg_reg );
7201 %}
7202 
7203 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7204   match(Set dst (AddP src0 src1));
7205   ins_cost(110);
7206 
7207   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7208   opcode(0x8D); /* 0x8D /r */
7209   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7210   ins_pipe( ialu_reg_reg );
7211 %}
7212 
7213 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7214   predicate(UseIncDec);
7215   match(Set dst (AddI dst src));
7216   effect(KILL cr);
7217 
7218   size(1);
7219   format %{ "DEC    $dst" %}
7220   opcode(0x48); /*  */
7221   ins_encode( Opc_plus( primary, dst ) );
7222   ins_pipe( ialu_reg );
7223 %}
7224 
7225 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7226   match(Set dst (AddP dst src));
7227   effect(KILL cr);
7228 
7229   size(2);
7230   format %{ "ADD    $dst,$src" %}
7231   opcode(0x03);
7232   ins_encode( OpcP, RegReg( dst, src) );
7233   ins_pipe( ialu_reg_reg );
7234 %}
7235 
7236 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7237   match(Set dst (AddP dst src));
7238   effect(KILL cr);
7239 
7240   format %{ "ADD    $dst,$src" %}
7241   opcode(0x81,0x00); /* Opcode 81 /0 id */
7242   // ins_encode( RegImm( dst, src) );
7243   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7244   ins_pipe( ialu_reg );
7245 %}
7246 
7247 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7248   match(Set dst (AddI dst (LoadI src)));
7249   effect(KILL cr);
7250 
7251   ins_cost(125);
7252   format %{ "ADD    $dst,$src" %}
7253   opcode(0x03);
7254   ins_encode( OpcP, RegMem( dst, src) );
7255   ins_pipe( ialu_reg_mem );
7256 %}
7257 
7258 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7259   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7260   effect(KILL cr);
7261 
7262   ins_cost(150);
7263   format %{ "ADD    $dst,$src" %}
7264   opcode(0x01);  /* Opcode 01 /r */
7265   ins_encode( OpcP, RegMem( src, dst ) );
7266   ins_pipe( ialu_mem_reg );
7267 %}
7268 
7269 // Add Memory with Immediate
7270 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7271   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7272   effect(KILL cr);
7273 
7274   ins_cost(125);
7275   format %{ "ADD    $dst,$src" %}
7276   opcode(0x81);               /* Opcode 81 /0 id */
7277   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7278   ins_pipe( ialu_mem_imm );
7279 %}
7280 
7281 instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
7282   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7283   effect(KILL cr);
7284 
7285   ins_cost(125);
7286   format %{ "INC    $dst" %}
7287   opcode(0xFF);               /* Opcode FF /0 */
7288   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7289   ins_pipe( ialu_mem_imm );
7290 %}
7291 
7292 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7293   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7294   effect(KILL cr);
7295 
7296   ins_cost(125);
7297   format %{ "DEC    $dst" %}
7298   opcode(0xFF);               /* Opcode FF /1 */
7299   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7300   ins_pipe( ialu_mem_imm );
7301 %}
7302 
7303 
7304 instruct checkCastPP( eRegP dst ) %{
7305   match(Set dst (CheckCastPP dst));
7306 
7307   size(0);
7308   format %{ "#checkcastPP of $dst" %}
7309   ins_encode( /*empty encoding*/ );
7310   ins_pipe( empty );
7311 %}
7312 
7313 instruct castPP( eRegP dst ) %{
7314   match(Set dst (CastPP dst));
7315   format %{ "#castPP of $dst" %}
7316   ins_encode( /*empty encoding*/ );
7317   ins_pipe( empty );
7318 %}
7319 
7320 instruct castII( rRegI dst ) %{
7321   match(Set dst (CastII dst));
7322   format %{ "#castII of $dst" %}
7323   ins_encode( /*empty encoding*/ );
7324   ins_cost(0);
7325   ins_pipe( empty );
7326 %}
7327 
7328 // Load-locked - same as a regular pointer load when used with compare-swap
7329 instruct loadPLocked(eRegP dst, memory mem) %{
7330   match(Set dst (LoadPLocked mem));
7331 
7332   ins_cost(125);
7333   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7334   opcode(0x8B);
7335   ins_encode( OpcP, RegMem(dst,mem));
7336   ins_pipe( ialu_reg_mem );
7337 %}
7338 
7339 // Conditional-store of the updated heap-top.
7340 // Used during allocation of the shared heap.
7341 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
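     // CMPXCHG semantics, roughly:
     //   if ([$heap_top_ptr] == EAX) { [$heap_top_ptr] = $newval; ZF = 1; }
     //   else                        { EAX = [$heap_top_ptr];     ZF = 0; }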
7342 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7343   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7344   // EAX is killed if there is contention, but then it's also unused.
7345   // In the common case of no contention, EAX holds the new oop address.
7346   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7347   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7348   ins_pipe( pipe_cmpxchg );
7349 %}
7350 
7351 // Conditional-store of an int value.
7352 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7353 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7354   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7355   effect(KILL oldval);
7356   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7357   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7358   ins_pipe( pipe_cmpxchg );
7359 %}
7360 
7361 // Conditional-store of a long value.
7362 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7363 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7364   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7365   effect(KILL oldval);
7366   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7367             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7368             "XCHG   EBX,ECX"
7369   %}
7370   ins_encode %{
7371     // Note: we need to swap rbx, and rcx before and after the
7372     //       cmpxchg8 instruction because the instruction uses
7373     //       rcx as the high order word of the new value to store but
7374     //       our register encoding uses rbx.
7375     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7376     __ lock();
7377     __ cmpxchg8($mem$$Address);
7378     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7379   %}
7380   ins_pipe( pipe_cmpxchg );
7381 %}
7382 
7383 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7384 
7385 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7386   predicate(VM_Version::supports_cx8());
7387   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7388   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7389   effect(KILL cr, KILL oldval);
7390   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7391             "MOV    $res,0\n\t"
7392             "JNE,s  fail\n\t"
7393             "MOV    $res,1\n"
7394           "fail:" %}
7395   ins_encode( enc_cmpxchg8(mem_ptr),
7396               enc_flags_ne_to_boolean(res) );
7397   ins_pipe( pipe_cmpxchg );
7398 %}
7399 
7400 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7401   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7402   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7403   effect(KILL cr, KILL oldval);
7404   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7405             "MOV    $res,0\n\t"
7406             "JNE,s  fail\n\t"
7407             "MOV    $res,1\n"
7408           "fail:" %}
7409   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7410   ins_pipe( pipe_cmpxchg );
7411 %}
7412 
7413 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7414   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7415   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7416   effect(KILL cr, KILL oldval);
7417   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7418             "MOV    $res,0\n\t"
7419             "JNE,s  fail\n\t"
7420             "MOV    $res,1\n"
7421           "fail:" %}
7422   ins_encode( enc_cmpxchgb(mem_ptr),
7423               enc_flags_ne_to_boolean(res) );
7424   ins_pipe( pipe_cmpxchg );
7425 %}
7426 
7427 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7428   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7429   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7430   effect(KILL cr, KILL oldval);
7431   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7432             "MOV    $res,0\n\t"
7433             "JNE,s  fail\n\t"
7434             "MOV    $res,1\n"
7435           "fail:" %}
7436   ins_encode( enc_cmpxchgw(mem_ptr),
7437               enc_flags_ne_to_boolean(res) );
7438   ins_pipe( pipe_cmpxchg );
7439 %}
7440 
7441 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7442   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7443   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7444   effect(KILL cr, KILL oldval);
7445   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7446             "MOV    $res,0\n\t"
7447             "JNE,s  fail\n\t"
7448             "MOV    $res,1\n"
7449           "fail:" %}
7450   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7451   ins_pipe( pipe_cmpxchg );
7452 %}
7453 
7454 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7455   predicate(VM_Version::supports_cx8());
7456   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7457   effect(KILL cr);
7458   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7459   ins_encode( enc_cmpxchg8(mem_ptr) );
7460   ins_pipe( pipe_cmpxchg );
7461 %}
7462 
7463 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7464   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7465   effect(KILL cr);
7466   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7467   ins_encode( enc_cmpxchg(mem_ptr) );
7468   ins_pipe( pipe_cmpxchg );
7469 %}
7470 
7471 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7472   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7473   effect(KILL cr);
7474   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7475   ins_encode( enc_cmpxchgb(mem_ptr) );
7476   ins_pipe( pipe_cmpxchg );
7477 %}
7478 
7479 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7480   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7481   effect(KILL cr);
7482   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7483   ins_encode( enc_cmpxchgw(mem_ptr) );
7484   ins_pipe( pipe_cmpxchg );
7485 %}
7486 
7487 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7488   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7489   effect(KILL cr);
7490   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7491   ins_encode( enc_cmpxchg(mem_ptr) );
7492   ins_pipe( pipe_cmpxchg );
7493 %}
7494 
7495 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7496   predicate(n->as_LoadStore()->result_not_used());
7497   match(Set dummy (GetAndAddB mem add));
7498   effect(KILL cr);
7499   format %{ "ADDB  [$mem],$add" %}
7500   ins_encode %{
7501     __ lock();
7502     __ addb($mem$$Address, $add$$constant);
7503   %}
7504   ins_pipe( pipe_cmpxchg );
7505 %}
7506 
7507 // Important to match to xRegI: only 8-bit regs.
7508 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7509   match(Set newval (GetAndAddB mem newval));
7510   effect(KILL cr);
7511   format %{ "XADDB  [$mem],$newval" %}
7512   ins_encode %{
7513     __ lock();
7514     __ xaddb($mem$$Address, $newval$$Register);
7515   %}
7516   ins_pipe( pipe_cmpxchg );
7517 %}
7518 
7519 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7520   predicate(n->as_LoadStore()->result_not_used());
7521   match(Set dummy (GetAndAddS mem add));
7522   effect(KILL cr);
7523   format %{ "ADDW  [$mem],$add" %}
7524   ins_encode %{
7525     __ lock();
7526     __ addw($mem$$Address, $add$$constant);
7527   %}
7528   ins_pipe( pipe_cmpxchg );
7529 %}
7530 
7531 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7532   match(Set newval (GetAndAddS mem newval));
7533   effect(KILL cr);
7534   format %{ "XADDW  [$mem],$newval" %}
7535   ins_encode %{
7536     __ lock();
7537     __ xaddw($mem$$Address, $newval$$Register);
7538   %}
7539   ins_pipe( pipe_cmpxchg );
7540 %}
7541 
7542 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7543   predicate(n->as_LoadStore()->result_not_used());
7544   match(Set dummy (GetAndAddI mem add));
7545   effect(KILL cr);
7546   format %{ "ADDL  [$mem],$add" %}
7547   ins_encode %{
7548     __ lock();
7549     __ addl($mem$$Address, $add$$constant);
7550   %}
7551   ins_pipe( pipe_cmpxchg );
7552 %}
7553 
7554 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7555   match(Set newval (GetAndAddI mem newval));
7556   effect(KILL cr);
7557   format %{ "XADDL  [$mem],$newval" %}
7558   ins_encode %{
7559     __ lock();
7560     __ xaddl($mem$$Address, $newval$$Register);
7561   %}
7562   ins_pipe( pipe_cmpxchg );
7563 %}
7564 
7565 // Important to match to xRegI: only 8-bit regs.
7566 instruct xchgB( memory mem, xRegI newval) %{
7567   match(Set newval (GetAndSetB mem newval));
7568   format %{ "XCHGB  $newval,[$mem]" %}
7569   ins_encode %{
7570     __ xchgb($newval$$Register, $mem$$Address);
7571   %}
7572   ins_pipe( pipe_cmpxchg );
7573 %}
7574 
7575 instruct xchgS( memory mem, rRegI newval) %{
7576   match(Set newval (GetAndSetS mem newval));
7577   format %{ "XCHGW  $newval,[$mem]" %}
7578   ins_encode %{
7579     __ xchgw($newval$$Register, $mem$$Address);
7580   %}
7581   ins_pipe( pipe_cmpxchg );
7582 %}
7583 
7584 instruct xchgI( memory mem, rRegI newval) %{
7585   match(Set newval (GetAndSetI mem newval));
7586   format %{ "XCHGL  $newval,[$mem]" %}
7587   ins_encode %{
7588     __ xchgl($newval$$Register, $mem$$Address);
7589   %}
7590   ins_pipe( pipe_cmpxchg );
7591 %}
7592 
7593 instruct xchgP( memory mem, pRegP newval) %{
7594   match(Set newval (GetAndSetP mem newval));
7595   format %{ "XCHGL  $newval,[$mem]" %}
7596   ins_encode %{
7597     __ xchgl($newval$$Register, $mem$$Address);
7598   %}
7599   ins_pipe( pipe_cmpxchg );
7600 %}
7601 
7602 //----------Subtraction Instructions-------------------------------------------
7603 
7604 // Integer Subtraction Instructions
7605 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7606   match(Set dst (SubI dst src));
7607   effect(KILL cr);
7608 
7609   size(2);
7610   format %{ "SUB    $dst,$src" %}
7611   opcode(0x2B);
7612   ins_encode( OpcP, RegReg( dst, src) );
7613   ins_pipe( ialu_reg_reg );
7614 %}
7615 
7616 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7617   match(Set dst (SubI dst src));
7618   effect(KILL cr);
7619 
7620   format %{ "SUB    $dst,$src" %}
7621   opcode(0x81,0x05);  /* Opcode 81 /5 */
7622   // ins_encode( RegImm( dst, src) );
7623   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7624   ins_pipe( ialu_reg );
7625 %}
7626 
7627 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7628   match(Set dst (SubI dst (LoadI src)));
7629   effect(KILL cr);
7630 
7631   ins_cost(125);
7632   format %{ "SUB    $dst,$src" %}
7633   opcode(0x2B);
7634   ins_encode( OpcP, RegMem( dst, src) );
7635   ins_pipe( ialu_reg_mem );
7636 %}
7637 
7638 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7639   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7640   effect(KILL cr);
7641 
7642   ins_cost(150);
7643   format %{ "SUB    $dst,$src" %}
7644   opcode(0x29);  /* Opcode 29 /r */
7645   ins_encode( OpcP, RegMem( src, dst ) );
7646   ins_pipe( ialu_mem_reg );
7647 %}
7648 
7649 // Subtract from a pointer
7650 instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
7651   match(Set dst (AddP dst (SubI zero src)));
7652   effect(KILL cr);
7653 
7654   size(2);
7655   format %{ "SUB    $dst,$src" %}
7656   opcode(0x2B);
7657   ins_encode( OpcP, RegReg( dst, src) );
7658   ins_pipe( ialu_reg_reg );
7659 %}
7660 
7661 instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
7662   match(Set dst (SubI zero dst));
7663   effect(KILL cr);
7664 
7665   size(2);
7666   format %{ "NEG    $dst" %}
7667   opcode(0xF7,0x03);  // Opcode F7 /3
7668   ins_encode( OpcP, RegOpc( dst ) );
7669   ins_pipe( ialu_reg );
7670 %}
7671 
7672 //----------Multiplication/Division Instructions-------------------------------
7673 // Integer Multiplication Instructions
7674 // Multiply Register
7675 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7676   match(Set dst (MulI dst src));
7677   effect(KILL cr);
7678 
7679   size(3);
7680   ins_cost(300);
7681   format %{ "IMUL   $dst,$src" %}
7682   opcode(0xAF, 0x0F);
7683   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7684   ins_pipe( ialu_reg_reg_alu0 );
7685 %}
7686 
7687 // Multiply 32-bit Immediate
7688 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7689   match(Set dst (MulI src imm));
7690   effect(KILL cr);
7691 
7692   ins_cost(300);
7693   format %{ "IMUL   $dst,$src,$imm" %}
7694   opcode(0x69);  /* 69 /r id */
7695   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7696   ins_pipe( ialu_reg_reg_alu0 );
7697 %}
7698 
7699 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7700   match(Set dst src);
7701   effect(KILL cr);
7702 
7703   // Note that this is artificially increased to make it more expensive than loadConL
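       // (The mulI_imm_high/mulI_imm_RShift_high instructs below compensate for
       //  this overcosting; see their "adjusted by 150" notes.)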
7704   ins_cost(250);
7705   format %{ "MOV    EAX,$src\t// low word only" %}
7706   opcode(0xB8);
7707   ins_encode( LdImmL_Lo(dst, src) );
7708   ins_pipe( ialu_reg_fat );
7709 %}
7710 
7711 // Multiply by 32-bit Immediate, taking the shifted high order results
7712 //  (special case for shift by 32)
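     // For cnt == 32 the shifted result is simply the high half of the widening
     // 32x32 IMUL, i.e. it is already in EDX, so no SAR needs to be emitted
     // (compare mulI_imm_RShift_high below for larger shift counts).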
7713 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7714   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7715   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7716              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7717              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7718   effect(USE src1, KILL cr);
7719 
7720   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7721   ins_cost(0*100 + 1*400 - 150);
7722   format %{ "IMUL   EDX:EAX,$src1" %}
7723   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7724   ins_pipe( pipe_slow );
7725 %}
7726 
7727 // Multiply by 32-bit Immediate, taking the shifted high order results
7728 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7729   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7730   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7731              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7732              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7733   effect(USE src1, KILL cr);
7734 
7735   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7736   ins_cost(1*100 + 1*400 - 150);
7737   format %{ "IMUL   EDX:EAX,$src1\n\t"
7738             "SAR    EDX,$cnt-32" %}
7739   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7740   ins_pipe( pipe_slow );
7741 %}
7742 
7743 // Multiply Memory 32-bit Immediate
7744 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7745   match(Set dst (MulI (LoadI src) imm));
7746   effect(KILL cr);
7747 
7748   ins_cost(300);
7749   format %{ "IMUL   $dst,$src,$imm" %}
7750   opcode(0x69);  /* 69 /r id */
7751   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7752   ins_pipe( ialu_reg_mem_alu0 );
7753 %}
7754 
7755 // Multiply Memory
7756 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7757   match(Set dst (MulI dst (LoadI src)));
7758   effect(KILL cr);
7759 
7760   ins_cost(350);
7761   format %{ "IMUL   $dst,$src" %}
7762   opcode(0xAF, 0x0F);
7763   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7764   ins_pipe( ialu_reg_mem_alu0 );
7765 %}
7766 
7767 instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr)
7768 %{
7769   match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3)));
7770   effect(KILL cr, KILL src2);
7771 
7772   expand %{ mulI_eReg(dst, src1, cr);
7773            mulI_eReg(src2, src3, cr);
7774            addI_eReg(dst, src2, cr); %}
7775 %}
7776 
7777 // Multiply Register Int to Long
7778 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7779   // Basic Idea: long = (long)int * (long)int
7780   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7781   effect(DEF dst, USE src, USE src1, KILL flags);
7782 
7783   ins_cost(300);
7784   format %{ "IMUL   $dst,$src1" %}
7785 
7786   ins_encode( long_int_multiply( dst, src1 ) );
7787   ins_pipe( ialu_reg_reg_alu0 );
7788 %}
7789 
7790 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7791   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7792   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7793   effect(KILL flags);
7794 
7795   ins_cost(300);
7796   format %{ "MUL    $dst,$src1" %}
7797 
7798   ins_encode( long_uint_multiply(dst, src1) );
7799   ins_pipe( ialu_reg_reg_alu0 );
7800 %}
7801 
7802 // Multiply Register Long
7803 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7804   match(Set dst (MulL dst src));
7805   effect(KILL cr, TEMP tmp);
7806   ins_cost(4*100+3*400);
7807 // Basic idea: lo(result) = lo(x_lo * y_lo)
7808 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
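     // e.g. with x = x_hi*2^32 + x_lo and y = y_hi*2^32 + y_lo:
     //   x*y mod 2^64 = x_lo*y_lo + ((x_hi*y_lo + x_lo*y_hi) << 32),
     // which is the one widening MUL plus two truncating IMULs emitted below.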
7809   format %{ "MOV    $tmp,$src.lo\n\t"
7810             "IMUL   $tmp,EDX\n\t"
7811             "MOV    EDX,$src.hi\n\t"
7812             "IMUL   EDX,EAX\n\t"
7813             "ADD    $tmp,EDX\n\t"
7814             "MUL    EDX:EAX,$src.lo\n\t"
7815             "ADD    EDX,$tmp" %}
7816   ins_encode( long_multiply( dst, src, tmp ) );
7817   ins_pipe( pipe_slow );
7818 %}
7819 
7820 // Multiply Register Long where the left operand's high 32 bits are zero
7821 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7822   predicate(is_operand_hi32_zero(n->in(1)));
7823   match(Set dst (MulL dst src));
7824   effect(KILL cr, TEMP tmp);
7825   ins_cost(2*100+2*400);
7826 // Basic idea: lo(result) = lo(x_lo * y_lo)
7827 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7828   format %{ "MOV    $tmp,$src.hi\n\t"
7829             "IMUL   $tmp,EAX\n\t"
7830             "MUL    EDX:EAX,$src.lo\n\t"
7831             "ADD    EDX,$tmp" %}
7832   ins_encode %{
7833     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7834     __ imull($tmp$$Register, rax);
7835     __ mull($src$$Register);
7836     __ addl(rdx, $tmp$$Register);
7837   %}
7838   ins_pipe( pipe_slow );
7839 %}
7840 
7841 // Multiply Register Long where the right operand's high 32 bits are zero
7842 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7843   predicate(is_operand_hi32_zero(n->in(2)));
7844   match(Set dst (MulL dst src));
7845   effect(KILL cr, TEMP tmp);
7846   ins_cost(2*100+2*400);
7847 // Basic idea: lo(result) = lo(x_lo * y_lo)
7848 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7849   format %{ "MOV    $tmp,$src.lo\n\t"
7850             "IMUL   $tmp,EDX\n\t"
7851             "MUL    EDX:EAX,$src.lo\n\t"
7852             "ADD    EDX,$tmp" %}
7853   ins_encode %{
7854     __ movl($tmp$$Register, $src$$Register);
7855     __ imull($tmp$$Register, rdx);
7856     __ mull($src$$Register);
7857     __ addl(rdx, $tmp$$Register);
7858   %}
7859   ins_pipe( pipe_slow );
7860 %}
7861 
7862 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7863 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7864   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7865   match(Set dst (MulL dst src));
7866   effect(KILL cr);
7867   ins_cost(1*400);
7868 // Basic idea: lo(result) = lo(x_lo * y_lo)
7869 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7870   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7871   ins_encode %{
7872     __ mull($src$$Register);
7873   %}
7874   ins_pipe( pipe_slow );
7875 %}
7876 
7877 // Multiply Register Long by small constant
7878 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7879   match(Set dst (MulL dst src));
7880   effect(KILL cr, TEMP tmp);
7881   ins_cost(2*100+2*400);
7882   size(12);
7883 // Basic idea: lo(result) = lo(src * EAX)
7884 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7885   format %{ "IMUL   $tmp,EDX,$src\n\t"
7886             "MOV    EDX,$src\n\t"
7887             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7888             "ADD    EDX,$tmp" %}
7889   ins_encode( long_multiply_con( dst, src, tmp ) );
7890   ins_pipe( pipe_slow );
7891 %}
7892 
7893 // Integer DIV with Register
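// The explicit check for dividend 0x80000000 with divisor -1 avoids the one
// case where IDIV would fault: the quotient of min_jint / -1 does not fit in
// 32 bits and raises a divide error, while Java requires the result min_jint
// (remainder 0), so that case is answered without executing IDIV.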
7894 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7895   match(Set rax (DivI rax div));
7896   effect(KILL rdx, KILL cr);
7897   size(26);
7898   ins_cost(30*100+10*100);
7899   format %{ "CMP    EAX,0x80000000\n\t"
7900             "JNE,s  normal\n\t"
7901             "XOR    EDX,EDX\n\t"
7902             "CMP    ECX,-1\n\t"
7903             "JE,s   done\n"
7904     "normal: CDQ\n\t"
7905             "IDIV   $div\n\t"
7906     "done:"        %}
7907   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7908   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7909   ins_pipe( ialu_reg_reg_alu0 );
7910 %}
7911 
7912 // Divide Register Long
7913 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7914   match(Set dst (DivL src1 src2));
7915   effect( KILL cr, KILL cx, KILL bx );
7916   ins_cost(10000);
7917   format %{ "PUSH   $src1.hi\n\t"
7918             "PUSH   $src1.lo\n\t"
7919             "PUSH   $src2.hi\n\t"
7920             "PUSH   $src2.lo\n\t"
7921             "CALL   SharedRuntime::ldiv\n\t"
7922             "ADD    ESP,16" %}
7923   ins_encode( long_div(src1,src2) );
7924   ins_pipe( pipe_slow );
7925 %}
7926 
7927 // Integer DIVMOD with Register, both quotient and mod results
7928 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7929   match(DivModI rax div);
7930   effect(KILL cr);
7931   size(26);
7932   ins_cost(30*100+10*100);
7933   format %{ "CMP    EAX,0x80000000\n\t"
7934             "JNE,s  normal\n\t"
7935             "XOR    EDX,EDX\n\t"
7936             "CMP    ECX,-1\n\t"
7937             "JE,s   done\n"
7938     "normal: CDQ\n\t"
7939             "IDIV   $div\n\t"
7940     "done:"        %}
7941   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7942   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7943   ins_pipe( pipe_slow );
7944 %}
7945 
7946 // Integer MOD with Register
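// Note: the format below is abbreviated; cdq_enc emits the same
// min_jint / -1 guard shown in divI_eReg above, which accounts for size(26).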
7947 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7948   match(Set rdx (ModI rax div));
7949   effect(KILL rax, KILL cr);
7950 
7951   size(26);
7952   ins_cost(300);
7953   format %{ "CDQ\n\t"
7954             "IDIV   $div" %}
7955   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7956   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7957   ins_pipe( ialu_reg_reg_alu0 );
7958 %}
7959 
7960 // Remainder Register Long
7961 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7962   match(Set dst (ModL src1 src2));
7963   effect( KILL cr, KILL cx, KILL bx );
7964   ins_cost(10000);
7965   format %{ "PUSH   $src1.hi\n\t"
7966             "PUSH   $src1.lo\n\t"
7967             "PUSH   $src2.hi\n\t"
7968             "PUSH   $src2.lo\n\t"
7969             "CALL   SharedRuntime::lrem\n\t"
7970             "ADD    ESP,16" %}
7971   ins_encode( long_mod(src1,src2) );
7972   ins_pipe( pipe_slow );
7973 %}
7974 
7975 // Divide Register Long (no special case since divisor != -1)
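// Two-step schoolbook division of the 64-bit dividend EDX:EAX by |imm|:
// if the high word is (unsigned) below |imm|, the dividend is non-negative and
// the quotient fits in 32 bits, so one unsigned DIV suffices (fast path);
// otherwise the high word is divided first and then the low word is divided
// with the remainder carried in EDX.  Signs are fixed up by negating the
// dividend before and the result after the unsigned divisions.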
7976 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7977   match(Set dst (DivL dst imm));
7978   effect( TEMP tmp, TEMP tmp2, KILL cr );
7979   ins_cost(1000);
7980   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7981             "XOR    $tmp2,$tmp2\n\t"
7982             "CMP    $tmp,EDX\n\t"
7983             "JA,s   fast\n\t"
7984             "MOV    $tmp2,EAX\n\t"
7985             "MOV    EAX,EDX\n\t"
7986             "MOV    EDX,0\n\t"
7987             "JLE,s  pos\n\t"
7988             "LNEG   EAX : $tmp2\n\t"
7989             "DIV    $tmp # unsigned division\n\t"
7990             "XCHG   EAX,$tmp2\n\t"
7991             "DIV    $tmp\n\t"
7992             "LNEG   $tmp2 : EAX\n\t"
7993             "JMP,s  done\n"
7994     "pos:\n\t"
7995             "DIV    $tmp\n\t"
7996             "XCHG   EAX,$tmp2\n"
7997     "fast:\n\t"
7998             "DIV    $tmp\n"
7999     "done:\n\t"
8000             "MOV    EDX,$tmp2\n\t"
8001             "NEG    EDX:EAX # if $imm < 0" %}
8002   ins_encode %{
8003     int con = (int)$imm$$constant;
8004     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8005     int pcon = (con > 0) ? con : -con;
8006     Label Lfast, Lpos, Ldone;
8007 
8008     __ movl($tmp$$Register, pcon);
8009     __ xorl($tmp2$$Register,$tmp2$$Register);
8010     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8011     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
8012 
8013     __ movl($tmp2$$Register, $dst$$Register); // save
8014     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8015     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8016     __ jccb(Assembler::lessEqual, Lpos); // result is positive
8017 
8018     // Negative dividend.
8019     // convert value to positive to use unsigned division
8020     __ lneg($dst$$Register, $tmp2$$Register);
8021     __ divl($tmp$$Register);
8022     __ xchgl($dst$$Register, $tmp2$$Register);
8023     __ divl($tmp$$Register);
8024     // revert result back to negative
8025     __ lneg($tmp2$$Register, $dst$$Register);
8026     __ jmpb(Ldone);
8027 
8028     __ bind(Lpos);
8029     __ divl($tmp$$Register); // Use unsigned division
8030     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit hi result
8032 
8033     __ bind(Lfast);
8034     // fast path: src is positive
8035     __ divl($tmp$$Register); // Use unsigned division
8036 
8037     __ bind(Ldone);
8038     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
8039     if (con < 0) {
8040       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
8041     }
8042   %}
8043   ins_pipe( pipe_slow );
8044 %}
8045 
// Remainder Register Long (remainder fits into 32 bits)
8047 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
8048   match(Set dst (ModL dst imm));
8049   effect( TEMP tmp, TEMP tmp2, KILL cr );
8050   ins_cost(1000);
8051   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8052             "CMP    $tmp,EDX\n\t"
8053             "JA,s   fast\n\t"
8054             "MOV    $tmp2,EAX\n\t"
8055             "MOV    EAX,EDX\n\t"
8056             "MOV    EDX,0\n\t"
8057             "JLE,s  pos\n\t"
8058             "LNEG   EAX : $tmp2\n\t"
8059             "DIV    $tmp # unsigned division\n\t"
8060             "MOV    EAX,$tmp2\n\t"
8061             "DIV    $tmp\n\t"
8062             "NEG    EDX\n\t"
8063             "JMP,s  done\n"
8064     "pos:\n\t"
8065             "DIV    $tmp\n\t"
8066             "MOV    EAX,$tmp2\n"
8067     "fast:\n\t"
8068             "DIV    $tmp\n"
8069     "done:\n\t"
8070             "MOV    EAX,EDX\n\t"
8071             "SAR    EDX,31\n\t" %}
8072   ins_encode %{
8073     int con = (int)$imm$$constant;
8074     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8075     int pcon = (con > 0) ? con : -con;
8076     Label  Lfast, Lpos, Ldone;
8077 
8078     __ movl($tmp$$Register, pcon);
8079     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8080     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
8081 
8082     __ movl($tmp2$$Register, $dst$$Register); // save
8083     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8084     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8085     __ jccb(Assembler::lessEqual, Lpos); // result is positive
8086 
8087     // Negative dividend.
8088     // convert value to positive to use unsigned division
8089     __ lneg($dst$$Register, $tmp2$$Register);
8090     __ divl($tmp$$Register);
8091     __ movl($dst$$Register, $tmp2$$Register);
8092     __ divl($tmp$$Register);
8093     // revert remainder back to negative
8094     __ negl(HIGH_FROM_LOW($dst$$Register));
8095     __ jmpb(Ldone);
8096 
8097     __ bind(Lpos);
8098     __ divl($tmp$$Register);
8099     __ movl($dst$$Register, $tmp2$$Register);
8100 
8101     __ bind(Lfast);
8102     // fast path: src is positive
8103     __ divl($tmp$$Register);
8104 
8105     __ bind(Ldone);
8106     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8107     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8108 
8109   %}
8110   ins_pipe( pipe_slow );
8111 %}
8112 
8113 // Integer Shift Instructions
8114 // Shift Left by one
8115 instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
8116   match(Set dst (LShiftI dst shift));
8117   effect(KILL cr);
8118 
8119   size(2);
8120   format %{ "SHL    $dst,$shift" %}
8121   opcode(0xD1, 0x4);  /* D1 /4 */
8122   ins_encode( OpcP, RegOpc( dst ) );
8123   ins_pipe( ialu_reg );
8124 %}
8125 
8126 // Shift Left by 8-bit immediate
8127 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8128   match(Set dst (LShiftI dst shift));
8129   effect(KILL cr);
8130 
8131   size(3);
8132   format %{ "SHL    $dst,$shift" %}
8133   opcode(0xC1, 0x4);  /* C1 /4 ib */
8134   ins_encode( RegOpcImm( dst, shift) );
8135   ins_pipe( ialu_reg );
8136 %}
8137 
8138 // Shift Left by variable
8139 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8140   match(Set dst (LShiftI dst shift));
8141   effect(KILL cr);
8142 
8143   size(2);
8144   format %{ "SHL    $dst,$shift" %}
8145   opcode(0xD3, 0x4);  /* D3 /4 */
8146   ins_encode( OpcP, RegOpc( dst ) );
8147   ins_pipe( ialu_reg_reg );
8148 %}
8149 
8150 // Arithmetic shift right by one
8151 instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
8152   match(Set dst (RShiftI dst shift));
8153   effect(KILL cr);
8154 
8155   size(2);
8156   format %{ "SAR    $dst,$shift" %}
8157   opcode(0xD1, 0x7);  /* D1 /7 */
8158   ins_encode( OpcP, RegOpc( dst ) );
8159   ins_pipe( ialu_reg );
8160 %}
8161 
8162 // Arithmetic shift right by one
8163 instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
8164   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8165   effect(KILL cr);
8166   format %{ "SAR    $dst,$shift" %}
8167   opcode(0xD1, 0x7);  /* D1 /7 */
8168   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8169   ins_pipe( ialu_mem_imm );
8170 %}
8171 
8172 // Arithmetic Shift Right by 8-bit immediate
8173 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8174   match(Set dst (RShiftI dst shift));
8175   effect(KILL cr);
8176 
8177   size(3);
8178   format %{ "SAR    $dst,$shift" %}
8179   opcode(0xC1, 0x7);  /* C1 /7 ib */
8180   ins_encode( RegOpcImm( dst, shift ) );
8181   ins_pipe( ialu_mem_imm );
8182 %}
8183 
8184 // Arithmetic Shift Right by 8-bit immediate
8185 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8186   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8187   effect(KILL cr);
8188 
8189   format %{ "SAR    $dst,$shift" %}
8190   opcode(0xC1, 0x7);  /* C1 /7 ib */
8191   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8192   ins_pipe( ialu_mem_imm );
8193 %}
8194 
8195 // Arithmetic Shift Right by variable
8196 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8197   match(Set dst (RShiftI dst shift));
8198   effect(KILL cr);
8199 
8200   size(2);
8201   format %{ "SAR    $dst,$shift" %}
8202   opcode(0xD3, 0x7);  /* D3 /7 */
8203   ins_encode( OpcP, RegOpc( dst ) );
8204   ins_pipe( ialu_reg_reg );
8205 %}
8206 
8207 // Logical shift right by one
8208 instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
8209   match(Set dst (URShiftI dst shift));
8210   effect(KILL cr);
8211 
8212   size(2);
8213   format %{ "SHR    $dst,$shift" %}
8214   opcode(0xD1, 0x5);  /* D1 /5 */
8215   ins_encode( OpcP, RegOpc( dst ) );
8216   ins_pipe( ialu_reg );
8217 %}
8218 
8219 // Logical Shift Right by 8-bit immediate
8220 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8221   match(Set dst (URShiftI dst shift));
8222   effect(KILL cr);
8223 
8224   size(3);
8225   format %{ "SHR    $dst,$shift" %}
8226   opcode(0xC1, 0x5);  /* C1 /5 ib */
8227   ins_encode( RegOpcImm( dst, shift) );
8228   ins_pipe( ialu_reg );
8229 %}
8230 
8231 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
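// e.g. a Java cast "(byte) x" reaches the matcher as (x << 24) >> 24.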
8234 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8235   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8236 
8237   size(3);
8238   format %{ "MOVSX  $dst,$src :8" %}
8239   ins_encode %{
8240     __ movsbl($dst$$Register, $src$$Register);
8241   %}
8242   ins_pipe(ialu_reg_reg);
8243 %}
8244 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
8247 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8248   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8249 
8250   size(3);
8251   format %{ "MOVSX  $dst,$src :16" %}
8252   ins_encode %{
8253     __ movswl($dst$$Register, $src$$Register);
8254   %}
8255   ins_pipe(ialu_reg_reg);
8256 %}
8257 
8258 
8259 // Logical Shift Right by variable
8260 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8261   match(Set dst (URShiftI dst shift));
8262   effect(KILL cr);
8263 
8264   size(2);
8265   format %{ "SHR    $dst,$shift" %}
8266   opcode(0xD3, 0x5);  /* D3 /5 */
8267   ins_encode( OpcP, RegOpc( dst ) );
8268   ins_pipe( ialu_reg_reg );
8269 %}
8270 
8271 
8272 //----------Logical Instructions-----------------------------------------------
8273 //----------Integer Logical Instructions---------------------------------------
8274 // And Instructions
8275 // And Register with Register
8276 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8277   match(Set dst (AndI dst src));
8278   effect(KILL cr);
8279 
8280   size(2);
8281   format %{ "AND    $dst,$src" %}
8282   opcode(0x23);
8283   ins_encode( OpcP, RegReg( dst, src) );
8284   ins_pipe( ialu_reg_reg );
8285 %}
8286 
8287 // And Register with Immediate
8288 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8289   match(Set dst (AndI dst src));
8290   effect(KILL cr);
8291 
8292   format %{ "AND    $dst,$src" %}
8293   opcode(0x81,0x04);  /* Opcode 81 /4 */
8294   // ins_encode( RegImm( dst, src) );
8295   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8296   ins_pipe( ialu_reg );
8297 %}
8298 
8299 // And Register with Memory
8300 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8301   match(Set dst (AndI dst (LoadI src)));
8302   effect(KILL cr);
8303 
8304   ins_cost(125);
8305   format %{ "AND    $dst,$src" %}
8306   opcode(0x23);
8307   ins_encode( OpcP, RegMem( dst, src) );
8308   ins_pipe( ialu_reg_mem );
8309 %}
8310 
8311 // And Memory with Register
8312 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8313   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8314   effect(KILL cr);
8315 
8316   ins_cost(150);
8317   format %{ "AND    $dst,$src" %}
8318   opcode(0x21);  /* Opcode 21 /r */
8319   ins_encode( OpcP, RegMem( src, dst ) );
8320   ins_pipe( ialu_mem_reg );
8321 %}
8322 
8323 // And Memory with Immediate
8324 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8325   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8326   effect(KILL cr);
8327 
8328   ins_cost(125);
8329   format %{ "AND    $dst,$src" %}
8330   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8331   // ins_encode( MemImm( dst, src) );
8332   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8333   ins_pipe( ialu_mem_imm );
8334 %}
8335 
8336 // BMI1 instructions
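// The match rules below recognize the bit-twiddling identities that the BMI1
// instructions implement directly:
//   ANDN   dst = ~src1 & src2
//   BLSI   dst = -src & src        (isolate lowest set bit)
//   BLSMSK dst = (src-1) ^ src     (mask up to and including lowest set bit)
//   BLSR   dst = (src-1) & src     (clear lowest set bit)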
8337 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8338   match(Set dst (AndI (XorI src1 minus_1) src2));
8339   predicate(UseBMI1Instructions);
8340   effect(KILL cr);
8341 
8342   format %{ "ANDNL  $dst, $src1, $src2" %}
8343 
8344   ins_encode %{
8345     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8346   %}
8347   ins_pipe(ialu_reg);
8348 %}
8349 
8350 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8351   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8352   predicate(UseBMI1Instructions);
8353   effect(KILL cr);
8354 
8355   ins_cost(125);
8356   format %{ "ANDNL  $dst, $src1, $src2" %}
8357 
8358   ins_encode %{
8359     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8360   %}
8361   ins_pipe(ialu_reg_mem);
8362 %}
8363 
8364 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
8365   match(Set dst (AndI (SubI imm_zero src) src));
8366   predicate(UseBMI1Instructions);
8367   effect(KILL cr);
8368 
8369   format %{ "BLSIL  $dst, $src" %}
8370 
8371   ins_encode %{
8372     __ blsil($dst$$Register, $src$$Register);
8373   %}
8374   ins_pipe(ialu_reg);
8375 %}
8376 
8377 instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
8378   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8379   predicate(UseBMI1Instructions);
8380   effect(KILL cr);
8381 
8382   ins_cost(125);
8383   format %{ "BLSIL  $dst, $src" %}
8384 
8385   ins_encode %{
8386     __ blsil($dst$$Register, $src$$Address);
8387   %}
8388   ins_pipe(ialu_reg_mem);
8389 %}
8390 
8391 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8392 %{
8393   match(Set dst (XorI (AddI src minus_1) src));
8394   predicate(UseBMI1Instructions);
8395   effect(KILL cr);
8396 
8397   format %{ "BLSMSKL $dst, $src" %}
8398 
8399   ins_encode %{
8400     __ blsmskl($dst$$Register, $src$$Register);
8401   %}
8402 
8403   ins_pipe(ialu_reg);
8404 %}
8405 
8406 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8407 %{
8408   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8409   predicate(UseBMI1Instructions);
8410   effect(KILL cr);
8411 
8412   ins_cost(125);
8413   format %{ "BLSMSKL $dst, $src" %}
8414 
8415   ins_encode %{
8416     __ blsmskl($dst$$Register, $src$$Address);
8417   %}
8418 
8419   ins_pipe(ialu_reg_mem);
8420 %}
8421 
8422 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8423 %{
8424   match(Set dst (AndI (AddI src minus_1) src) );
8425   predicate(UseBMI1Instructions);
8426   effect(KILL cr);
8427 
8428   format %{ "BLSRL  $dst, $src" %}
8429 
8430   ins_encode %{
8431     __ blsrl($dst$$Register, $src$$Register);
8432   %}
8433 
8434   ins_pipe(ialu_reg);
8435 %}
8436 
8437 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8438 %{
8439   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8440   predicate(UseBMI1Instructions);
8441   effect(KILL cr);
8442 
8443   ins_cost(125);
8444   format %{ "BLSRL  $dst, $src" %}
8445 
8446   ins_encode %{
8447     __ blsrl($dst$$Register, $src$$Address);
8448   %}
8449 
8450   ins_pipe(ialu_reg_mem);
8451 %}
8452 
8453 // Or Instructions
8454 // Or Register with Register
8455 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8456   match(Set dst (OrI dst src));
8457   effect(KILL cr);
8458 
8459   size(2);
8460   format %{ "OR     $dst,$src" %}
8461   opcode(0x0B);
8462   ins_encode( OpcP, RegReg( dst, src) );
8463   ins_pipe( ialu_reg_reg );
8464 %}
8465 
8466 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8467   match(Set dst (OrI dst (CastP2X src)));
8468   effect(KILL cr);
8469 
8470   size(2);
8471   format %{ "OR     $dst,$src" %}
8472   opcode(0x0B);
8473   ins_encode( OpcP, RegReg( dst, src) );
8474   ins_pipe( ialu_reg_reg );
8475 %}
8476 
8477 
8478 // Or Register with Immediate
8479 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8480   match(Set dst (OrI dst src));
8481   effect(KILL cr);
8482 
8483   format %{ "OR     $dst,$src" %}
8484   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8485   // ins_encode( RegImm( dst, src) );
8486   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8487   ins_pipe( ialu_reg );
8488 %}
8489 
8490 // Or Register with Memory
8491 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8492   match(Set dst (OrI dst (LoadI src)));
8493   effect(KILL cr);
8494 
8495   ins_cost(125);
8496   format %{ "OR     $dst,$src" %}
8497   opcode(0x0B);
8498   ins_encode( OpcP, RegMem( dst, src) );
8499   ins_pipe( ialu_reg_mem );
8500 %}
8501 
8502 // Or Memory with Register
8503 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8504   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8505   effect(KILL cr);
8506 
8507   ins_cost(150);
8508   format %{ "OR     $dst,$src" %}
8509   opcode(0x09);  /* Opcode 09 /r */
8510   ins_encode( OpcP, RegMem( src, dst ) );
8511   ins_pipe( ialu_mem_reg );
8512 %}
8513 
8514 // Or Memory with Immediate
8515 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8516   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8517   effect(KILL cr);
8518 
8519   ins_cost(125);
8520   format %{ "OR     $dst,$src" %}
8521   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8522   // ins_encode( MemImm( dst, src) );
8523   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8524   ins_pipe( ialu_mem_imm );
8525 %}
8526 
8527 // ROL/ROR
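// Rotates are matched from their shift/or decomposition:
//   rol(x,n) == (x << n) | (x >>> (32-n)),  ror(x,n) == (x >>> n) | (x << (32-n)).
// The immI8 forms therefore require the two shift counts to sum to 0 mod 32
// (see the predicates below); the by-one forms pair a count of 1 with -1,
// which shift-count masking reduces to 31.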
8528 // ROL expand
8529 instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
8530   effect(USE_DEF dst, USE shift, KILL cr);
8531 
8532   format %{ "ROL    $dst, $shift" %}
8533   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8534   ins_encode( OpcP, RegOpc( dst ));
8535   ins_pipe( ialu_reg );
8536 %}
8537 
8538 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8539   effect(USE_DEF dst, USE shift, KILL cr);
8540 
8541   format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8543   ins_encode( RegOpcImm(dst, shift) );
8544   ins_pipe(ialu_reg);
8545 %}
8546 
8547 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8548   effect(USE_DEF dst, USE shift, KILL cr);
8549 
8550   format %{ "ROL    $dst, $shift" %}
8551   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8552   ins_encode(OpcP, RegOpc(dst));
8553   ins_pipe( ialu_reg_reg );
8554 %}
8555 // end of ROL expand
8556 
8557 // ROL 32bit by one once
8558 instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8559   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8560 
8561   expand %{
8562     rolI_eReg_imm1(dst, lshift, cr);
8563   %}
8564 %}
8565 
8566 // ROL 32bit var by imm8 once
8567 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8568   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8569   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8570 
8571   expand %{
8572     rolI_eReg_imm8(dst, lshift, cr);
8573   %}
8574 %}
8575 
8576 // ROL 32bit var by var once
8577 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
8578   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8579 
8580   expand %{
8581     rolI_eReg_CL(dst, shift, cr);
8582   %}
8583 %}
8584 
8585 // ROL 32bit var by var once
8586 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8587   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8588 
8589   expand %{
8590     rolI_eReg_CL(dst, shift, cr);
8591   %}
8592 %}
8593 
8594 // ROR expand
8595 instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
8596   effect(USE_DEF dst, USE shift, KILL cr);
8597 
8598   format %{ "ROR    $dst, $shift" %}
8599   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8600   ins_encode( OpcP, RegOpc( dst ) );
8601   ins_pipe( ialu_reg );
8602 %}
8603 
8604 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8605   effect (USE_DEF dst, USE shift, KILL cr);
8606 
8607   format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8609   ins_encode( RegOpcImm(dst, shift) );
8610   ins_pipe( ialu_reg );
8611 %}
8612 
8613 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8614   effect(USE_DEF dst, USE shift, KILL cr);
8615 
8616   format %{ "ROR    $dst, $shift" %}
8617   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8618   ins_encode(OpcP, RegOpc(dst));
8619   ins_pipe( ialu_reg_reg );
8620 %}
8621 // end of ROR expand
8622 
8623 // ROR right once
8624 instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8625   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8626 
8627   expand %{
8628     rorI_eReg_imm1(dst, rshift, cr);
8629   %}
8630 %}
8631 
8632 // ROR 32bit by immI8 once
8633 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8634   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8635   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8636 
8637   expand %{
8638     rorI_eReg_imm8(dst, rshift, cr);
8639   %}
8640 %}
8641 
8642 // ROR 32bit var by var once
8643 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
8644   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8645 
8646   expand %{
8647     rorI_eReg_CL(dst, shift, cr);
8648   %}
8649 %}
8650 
8651 // ROR 32bit var by var once
8652 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8653   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8654 
8655   expand %{
8656     rorI_eReg_CL(dst, shift, cr);
8657   %}
8658 %}
8659 
8660 // Xor Instructions
8661 // Xor Register with Register
8662 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8663   match(Set dst (XorI dst src));
8664   effect(KILL cr);
8665 
8666   size(2);
8667   format %{ "XOR    $dst,$src" %}
8668   opcode(0x33);
8669   ins_encode( OpcP, RegReg( dst, src) );
8670   ins_pipe( ialu_reg_reg );
8671 %}
8672 
8673 // Xor Register with Immediate -1
8674 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8675   match(Set dst (XorI dst imm));
8676 
8677   size(2);
8678   format %{ "NOT    $dst" %}
8679   ins_encode %{
8680      __ notl($dst$$Register);
8681   %}
8682   ins_pipe( ialu_reg );
8683 %}
8684 
8685 // Xor Register with Immediate
8686 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8687   match(Set dst (XorI dst src));
8688   effect(KILL cr);
8689 
8690   format %{ "XOR    $dst,$src" %}
8691   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8692   // ins_encode( RegImm( dst, src) );
8693   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8694   ins_pipe( ialu_reg );
8695 %}
8696 
8697 // Xor Register with Memory
8698 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8699   match(Set dst (XorI dst (LoadI src)));
8700   effect(KILL cr);
8701 
8702   ins_cost(125);
8703   format %{ "XOR    $dst,$src" %}
8704   opcode(0x33);
8705   ins_encode( OpcP, RegMem(dst, src) );
8706   ins_pipe( ialu_reg_mem );
8707 %}
8708 
8709 // Xor Memory with Register
8710 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8711   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8712   effect(KILL cr);
8713 
8714   ins_cost(150);
8715   format %{ "XOR    $dst,$src" %}
8716   opcode(0x31);  /* Opcode 31 /r */
8717   ins_encode( OpcP, RegMem( src, dst ) );
8718   ins_pipe( ialu_mem_reg );
8719 %}
8720 
8721 // Xor Memory with Immediate
8722 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8723   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8724   effect(KILL cr);
8725 
8726   ins_cost(125);
8727   format %{ "XOR    $dst,$src" %}
8728   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8729   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8730   ins_pipe( ialu_mem_imm );
8731 %}
8732 
8733 //----------Convert Int to Boolean---------------------------------------------
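// Conv2B produces (src != 0) ? 1 : 0.  The ci2b/cp2b expansions below rely on
// NEG setting CF exactly when its operand is non-zero: after copying src into
// dst, NEG dst leaves dst = -src with CF = (src != 0), and ADC dst,src then
// yields -src + src + CF = CF, i.e. the desired 0/1 value.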
8734 
8735 instruct movI_nocopy(rRegI dst, rRegI src) %{
8736   effect( DEF dst, USE src );
8737   format %{ "MOV    $dst,$src" %}
8738   ins_encode( enc_Copy( dst, src) );
8739   ins_pipe( ialu_reg_reg );
8740 %}
8741 
8742 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8743   effect( USE_DEF dst, USE src, KILL cr );
8744 
8745   size(4);
8746   format %{ "NEG    $dst\n\t"
8747             "ADC    $dst,$src" %}
8748   ins_encode( neg_reg(dst),
8749               OpcRegReg(0x13,dst,src) );
8750   ins_pipe( ialu_reg_reg_long );
8751 %}
8752 
8753 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8754   match(Set dst (Conv2B src));
8755 
8756   expand %{
8757     movI_nocopy(dst,src);
8758     ci2b(dst,src,cr);
8759   %}
8760 %}
8761 
8762 instruct movP_nocopy(rRegI dst, eRegP src) %{
8763   effect( DEF dst, USE src );
8764   format %{ "MOV    $dst,$src" %}
8765   ins_encode( enc_Copy( dst, src) );
8766   ins_pipe( ialu_reg_reg );
8767 %}
8768 
8769 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8770   effect( USE_DEF dst, USE src, KILL cr );
8771   format %{ "NEG    $dst\n\t"
8772             "ADC    $dst,$src" %}
8773   ins_encode( neg_reg(dst),
8774               OpcRegReg(0x13,dst,src) );
8775   ins_pipe( ialu_reg_reg_long );
8776 %}
8777 
8778 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8779   match(Set dst (Conv2B src));
8780 
8781   expand %{
8782     movP_nocopy(dst,src);
8783     cp2b(dst,src,cr);
8784   %}
8785 %}
8786 
8787 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8788   match(Set dst (CmpLTMask p q));
8789   effect(KILL cr);
8790   ins_cost(400);
8791 
  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
8793   format %{ "XOR    $dst,$dst\n\t"
8794             "CMP    $p,$q\n\t"
8795             "SETlt  $dst\n\t"
8796             "NEG    $dst" %}
8797   ins_encode %{
8798     Register Rp = $p$$Register;
8799     Register Rq = $q$$Register;
8800     Register Rd = $dst$$Register;
8801     Label done;
8802     __ xorl(Rd, Rd);
8803     __ cmpl(Rp, Rq);
8804     __ setb(Assembler::less, Rd);
8805     __ negl(Rd);
8806   %}
8807 
8808   ins_pipe(pipe_slow);
8809 %}
8810 
8811 instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
8812   match(Set dst (CmpLTMask dst zero));
8813   effect(DEF dst, KILL cr);
8814   ins_cost(100);
8815 
8816   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8817   ins_encode %{
8818   __ sarl($dst$$Register, 31);
8819   %}
8820   ins_pipe(ialu_reg);
8821 %}
8822 
8823 /* better to save a register than avoid a branch */
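// Matches p = ((p < q) ? y : 0) + (p - q); implemented with a subtract and a
// conditional add instead of materializing the mask.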
8824 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8825   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8826   effect(KILL cr);
8827   ins_cost(400);
8828   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8829             "JGE    done\n\t"
8830             "ADD    $p,$y\n"
8831             "done:  " %}
8832   ins_encode %{
8833     Register Rp = $p$$Register;
8834     Register Rq = $q$$Register;
8835     Register Ry = $y$$Register;
8836     Label done;
8837     __ subl(Rp, Rq);
8838     __ jccb(Assembler::greaterEqual, done);
8839     __ addl(Rp, Ry);
8840     __ bind(done);
8841   %}
8842 
8843   ins_pipe(pipe_cmplt);
8844 %}
8845 
8846 /* better to save a register than avoid a branch */
8847 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8848   match(Set y (AndI (CmpLTMask p q) y));
8849   effect(KILL cr);
8850 
8851   ins_cost(300);
8852 
8853   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8854             "JLT      done\n\t"
8855             "XORL     $y, $y\n"
8856             "done:  " %}
8857   ins_encode %{
8858     Register Rp = $p$$Register;
8859     Register Rq = $q$$Register;
8860     Register Ry = $y$$Register;
8861     Label done;
8862     __ cmpl(Rp, Rq);
8863     __ jccb(Assembler::less, done);
8864     __ xorl(Ry, Ry);
8865     __ bind(done);
8866   %}
8867 
8868   ins_pipe(pipe_cmplt);
8869 %}
8870 
8871 /* If I enable this, I encourage spilling in the inner loop of compress.
8872 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8873   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8874 */
8875 //----------Overflow Math Instructions-----------------------------------------
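// These rules produce only condition codes, consumed by the overflow-checking
// intrinsics (e.g. the Math.*Exact family): OF is set by ADD/CMP(SUB)/IMUL on
// signed 32-bit overflow, and NEG sets OF exactly when its operand is
// min_jint, which covers the 0 - op2 case.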
8876 
8877 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8878 %{
8879   match(Set cr (OverflowAddI op1 op2));
8880   effect(DEF cr, USE_KILL op1, USE op2);
8881 
8882   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8883 
8884   ins_encode %{
8885     __ addl($op1$$Register, $op2$$Register);
8886   %}
8887   ins_pipe(ialu_reg_reg);
8888 %}
8889 
8890 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8891 %{
8892   match(Set cr (OverflowAddI op1 op2));
8893   effect(DEF cr, USE_KILL op1, USE op2);
8894 
8895   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8896 
8897   ins_encode %{
8898     __ addl($op1$$Register, $op2$$constant);
8899   %}
8900   ins_pipe(ialu_reg_reg);
8901 %}
8902 
8903 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8904 %{
8905   match(Set cr (OverflowSubI op1 op2));
8906 
8907   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8908   ins_encode %{
8909     __ cmpl($op1$$Register, $op2$$Register);
8910   %}
8911   ins_pipe(ialu_reg_reg);
8912 %}
8913 
8914 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8915 %{
8916   match(Set cr (OverflowSubI op1 op2));
8917 
8918   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8919   ins_encode %{
8920     __ cmpl($op1$$Register, $op2$$constant);
8921   %}
8922   ins_pipe(ialu_reg_reg);
8923 %}
8924 
8925 instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
8926 %{
8927   match(Set cr (OverflowSubI zero op2));
8928   effect(DEF cr, USE_KILL op2);
8929 
8930   format %{ "NEG    $op2\t# overflow check int" %}
8931   ins_encode %{
8932     __ negl($op2$$Register);
8933   %}
8934   ins_pipe(ialu_reg_reg);
8935 %}
8936 
8937 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8938 %{
8939   match(Set cr (OverflowMulI op1 op2));
8940   effect(DEF cr, USE_KILL op1, USE op2);
8941 
8942   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8943   ins_encode %{
8944     __ imull($op1$$Register, $op2$$Register);
8945   %}
8946   ins_pipe(ialu_reg_reg_alu0);
8947 %}
8948 
8949 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8950 %{
8951   match(Set cr (OverflowMulI op1 op2));
8952   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8953 
8954   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8955   ins_encode %{
8956     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8957   %}
8958   ins_pipe(ialu_reg_reg_alu0);
8959 %}
8960 
8961 // Integer Absolute Instructions
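// Branchless abs: tmp = src >> 31 is 0 for non-negative src and -1 for
// negative src, so (src ^ tmp) - tmp yields src or -src accordingly.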
8962 instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
8963 %{
8964   match(Set dst (AbsI src));
8965   effect(TEMP dst, TEMP tmp, KILL cr);
8966   format %{ "movl $tmp, $src\n\t"
8967             "sarl $tmp, 31\n\t"
8968             "movl $dst, $src\n\t"
8969             "xorl $dst, $tmp\n\t"
8970             "subl $dst, $tmp\n"
8971           %}
8972   ins_encode %{
8973     __ movl($tmp$$Register, $src$$Register);
8974     __ sarl($tmp$$Register, 31);
8975     __ movl($dst$$Register, $src$$Register);
8976     __ xorl($dst$$Register, $tmp$$Register);
8977     __ subl($dst$$Register, $tmp$$Register);
8978   %}
8979 
8980   ins_pipe(ialu_reg_reg);
8981 %}
8982 
8983 //----------Long Instructions------------------------------------------------
8984 // Add Long Register with Register
8985 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8986   match(Set dst (AddL dst src));
8987   effect(KILL cr);
8988   ins_cost(200);
8989   format %{ "ADD    $dst.lo,$src.lo\n\t"
8990             "ADC    $dst.hi,$src.hi" %}
8991   opcode(0x03, 0x13);
8992   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8993   ins_pipe( ialu_reg_reg_long );
8994 %}
8995 
8996 // Add Long Register with Immediate
8997 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8998   match(Set dst (AddL dst src));
8999   effect(KILL cr);
9000   format %{ "ADD    $dst.lo,$src.lo\n\t"
9001             "ADC    $dst.hi,$src.hi" %}
9002   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
9003   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9004   ins_pipe( ialu_reg_long );
9005 %}
9006 
9007 // Add Long Register with Memory
9008 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9009   match(Set dst (AddL dst (LoadL mem)));
9010   effect(KILL cr);
9011   ins_cost(125);
9012   format %{ "ADD    $dst.lo,$mem\n\t"
9013             "ADC    $dst.hi,$mem+4" %}
9014   opcode(0x03, 0x13);
9015   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9016   ins_pipe( ialu_reg_long_mem );
9017 %}
9018 
9019 // Subtract Long Register with Register.
9020 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9021   match(Set dst (SubL dst src));
9022   effect(KILL cr);
9023   ins_cost(200);
9024   format %{ "SUB    $dst.lo,$src.lo\n\t"
9025             "SBB    $dst.hi,$src.hi" %}
9026   opcode(0x2B, 0x1B);
9027   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9028   ins_pipe( ialu_reg_reg_long );
9029 %}
9030 
9031 // Subtract Long Register with Immediate
9032 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9033   match(Set dst (SubL dst src));
9034   effect(KILL cr);
9035   format %{ "SUB    $dst.lo,$src.lo\n\t"
9036             "SBB    $dst.hi,$src.hi" %}
9037   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
9038   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9039   ins_pipe( ialu_reg_long );
9040 %}
9041 
9042 // Subtract Long Register with Memory
9043 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9044   match(Set dst (SubL dst (LoadL mem)));
9045   effect(KILL cr);
9046   ins_cost(125);
9047   format %{ "SUB    $dst.lo,$mem\n\t"
9048             "SBB    $dst.hi,$mem+4" %}
9049   opcode(0x2B, 0x1B);
9050   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9051   ins_pipe( ialu_reg_long_mem );
9052 %}
9053 
9054 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
9055   match(Set dst (SubL zero dst));
9056   effect(KILL cr);
9057   ins_cost(300);
9058   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
9059   ins_encode( neg_long(dst) );
9060   ins_pipe( ialu_reg_reg_long );
9061 %}
9062 
9063 // And Long Register with Register
9064 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9065   match(Set dst (AndL dst src));
9066   effect(KILL cr);
9067   format %{ "AND    $dst.lo,$src.lo\n\t"
9068             "AND    $dst.hi,$src.hi" %}
9069   opcode(0x23,0x23);
9070   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9071   ins_pipe( ialu_reg_reg_long );
9072 %}
9073 
9074 // And Long Register with Immediate
9075 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9076   match(Set dst (AndL dst src));
9077   effect(KILL cr);
9078   format %{ "AND    $dst.lo,$src.lo\n\t"
9079             "AND    $dst.hi,$src.hi" %}
9080   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
9081   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9082   ins_pipe( ialu_reg_long );
9083 %}
9084 
9085 // And Long Register with Memory
9086 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9087   match(Set dst (AndL dst (LoadL mem)));
9088   effect(KILL cr);
9089   ins_cost(125);
9090   format %{ "AND    $dst.lo,$mem\n\t"
9091             "AND    $dst.hi,$mem+4" %}
9092   opcode(0x23, 0x23);
9093   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9094   ins_pipe( ialu_reg_long_mem );
9095 %}
9096 
9097 // BMI1 instructions
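// The 64-bit forms apply the 32-bit BMI1 instruction to each half.  For
// BLSI/BLSMSK/BLSR the outcome is decided by the low word: the flags set by
// the low-word instruction (ZF for BLSI, CF for BLSMSK/BLSR) indicate whether
// that word was zero, and only then is the high word processed; otherwise the
// high half of the result is already correct.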
9098 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
9099   match(Set dst (AndL (XorL src1 minus_1) src2));
9100   predicate(UseBMI1Instructions);
9101   effect(KILL cr, TEMP dst);
9102 
9103   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
9104             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
9105          %}
9106 
9107   ins_encode %{
9108     Register Rdst = $dst$$Register;
9109     Register Rsrc1 = $src1$$Register;
9110     Register Rsrc2 = $src2$$Register;
9111     __ andnl(Rdst, Rsrc1, Rsrc2);
9112     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
9113   %}
9114   ins_pipe(ialu_reg_reg_long);
9115 %}
9116 
9117 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
9118   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
9119   predicate(UseBMI1Instructions);
9120   effect(KILL cr, TEMP dst);
9121 
9122   ins_cost(125);
9123   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
9124             "ANDNL  $dst.hi, $src1.hi, $src2+4"
9125          %}
9126 
9127   ins_encode %{
9128     Register Rdst = $dst$$Register;
9129     Register Rsrc1 = $src1$$Register;
9130     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
9131 
9132     __ andnl(Rdst, Rsrc1, $src2$$Address);
9133     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
9134   %}
9135   ins_pipe(ialu_reg_mem);
9136 %}
9137 
9138 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
9139   match(Set dst (AndL (SubL imm_zero src) src));
9140   predicate(UseBMI1Instructions);
9141   effect(KILL cr, TEMP dst);
9142 
9143   format %{ "MOVL   $dst.hi, 0\n\t"
9144             "BLSIL  $dst.lo, $src.lo\n\t"
9145             "JNZ    done\n\t"
9146             "BLSIL  $dst.hi, $src.hi\n"
9147             "done:"
9148          %}
9149 
9150   ins_encode %{
9151     Label done;
9152     Register Rdst = $dst$$Register;
9153     Register Rsrc = $src$$Register;
9154     __ movl(HIGH_FROM_LOW(Rdst), 0);
9155     __ blsil(Rdst, Rsrc);
9156     __ jccb(Assembler::notZero, done);
9157     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9158     __ bind(done);
9159   %}
9160   ins_pipe(ialu_reg);
9161 %}
9162 
9163 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
9164   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9165   predicate(UseBMI1Instructions);
9166   effect(KILL cr, TEMP dst);
9167 
9168   ins_cost(125);
9169   format %{ "MOVL   $dst.hi, 0\n\t"
9170             "BLSIL  $dst.lo, $src\n\t"
9171             "JNZ    done\n\t"
9172             "BLSIL  $dst.hi, $src+4\n"
9173             "done:"
9174          %}
9175 
9176   ins_encode %{
9177     Label done;
9178     Register Rdst = $dst$$Register;
9179     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9180 
9181     __ movl(HIGH_FROM_LOW(Rdst), 0);
9182     __ blsil(Rdst, $src$$Address);
9183     __ jccb(Assembler::notZero, done);
9184     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9185     __ bind(done);
9186   %}
9187   ins_pipe(ialu_reg_mem);
9188 %}
9189 
9190 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9191 %{
9192   match(Set dst (XorL (AddL src minus_1) src));
9193   predicate(UseBMI1Instructions);
9194   effect(KILL cr, TEMP dst);
9195 
9196   format %{ "MOVL    $dst.hi, 0\n\t"
9197             "BLSMSKL $dst.lo, $src.lo\n\t"
9198             "JNC     done\n\t"
9199             "BLSMSKL $dst.hi, $src.hi\n"
9200             "done:"
9201          %}
9202 
9203   ins_encode %{
9204     Label done;
9205     Register Rdst = $dst$$Register;
9206     Register Rsrc = $src$$Register;
9207     __ movl(HIGH_FROM_LOW(Rdst), 0);
9208     __ blsmskl(Rdst, Rsrc);
9209     __ jccb(Assembler::carryClear, done);
9210     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9211     __ bind(done);
9212   %}
9213 
9214   ins_pipe(ialu_reg);
9215 %}
9216 
9217 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9218 %{
9219   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9220   predicate(UseBMI1Instructions);
9221   effect(KILL cr, TEMP dst);
9222 
9223   ins_cost(125);
9224   format %{ "MOVL    $dst.hi, 0\n\t"
9225             "BLSMSKL $dst.lo, $src\n\t"
9226             "JNC     done\n\t"
9227             "BLSMSKL $dst.hi, $src+4\n"
9228             "done:"
9229          %}
9230 
9231   ins_encode %{
9232     Label done;
9233     Register Rdst = $dst$$Register;
9234     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9235 
9236     __ movl(HIGH_FROM_LOW(Rdst), 0);
9237     __ blsmskl(Rdst, $src$$Address);
9238     __ jccb(Assembler::carryClear, done);
9239     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9240     __ bind(done);
9241   %}
9242 
9243   ins_pipe(ialu_reg_mem);
9244 %}
9245 
9246 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9247 %{
9248   match(Set dst (AndL (AddL src minus_1) src) );
9249   predicate(UseBMI1Instructions);
9250   effect(KILL cr, TEMP dst);
9251 
9252   format %{ "MOVL   $dst.hi, $src.hi\n\t"
9253             "BLSRL  $dst.lo, $src.lo\n\t"
9254             "JNC    done\n\t"
9255             "BLSRL  $dst.hi, $src.hi\n"
9256             "done:"
9257   %}
9258 
9259   ins_encode %{
9260     Label done;
9261     Register Rdst = $dst$$Register;
9262     Register Rsrc = $src$$Register;
9263     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9264     __ blsrl(Rdst, Rsrc);
9265     __ jccb(Assembler::carryClear, done);
9266     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9267     __ bind(done);
9268   %}
9269 
9270   ins_pipe(ialu_reg);
9271 %}
9272 
9273 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9274 %{
9275   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9276   predicate(UseBMI1Instructions);
9277   effect(KILL cr, TEMP dst);
9278 
9279   ins_cost(125);
9280   format %{ "MOVL   $dst.hi, $src+4\n\t"
9281             "BLSRL  $dst.lo, $src\n\t"
9282             "JNC    done\n\t"
9283             "BLSRL  $dst.hi, $src+4\n"
9284             "done:"
9285   %}
9286 
9287   ins_encode %{
9288     Label done;
9289     Register Rdst = $dst$$Register;
9290     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9291     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9292     __ blsrl(Rdst, $src$$Address);
9293     __ jccb(Assembler::carryClear, done);
9294     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9295     __ bind(done);
9296   %}
9297 
9298   ins_pipe(ialu_reg_mem);
9299 %}
9300 
9301 // Or Long Register with Register
9302 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9303   match(Set dst (OrL dst src));
9304   effect(KILL cr);
9305   format %{ "OR     $dst.lo,$src.lo\n\t"
9306             "OR     $dst.hi,$src.hi" %}
9307   opcode(0x0B,0x0B);
9308   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9309   ins_pipe( ialu_reg_reg_long );
9310 %}
9311 
9312 // Or Long Register with Immediate
9313 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9314   match(Set dst (OrL dst src));
9315   effect(KILL cr);
9316   format %{ "OR     $dst.lo,$src.lo\n\t"
9317             "OR     $dst.hi,$src.hi" %}
9318   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9319   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9320   ins_pipe( ialu_reg_long );
9321 %}
9322 
9323 // Or Long Register with Memory
9324 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9325   match(Set dst (OrL dst (LoadL mem)));
9326   effect(KILL cr);
9327   ins_cost(125);
9328   format %{ "OR     $dst.lo,$mem\n\t"
9329             "OR     $dst.hi,$mem+4" %}
9330   opcode(0x0B,0x0B);
9331   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9332   ins_pipe( ialu_reg_long_mem );
9333 %}
9334 
9335 // Xor Long Register with Register
9336 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9337   match(Set dst (XorL dst src));
9338   effect(KILL cr);
9339   format %{ "XOR    $dst.lo,$src.lo\n\t"
9340             "XOR    $dst.hi,$src.hi" %}
9341   opcode(0x33,0x33);
9342   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9343   ins_pipe( ialu_reg_reg_long );
9344 %}
9345 
9346 // Xor Long Register with Immediate -1
9347 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9348   match(Set dst (XorL dst imm));
9349   format %{ "NOT    $dst.lo\n\t"
9350             "NOT    $dst.hi" %}
9351   ins_encode %{
9352      __ notl($dst$$Register);
9353      __ notl(HIGH_FROM_LOW($dst$$Register));
9354   %}
9355   ins_pipe( ialu_reg_long );
9356 %}
9357 
9358 // Xor Long Register with Immediate
9359 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9360   match(Set dst (XorL dst src));
9361   effect(KILL cr);
9362   format %{ "XOR    $dst.lo,$src.lo\n\t"
9363             "XOR    $dst.hi,$src.hi" %}
9364   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9365   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9366   ins_pipe( ialu_reg_long );
9367 %}
9368 
9369 // Xor Long Register with Memory
9370 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9371   match(Set dst (XorL dst (LoadL mem)));
9372   effect(KILL cr);
9373   ins_cost(125);
9374   format %{ "XOR    $dst.lo,$mem\n\t"
9375             "XOR    $dst.hi,$mem+4" %}
9376   opcode(0x33,0x33);
9377   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9378   ins_pipe( ialu_reg_long_mem );
9379 %}
9380 
9381 // Shift Left Long by 1
9382 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9383   predicate(UseNewLongLShift);
9384   match(Set dst (LShiftL dst cnt));
9385   effect(KILL cr);
9386   ins_cost(100);
9387   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9388             "ADC    $dst.hi,$dst.hi" %}
9389   ins_encode %{
9390     __ addl($dst$$Register,$dst$$Register);
9391     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9392   %}
9393   ins_pipe( ialu_reg_long );
9394 %}
9395 
9396 // Shift Left Long by 2
9397 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9398   predicate(UseNewLongLShift);
9399   match(Set dst (LShiftL dst cnt));
9400   effect(KILL cr);
9401   ins_cost(100);
9402   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9403             "ADC    $dst.hi,$dst.hi\n\t"
9404             "ADD    $dst.lo,$dst.lo\n\t"
9405             "ADC    $dst.hi,$dst.hi" %}
9406   ins_encode %{
9407     __ addl($dst$$Register,$dst$$Register);
9408     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9409     __ addl($dst$$Register,$dst$$Register);
9410     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9411   %}
9412   ins_pipe( ialu_reg_long );
9413 %}
9414 
9415 // Shift Left Long by 3
9416 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9417   predicate(UseNewLongLShift);
9418   match(Set dst (LShiftL dst cnt));
9419   effect(KILL cr);
9420   ins_cost(100);
9421   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9422             "ADC    $dst.hi,$dst.hi\n\t"
9423             "ADD    $dst.lo,$dst.lo\n\t"
9424             "ADC    $dst.hi,$dst.hi\n\t"
9425             "ADD    $dst.lo,$dst.lo\n\t"
9426             "ADC    $dst.hi,$dst.hi" %}
9427   ins_encode %{
9428     __ addl($dst$$Register,$dst$$Register);
9429     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9430     __ addl($dst$$Register,$dst$$Register);
9431     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9432     __ addl($dst$$Register,$dst$$Register);
9433     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9434   %}
9435   ins_pipe( ialu_reg_long );
9436 %}
9437 
9438 // Shift Left Long by 1-31
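// SHLD dst.hi,dst.lo,cnt shifts dst.hi left by cnt while filling the vacated
// low bits from the top of dst.lo (SHRD is the mirror image for right shifts),
// so paired with a plain SHL/SHR/SAR of the other half it gives a 64-bit
// shift by 1..31.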
9439 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9440   match(Set dst (LShiftL dst cnt));
9441   effect(KILL cr);
9442   ins_cost(200);
9443   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9444             "SHL    $dst.lo,$cnt" %}
9445   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9446   ins_encode( move_long_small_shift(dst,cnt) );
9447   ins_pipe( ialu_reg_long );
9448 %}
9449 
9450 // Shift Left Long by 32-63
9451 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9452   match(Set dst (LShiftL dst cnt));
9453   effect(KILL cr);
9454   ins_cost(300);
9455   format %{ "MOV    $dst.hi,$dst.lo\n"
9456           "\tSHL    $dst.hi,$cnt-32\n"
9457           "\tXOR    $dst.lo,$dst.lo" %}
9458   opcode(0xC1, 0x4);  /* C1 /4 ib */
9459   ins_encode( move_long_big_shift_clr(dst,cnt) );
9460   ins_pipe( ialu_reg_long );
9461 %}
9462 
9463 // Shift Left Long by variable
9464 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9465   match(Set dst (LShiftL dst shift));
9466   effect(KILL cr);
9467   ins_cost(500+200);
9468   size(17);
9469   format %{ "TEST   $shift,32\n\t"
9470             "JEQ,s  small\n\t"
9471             "MOV    $dst.hi,$dst.lo\n\t"
9472             "XOR    $dst.lo,$dst.lo\n"
9473     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9474             "SHL    $dst.lo,$shift" %}
9475   ins_encode( shift_left_long( dst, shift ) );
9476   ins_pipe( pipe_slow );
9477 %}
9478 
9479 // Shift Right Long by 1-31
9480 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9481   match(Set dst (URShiftL dst cnt));
9482   effect(KILL cr);
9483   ins_cost(200);
9484   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9485             "SHR    $dst.hi,$cnt" %}
9486   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9487   ins_encode( move_long_small_shift(dst,cnt) );
9488   ins_pipe( ialu_reg_long );
9489 %}
9490 
9491 // Shift Right Long by 32-63
9492 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9493   match(Set dst (URShiftL dst cnt));
9494   effect(KILL cr);
9495   ins_cost(300);
9496   format %{ "MOV    $dst.lo,$dst.hi\n"
9497           "\tSHR    $dst.lo,$cnt-32\n"
9498           "\tXOR    $dst.hi,$dst.hi" %}
9499   opcode(0xC1, 0x5);  /* C1 /5 ib */
9500   ins_encode( move_long_big_shift_clr(dst,cnt) );
9501   ins_pipe( ialu_reg_long );
9502 %}
9503 
9504 // Shift Right Long by variable
9505 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9506   match(Set dst (URShiftL dst shift));
9507   effect(KILL cr);
9508   ins_cost(600);
9509   size(17);
9510   format %{ "TEST   $shift,32\n\t"
9511             "JEQ,s  small\n\t"
9512             "MOV    $dst.lo,$dst.hi\n\t"
9513             "XOR    $dst.hi,$dst.hi\n"
9514     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9515             "SHR    $dst.hi,$shift" %}
9516   ins_encode( shift_right_long( dst, shift ) );
9517   ins_pipe( pipe_slow );
9518 %}
9519 
9520 // Shift Right Long by 1-31
9521 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9522   match(Set dst (RShiftL dst cnt));
9523   effect(KILL cr);
9524   ins_cost(200);
9525   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9526             "SAR    $dst.hi,$cnt" %}
9527   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9528   ins_encode( move_long_small_shift(dst,cnt) );
9529   ins_pipe( ialu_reg_long );
9530 %}
9531 
9532 // Shift Right Long by 32-63
9533 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9534   match(Set dst (RShiftL dst cnt));
9535   effect(KILL cr);
9536   ins_cost(300);
9537   format %{ "MOV    $dst.lo,$dst.hi\n"
9538           "\tSAR    $dst.lo,$cnt-32\n"
9539           "\tSAR    $dst.hi,31" %}
9540   opcode(0xC1, 0x7);  /* C1 /7 ib */
9541   ins_encode( move_long_big_shift_sign(dst,cnt) );
9542   ins_pipe( ialu_reg_long );
9543 %}
9544 
9545 // Shift Right arithmetic Long by variable
9546 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9547   match(Set dst (RShiftL dst shift));
9548   effect(KILL cr);
9549   ins_cost(600);
9550   size(18);
9551   format %{ "TEST   $shift,32\n\t"
9552             "JEQ,s  small\n\t"
9553             "MOV    $dst.lo,$dst.hi\n\t"
9554             "SAR    $dst.hi,31\n"
9555     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9556             "SAR    $dst.hi,$shift" %}
9557   ins_encode( shift_right_arith_long( dst, shift ) );
9558   ins_pipe( pipe_slow );
9559 %}
9560 
9561 
9562 //----------Double Instructions------------------------------------------------
9563 // Double Math
9564 
9565 // Compare & branch
9566 
9567 // P6 version of double compare, sets condition codes in EFLAGS
9568 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9569   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9570   match(Set cr (CmpD src1 src2));
9571   effect(KILL rax);
9572   ins_cost(150);
9573   format %{ "FLD    $src1\n\t"
9574             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9575             "JNP    exit\n\t"
9576             "MOV    ah,1       // saw a NaN, set CF\n\t"
9577             "SAHF\n"
9578      "exit:\tNOP               // avoid branch to branch" %}
9579   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9580   ins_encode( Push_Reg_DPR(src1),
9581               OpcP, RegOpc(src2),
9582               cmpF_P6_fixup );
9583   ins_pipe( pipe_slow );
9584 %}
9585 
9586 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9587   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9588   match(Set cr (CmpD src1 src2));
9589   ins_cost(150);
9590   format %{ "FLD    $src1\n\t"
9591             "FUCOMIP ST,$src2  // P6 instruction" %}
9592   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9593   ins_encode( Push_Reg_DPR(src1),
9594               OpcP, RegOpc(src2));
9595   ins_pipe( pipe_slow );
9596 %}
9597 
9598 // Compare & branch
9599 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9600   predicate(UseSSE<=1);
9601   match(Set cr (CmpD src1 src2));
9602   effect(KILL rax);
9603   ins_cost(200);
9604   format %{ "FLD    $src1\n\t"
9605             "FCOMp  $src2\n\t"
9606             "FNSTSW AX\n\t"
9607             "TEST   AX,0x400\n\t"
9608             "JZ,s   flags\n\t"
9609             "MOV    AH,1\t# unordered treat as LT\n"
9610     "flags:\tSAHF" %}
9611   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9612   ins_encode( Push_Reg_DPR(src1),
9613               OpcP, RegOpc(src2),
9614               fpu_flags);
9615   ins_pipe( pipe_slow );
9616 %}
9617 
9618 // Compare vs zero into -1,0,1
9619 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9620   predicate(UseSSE<=1);
9621   match(Set dst (CmpD3 src1 zero));
9622   effect(KILL cr, KILL rax);
9623   ins_cost(280);
9624   format %{ "FTSTD  $dst,$src1" %}
9625   opcode(0xE4, 0xD9);
9626   ins_encode( Push_Reg_DPR(src1),
9627               OpcS, OpcP, PopFPU,
9628               CmpF_Result(dst));
9629   ins_pipe( pipe_slow );
9630 %}
9631 
9632 // Compare into -1,0,1
9633 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9634   predicate(UseSSE<=1);
9635   match(Set dst (CmpD3 src1 src2));
9636   effect(KILL cr, KILL rax);
9637   ins_cost(300);
9638   format %{ "FCMPD  $dst,$src1,$src2" %}
9639   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9640   ins_encode( Push_Reg_DPR(src1),
9641               OpcP, RegOpc(src2),
9642               CmpF_Result(dst));
9643   ins_pipe( pipe_slow );
9644 %}
9645 
9646 // double compare and set condition codes in EFLAGS by XMM regs
9647 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9648   predicate(UseSSE>=2);
9649   match(Set cr (CmpD src1 src2));
9650   ins_cost(145);
9651   format %{ "UCOMISD $src1,$src2\n\t"
9652             "JNP,s   exit\n\t"
9653             "PUSHF\t# saw NaN, set CF\n\t"
9654             "AND     [rsp], #0xffffff2b\n\t"
9655             "POPF\n"
9656     "exit:" %}
9657   ins_encode %{
9658     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9659     emit_cmpfp_fixup(_masm);
9660   %}
9661   ins_pipe( pipe_slow );
9662 %}
9663 
9664 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9665   predicate(UseSSE>=2);
9666   match(Set cr (CmpD src1 src2));
9667   ins_cost(100);
9668   format %{ "UCOMISD $src1,$src2" %}
9669   ins_encode %{
9670     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9671   %}
9672   ins_pipe( pipe_slow );
9673 %}
9674 
9675 // double compare and set condition codes in EFLAGS by XMM regs
9676 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9677   predicate(UseSSE>=2);
9678   match(Set cr (CmpD src1 (LoadD src2)));
9679   ins_cost(145);
9680   format %{ "UCOMISD $src1,$src2\n\t"
9681             "JNP,s   exit\n\t"
9682             "PUSHF\t# saw NaN, set CF\n\t"
9683             "AND     [rsp], #0xffffff2b\n\t"
9684             "POPF\n"
9685     "exit:" %}
9686   ins_encode %{
9687     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9688     emit_cmpfp_fixup(_masm);
9689   %}
9690   ins_pipe( pipe_slow );
9691 %}
9692 
9693 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9694   predicate(UseSSE>=2);
9695   match(Set cr (CmpD src1 (LoadD src2)));
9696   ins_cost(100);
9697   format %{ "UCOMISD $src1,$src2" %}
9698   ins_encode %{
9699     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9700   %}
9701   ins_pipe( pipe_slow );
9702 %}
9703 
9704 // Compare into -1,0,1 in XMM
9705 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9706   predicate(UseSSE>=2);
9707   match(Set dst (CmpD3 src1 src2));
9708   effect(KILL cr);
9709   ins_cost(255);
9710   format %{ "UCOMISD $src1, $src2\n\t"
9711             "MOV     $dst, #-1\n\t"
9712             "JP,s    done\n\t"
9713             "JB,s    done\n\t"
9714             "SETNE   $dst\n\t"
9715             "MOVZB   $dst, $dst\n"
9716     "done:" %}
9717   ins_encode %{
9718     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9719     emit_cmpfp3(_masm, $dst$$Register);
9720   %}
9721   ins_pipe( pipe_slow );
9722 %}
9723 
9724 // Compare into -1,0,1 in XMM and memory
9725 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9726   predicate(UseSSE>=2);
9727   match(Set dst (CmpD3 src1 (LoadD src2)));
9728   effect(KILL cr);
9729   ins_cost(275);
9730   format %{ "UCOMISD $src1, $src2\n\t"
9731             "MOV     $dst, #-1\n\t"
9732             "JP,s    done\n\t"
9733             "JB,s    done\n\t"
9734             "SETNE   $dst\n\t"
9735             "MOVZB   $dst, $dst\n"
9736     "done:" %}
9737   ins_encode %{
9738     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9739     emit_cmpfp3(_masm, $dst$$Register);
9740   %}
9741   ins_pipe( pipe_slow );
9742 %}
9743 
9744 
9745 instruct subDPR_reg(regDPR dst, regDPR src) %{
9746   predicate (UseSSE <=1);
9747   match(Set dst (SubD dst src));
9748 
9749   format %{ "FLD    $src\n\t"
9750             "DSUBp  $dst,ST" %}
9751   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9752   ins_cost(150);
9753   ins_encode( Push_Reg_DPR(src),
9754               OpcP, RegOpc(dst) );
9755   ins_pipe( fpu_reg_reg );
9756 %}
9757 
9758 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9759   predicate (UseSSE <=1);
9760   match(Set dst (RoundDouble (SubD src1 src2)));
9761   ins_cost(250);
9762 
9763   format %{ "FLD    $src2\n\t"
9764             "DSUB   ST,$src1\n\t"
9765             "FSTP_D $dst\t# D-round" %}
9766   opcode(0xD8, 0x5);
9767   ins_encode( Push_Reg_DPR(src2),
9768               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9769   ins_pipe( fpu_mem_reg_reg );
9770 %}
9771 
9772 
9773 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9774   predicate (UseSSE <=1);
9775   match(Set dst (SubD dst (LoadD src)));
9776   ins_cost(150);
9777 
9778   format %{ "FLD    $src\n\t"
9779             "DSUBp  $dst,ST" %}
9780   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9781   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9782               OpcP, RegOpc(dst) );
9783   ins_pipe( fpu_reg_mem );
9784 %}
9785 
9786 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9787   predicate (UseSSE<=1);
9788   match(Set dst (AbsD src));
9789   ins_cost(100);
9790   format %{ "FABS" %}
9791   opcode(0xE1, 0xD9);
9792   ins_encode( OpcS, OpcP );
9793   ins_pipe( fpu_reg_reg );
9794 %}
9795 
9796 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9797   predicate(UseSSE<=1);
9798   match(Set dst (NegD src));
9799   ins_cost(100);
9800   format %{ "FCHS" %}
9801   opcode(0xE0, 0xD9);
9802   ins_encode( OpcS, OpcP );
9803   ins_pipe( fpu_reg_reg );
9804 %}
9805 
9806 instruct addDPR_reg(regDPR dst, regDPR src) %{
9807   predicate(UseSSE<=1);
9808   match(Set dst (AddD dst src));
9809   format %{ "FLD    $src\n\t"
9810             "DADD   $dst,ST" %}
9811   size(4);
9812   ins_cost(150);
9813   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9814   ins_encode( Push_Reg_DPR(src),
9815               OpcP, RegOpc(dst) );
9816   ins_pipe( fpu_reg_reg );
9817 %}
9818 
9819 
9820 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9821   predicate(UseSSE<=1);
9822   match(Set dst (RoundDouble (AddD src1 src2)));
9823   ins_cost(250);
9824 
9825   format %{ "FLD    $src2\n\t"
9826             "DADD   ST,$src1\n\t"
9827             "FSTP_D $dst\t# D-round" %}
9828   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9829   ins_encode( Push_Reg_DPR(src2),
9830               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9831   ins_pipe( fpu_mem_reg_reg );
9832 %}
9833 
9834 
9835 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9836   predicate(UseSSE<=1);
9837   match(Set dst (AddD dst (LoadD src)));
9838   ins_cost(150);
9839 
9840   format %{ "FLD    $src\n\t"
9841             "DADDp  $dst,ST" %}
9842   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9843   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9844               OpcP, RegOpc(dst) );
9845   ins_pipe( fpu_reg_mem );
9846 %}
9847 
9848 // add-to-memory
9849 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9850   predicate(UseSSE<=1);
9851   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9852   ins_cost(150);
9853 
9854   format %{ "FLD_D  $dst\n\t"
9855             "DADD   ST,$src\n\t"
9856             "FST_D  $dst" %}
9857   opcode(0xDD, 0x0);
9858   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9859               Opcode(0xD8), RegOpc(src),
9860               set_instruction_start,
9861               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9862   ins_pipe( fpu_reg_mem );
9863 %}
9864 
9865 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9866   predicate(UseSSE<=1);
9867   match(Set dst (AddD dst con));
9868   ins_cost(125);
9869   format %{ "FLD1\n\t"
9870             "DADDp  $dst,ST" %}
9871   ins_encode %{
9872     __ fld1();
9873     __ faddp($dst$$reg);
9874   %}
9875   ins_pipe(fpu_reg);
9876 %}
9877 
9878 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9879   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9880   match(Set dst (AddD dst con));
9881   ins_cost(200);
9882   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9883             "DADDp  $dst,ST" %}
9884   ins_encode %{
9885     __ fld_d($constantaddress($con));
9886     __ faddp($dst$$reg);
9887   %}
9888   ins_pipe(fpu_reg_mem);
9889 %}
9890 
9891 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9892   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9893   match(Set dst (RoundDouble (AddD src con)));
9894   ins_cost(200);
9895   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9896             "DADD   ST,$src\n\t"
9897             "FSTP_D $dst\t# D-round" %}
9898   ins_encode %{
9899     __ fld_d($constantaddress($con));
9900     __ fadd($src$$reg);
9901     __ fstp_d(Address(rsp, $dst$$disp));
9902   %}
9903   ins_pipe(fpu_mem_reg_con);
9904 %}
9905 
9906 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9907   predicate(UseSSE<=1);
9908   match(Set dst (MulD dst src));
9909   format %{ "FLD    $src\n\t"
9910             "DMULp  $dst,ST" %}
9911   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9912   ins_cost(150);
9913   ins_encode( Push_Reg_DPR(src),
9914               OpcP, RegOpc(dst) );
9915   ins_pipe( fpu_reg_reg );
9916 %}
9917 
9918 // Strict FP instruction biases argument before multiply then
9919 // biases result to avoid double rounding of subnormals.
9920 //
9921 // scale arg1 by multiplying arg1 by 2^(-15360)
9922 // load arg2
9923 // multiply scaled arg1 by arg2
9924 // rescale product by 2^(15360)
9925 //
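// A rough sketch of the resulting FPU sequence (illustrative only; the
// actual bytes come from the strictfp_bias1/strictfp_bias2 encodings below):
//     FLD    [2^(-15360) bias constant]
//     DMULp  dst,ST            // pre-scale dst
//     FLD    src
//     DMULp  dst,ST            // dst = scaled dst * src
//     FLD    [2^(+15360) bias constant]
//     DMULp  dst,ST            // rescale the product, rounding only once
//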
9926 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9927   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9928   match(Set dst (MulD dst src));
9929   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9930 
9931   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9932             "DMULp  $dst,ST\n\t"
9933             "FLD    $src\n\t"
9934             "DMULp  $dst,ST\n\t"
9935             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9936             "DMULp  $dst,ST" %}
9937   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9938   ins_encode( strictfp_bias1(dst),
9939               Push_Reg_DPR(src),
9940               OpcP, RegOpc(dst),
9941               strictfp_bias2(dst) );
9942   ins_pipe( fpu_reg_reg );
9943 %}
9944 
9945 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9946   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9947   match(Set dst (MulD dst con));
9948   ins_cost(200);
9949   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9950             "DMULp  $dst,ST" %}
9951   ins_encode %{
9952     __ fld_d($constantaddress($con));
9953     __ fmulp($dst$$reg);
9954   %}
9955   ins_pipe(fpu_reg_mem);
9956 %}
9957 
9958 
9959 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9960   predicate( UseSSE<=1 );
9961   match(Set dst (MulD dst (LoadD src)));
9962   ins_cost(200);
9963   format %{ "FLD_D  $src\n\t"
9964             "DMULp  $dst,ST" %}
9965   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9966   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9967               OpcP, RegOpc(dst) );
9968   ins_pipe( fpu_reg_mem );
9969 %}
9970 
9971 //
9972 // Cisc-alternate to reg-reg multiply
9973 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9974   predicate( UseSSE<=1 );
9975   match(Set dst (MulD src (LoadD mem)));
9976   ins_cost(250);
9977   format %{ "FLD_D  $mem\n\t"
9978             "DMUL   ST,$src\n\t"
9979             "FSTP_D $dst" %}
9980   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9981   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9982               OpcReg_FPR(src),
9983               Pop_Reg_DPR(dst) );
9984   ins_pipe( fpu_reg_reg_mem );
9985 %}
9986 
9987 
9988 // MACRO3 -- addDPR a mulDPR
9989 // This instruction is a '2-address' instruction in that the result goes
9990 // back to src2.  This eliminates a move from the macro; possibly the
9991 // register allocator will have to add it back (and maybe not).
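// For illustration only, a 3-address version of the same macro would need a
// trailing store, roughly:
//     FLD src0 ; DMUL ST,src1 ; DADD ST,src2 ; FSTP dst
// while the 2-address form below folds the result into src2 and drops the store:
//     FLD src0 ; DMUL ST,src1 ; DADDp src2,ST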
9992 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9993   predicate( UseSSE<=1 );
9994   match(Set src2 (AddD (MulD src0 src1) src2));
9995   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9996             "DMUL   ST,$src1\n\t"
9997             "DADDp  $src2,ST" %}
9998   ins_cost(250);
9999   opcode(0xDD); /* LoadD DD /0 */
10000   ins_encode( Push_Reg_FPR(src0),
10001               FMul_ST_reg(src1),
10002               FAddP_reg_ST(src2) );
10003   ins_pipe( fpu_reg_reg_reg );
10004 %}
10005 
10006 
10007 // MACRO3 -- subDPR a mulDPR
10008 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
10009   predicate( UseSSE<=1 );
10010   match(Set src2 (SubD (MulD src0 src1) src2));
10011   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
10012             "DMUL   ST,$src1\n\t"
10013             "DSUBRp $src2,ST" %}
10014   ins_cost(250);
10015   ins_encode( Push_Reg_FPR(src0),
10016               FMul_ST_reg(src1),
10017               Opcode(0xDE), Opc_plus(0xE0,src2));
10018   ins_pipe( fpu_reg_reg_reg );
10019 %}
10020 
10021 
10022 instruct divDPR_reg(regDPR dst, regDPR src) %{
10023   predicate( UseSSE<=1 );
10024   match(Set dst (DivD dst src));
10025 
10026   format %{ "FLD    $src\n\t"
10027             "FDIVp  $dst,ST" %}
10028   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10029   ins_cost(150);
10030   ins_encode( Push_Reg_DPR(src),
10031               OpcP, RegOpc(dst) );
10032   ins_pipe( fpu_reg_reg );
10033 %}
10034 
10035 // Strict FP instruction biases argument before division then
10036 // biases result, to avoid double rounding of subnormals.
10037 //
10038 // scale dividend by multiplying dividend by 2^(-15360)
10039 // load divisor
10040 // divide scaled dividend by divisor
10041 // rescale quotient by 2^(15360)
10042 //
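// A rough sketch of the resulting FPU sequence (illustrative only; it mirrors
// the strict multiply above with the middle multiply replaced by a divide):
//     FLD    [2^(-15360) bias constant]
//     DMULp  dst,ST            // pre-scale the dividend
//     FLD    src               // the divisor
//     FDIVp  dst,ST            // divide the scaled dividend by the divisor
//     FLD    [2^(+15360) bias constant]
//     DMULp  dst,ST            // rescale the quotient, rounding only once
//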
10043 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
10044   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
10045   match(Set dst (DivD dst src));
10046   ins_cost(1);   // Select this instruction for all strict FP double divides
10048 
10049   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
10050             "DMULp  $dst,ST\n\t"
10051             "FLD    $src\n\t"
10052             "FDIVp  $dst,ST\n\t"
10053             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
10054             "DMULp  $dst,ST" %}
10055   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10056   ins_encode( strictfp_bias1(dst),
10057               Push_Reg_DPR(src),
10058               OpcP, RegOpc(dst),
10059               strictfp_bias2(dst) );
10060   ins_pipe( fpu_reg_reg );
10061 %}
10062 
10063 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
10064   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
10065   match(Set dst (RoundDouble (DivD src1 src2)));
10066 
10067   format %{ "FLD    $src1\n\t"
10068             "FDIV   ST,$src2\n\t"
10069             "FSTP_D $dst\t# D-round" %}
10070   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10071   ins_encode( Push_Reg_DPR(src1),
10072               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
10073   ins_pipe( fpu_mem_reg_reg );
10074 %}
10075 
10076 
10077 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
10078   predicate(UseSSE<=1);
10079   match(Set dst (ModD dst src));
10080   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10081 
10082   format %{ "DMOD   $dst,$src" %}
10083   ins_cost(250);
10084   ins_encode(Push_Reg_Mod_DPR(dst, src),
10085               emitModDPR(),
10086               Push_Result_Mod_DPR(src),
10087               Pop_Reg_DPR(dst));
10088   ins_pipe( pipe_slow );
10089 %}
10090 
10091 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
10092   predicate(UseSSE>=2);
10093   match(Set dst (ModD src0 src1));
10094   effect(KILL rax, KILL cr);
10095 
10096   format %{ "SUB    ESP,8\t # DMOD\n"
10097           "\tMOVSD  [ESP+0],$src1\n"
10098           "\tFLD_D  [ESP+0]\n"
10099           "\tMOVSD  [ESP+0],$src0\n"
10100           "\tFLD_D  [ESP+0]\n"
10101      "loop:\tFPREM\n"
10102           "\tFWAIT\n"
10103           "\tFNSTSW AX\n"
10104           "\tSAHF\n"
10105           "\tJP     loop\n"
10106           "\tFSTP_D [ESP+0]\n"
10107           "\tMOVSD  $dst,[ESP+0]\n"
10108           "\tADD    ESP,8\n"
10109           "\tFSTP   ST0\t # Restore FPU Stack"
10110     %}
10111   ins_cost(250);
10112   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10113   ins_pipe( pipe_slow );
10114 %}
10115 
10116 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10117   predicate (UseSSE<=1);
10118   match(Set dst (AtanD dst src));
10119   format %{ "DATA   $dst,$src" %}
10120   opcode(0xD9, 0xF3);
10121   ins_encode( Push_Reg_DPR(src),
10122               OpcP, OpcS, RegOpc(dst) );
10123   ins_pipe( pipe_slow );
10124 %}
10125 
10126 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10127   predicate (UseSSE>=2);
10128   match(Set dst (AtanD dst src));
10129   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10130   format %{ "DATA   $dst,$src" %}
10131   opcode(0xD9, 0xF3);
10132   ins_encode( Push_SrcD(src),
10133               OpcP, OpcS, Push_ResultD(dst) );
10134   ins_pipe( pipe_slow );
10135 %}
10136 
10137 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10138   predicate (UseSSE<=1);
10139   match(Set dst (SqrtD src));
10140   format %{ "DSQRT  $dst,$src" %}
10141   opcode(0xFA, 0xD9);
10142   ins_encode( Push_Reg_DPR(src),
10143               OpcS, OpcP, Pop_Reg_DPR(dst) );
10144   ins_pipe( pipe_slow );
10145 %}
10146 
10147 //-------------Float Instructions-------------------------------
10148 // Float Math
10149 
10150 // Code for float compare:
10151 //     fcompp();
10152 //     fwait(); fnstsw_ax();
10153 //     sahf();
10154 //     movl(dst, unordered_result);
10155 //     jcc(Assembler::parity, exit);
10156 //     movl(dst, less_result);
10157 //     jcc(Assembler::below, exit);
10158 //     movl(dst, equal_result);
10159 //     jcc(Assembler::equal, exit);
10160 //     movl(dst, greater_result);
10161 //   exit:
10162 
10163 // P6 version of float compare, sets condition codes in EFLAGS
10164 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10165   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10166   match(Set cr (CmpF src1 src2));
10167   effect(KILL rax);
10168   ins_cost(150);
10169   format %{ "FLD    $src1\n\t"
10170             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10171             "JNP    exit\n\t"
10172             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10173             "SAHF\n"
10174      "exit:\tNOP               // avoid branch to branch" %}
10175   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10176   ins_encode( Push_Reg_DPR(src1),
10177               OpcP, RegOpc(src2),
10178               cmpF_P6_fixup );
10179   ins_pipe( pipe_slow );
10180 %}
10181 
10182 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10183   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10184   match(Set cr (CmpF src1 src2));
10185   ins_cost(100);
10186   format %{ "FLD    $src1\n\t"
10187             "FUCOMIP ST,$src2  // P6 instruction" %}
10188   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10189   ins_encode( Push_Reg_DPR(src1),
10190               OpcP, RegOpc(src2));
10191   ins_pipe( pipe_slow );
10192 %}
10193 
10194 
10195 // Compare & branch
10196 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10197   predicate(UseSSE == 0);
10198   match(Set cr (CmpF src1 src2));
10199   effect(KILL rax);
10200   ins_cost(200);
10201   format %{ "FLD    $src1\n\t"
10202             "FCOMp  $src2\n\t"
10203             "FNSTSW AX\n\t"
10204             "TEST   AX,0x400\n\t"
10205             "JZ,s   flags\n\t"
10206             "MOV    AH,1\t# unordered treat as LT\n"
10207     "flags:\tSAHF" %}
10208   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10209   ins_encode( Push_Reg_DPR(src1),
10210               OpcP, RegOpc(src2),
10211               fpu_flags);
10212   ins_pipe( pipe_slow );
10213 %}
10214 
10215 // Compare vs zero into -1,0,1
10216 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10217   predicate(UseSSE == 0);
10218   match(Set dst (CmpF3 src1 zero));
10219   effect(KILL cr, KILL rax);
10220   ins_cost(280);
10221   format %{ "FTSTF  $dst,$src1" %}
10222   opcode(0xE4, 0xD9);
10223   ins_encode( Push_Reg_DPR(src1),
10224               OpcS, OpcP, PopFPU,
10225               CmpF_Result(dst));
10226   ins_pipe( pipe_slow );
10227 %}
10228 
10229 // Compare into -1,0,1
10230 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10231   predicate(UseSSE == 0);
10232   match(Set dst (CmpF3 src1 src2));
10233   effect(KILL cr, KILL rax);
10234   ins_cost(300);
10235   format %{ "FCMPF  $dst,$src1,$src2" %}
10236   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10237   ins_encode( Push_Reg_DPR(src1),
10238               OpcP, RegOpc(src2),
10239               CmpF_Result(dst));
10240   ins_pipe( pipe_slow );
10241 %}
10242 
10243 // float compare and set condition codes in EFLAGS by XMM regs
10244 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10245   predicate(UseSSE>=1);
10246   match(Set cr (CmpF src1 src2));
10247   ins_cost(145);
10248   format %{ "UCOMISS $src1,$src2\n\t"
10249             "JNP,s   exit\n\t"
10250             "PUSHF\t# saw NaN, set CF\n\t"
10251             "AND     [rsp], #0xffffff2b\n\t"
10252             "POPF\n"
10253     "exit:" %}
10254   ins_encode %{
10255     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10256     emit_cmpfp_fixup(_masm);
10257   %}
10258   ins_pipe( pipe_slow );
10259 %}
10260 
10261 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10262   predicate(UseSSE>=1);
10263   match(Set cr (CmpF src1 src2));
10264   ins_cost(100);
10265   format %{ "UCOMISS $src1,$src2" %}
10266   ins_encode %{
10267     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10268   %}
10269   ins_pipe( pipe_slow );
10270 %}
10271 
10272 // float compare and set condition codes in EFLAGS by XMM regs
10273 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10274   predicate(UseSSE>=1);
10275   match(Set cr (CmpF src1 (LoadF src2)));
10276   ins_cost(165);
10277   format %{ "UCOMISS $src1,$src2\n\t"
10278             "JNP,s   exit\n\t"
10279             "PUSHF\t# saw NaN, set CF\n\t"
10280             "AND     [rsp], #0xffffff2b\n\t"
10281             "POPF\n"
10282     "exit:" %}
10283   ins_encode %{
10284     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10285     emit_cmpfp_fixup(_masm);
10286   %}
10287   ins_pipe( pipe_slow );
10288 %}
10289 
10290 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10291   predicate(UseSSE>=1);
10292   match(Set cr (CmpF src1 (LoadF src2)));
10293   ins_cost(100);
10294   format %{ "UCOMISS $src1,$src2" %}
10295   ins_encode %{
10296     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10297   %}
10298   ins_pipe( pipe_slow );
10299 %}
10300 
10301 // Compare into -1,0,1 in XMM
10302 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10303   predicate(UseSSE>=1);
10304   match(Set dst (CmpF3 src1 src2));
10305   effect(KILL cr);
10306   ins_cost(255);
10307   format %{ "UCOMISS $src1, $src2\n\t"
10308             "MOV     $dst, #-1\n\t"
10309             "JP,s    done\n\t"
10310             "JB,s    done\n\t"
10311             "SETNE   $dst\n\t"
10312             "MOVZB   $dst, $dst\n"
10313     "done:" %}
10314   ins_encode %{
10315     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10316     emit_cmpfp3(_masm, $dst$$Register);
10317   %}
10318   ins_pipe( pipe_slow );
10319 %}
10320 
10321 // Compare into -1,0,1 in XMM and memory
10322 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10323   predicate(UseSSE>=1);
10324   match(Set dst (CmpF3 src1 (LoadF src2)));
10325   effect(KILL cr);
10326   ins_cost(275);
10327   format %{ "UCOMISS $src1, $src2\n\t"
10328             "MOV     $dst, #-1\n\t"
10329             "JP,s    done\n\t"
10330             "JB,s    done\n\t"
10331             "SETNE   $dst\n\t"
10332             "MOVZB   $dst, $dst\n"
10333     "done:" %}
10334   ins_encode %{
10335     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10336     emit_cmpfp3(_masm, $dst$$Register);
10337   %}
10338   ins_pipe( pipe_slow );
10339 %}
10340 
10341 // Spill to obtain 24-bit precision
10342 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10343   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10344   match(Set dst (SubF src1 src2));
10345 
10346   format %{ "FSUB   $dst,$src1 - $src2" %}
10347   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10348   ins_encode( Push_Reg_FPR(src1),
10349               OpcReg_FPR(src2),
10350               Pop_Mem_FPR(dst) );
10351   ins_pipe( fpu_mem_reg_reg );
10352 %}
10353 //
10354 // This instruction does not round to 24-bits
10355 instruct subFPR_reg(regFPR dst, regFPR src) %{
10356   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10357   match(Set dst (SubF dst src));
10358 
10359   format %{ "FSUB   $dst,$src" %}
10360   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10361   ins_encode( Push_Reg_FPR(src),
10362               OpcP, RegOpc(dst) );
10363   ins_pipe( fpu_reg_reg );
10364 %}
10365 
10366 // Spill to obtain 24-bit precision
10367 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10368   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10369   match(Set dst (AddF src1 src2));
10370 
10371   format %{ "FADD   $dst,$src1,$src2" %}
10372   opcode(0xD8, 0x0); /* D8 C0+i */
10373   ins_encode( Push_Reg_FPR(src2),
10374               OpcReg_FPR(src1),
10375               Pop_Mem_FPR(dst) );
10376   ins_pipe( fpu_mem_reg_reg );
10377 %}
10378 //
10379 // This instruction does not round to 24-bits
10380 instruct addFPR_reg(regFPR dst, regFPR src) %{
10381   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10382   match(Set dst (AddF dst src));
10383 
10384   format %{ "FLD    $src\n\t"
10385             "FADDp  $dst,ST" %}
10386   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10387   ins_encode( Push_Reg_FPR(src),
10388               OpcP, RegOpc(dst) );
10389   ins_pipe( fpu_reg_reg );
10390 %}
10391 
10392 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10393   predicate(UseSSE==0);
10394   match(Set dst (AbsF src));
10395   ins_cost(100);
10396   format %{ "FABS" %}
10397   opcode(0xE1, 0xD9);
10398   ins_encode( OpcS, OpcP );
10399   ins_pipe( fpu_reg_reg );
10400 %}
10401 
10402 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10403   predicate(UseSSE==0);
10404   match(Set dst (NegF src));
10405   ins_cost(100);
10406   format %{ "FCHS" %}
10407   opcode(0xE0, 0xD9);
10408   ins_encode( OpcS, OpcP );
10409   ins_pipe( fpu_reg_reg );
10410 %}
10411 
10412 // Cisc-alternate to addFPR_reg
10413 // Spill to obtain 24-bit precision
10414 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10415   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10416   match(Set dst (AddF src1 (LoadF src2)));
10417 
10418   format %{ "FLD    $src2\n\t"
10419             "FADD   ST,$src1\n\t"
10420             "FSTP_S $dst" %}
10421   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10422   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10423               OpcReg_FPR(src1),
10424               Pop_Mem_FPR(dst) );
10425   ins_pipe( fpu_mem_reg_mem );
10426 %}
10427 //
10428 // Cisc-alternate to addFPR_reg
10429 // This instruction does not round to 24-bits
10430 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10431   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10432   match(Set dst (AddF dst (LoadF src)));
10433 
10434   format %{ "FADD   $dst,$src" %}
10435   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10436   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10437               OpcP, RegOpc(dst) );
10438   ins_pipe( fpu_reg_mem );
10439 %}
10440 
10441 // Following two instructions for _222_mpegaudio
10442 // Spill to obtain 24-bit precision
10443 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10444   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10445   match(Set dst (AddF src1 src2));
10446 
10447   format %{ "FADD   $dst,$src1,$src2" %}
10448   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10449   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10450               OpcReg_FPR(src2),
10451               Pop_Mem_FPR(dst) );
10452   ins_pipe( fpu_mem_reg_mem );
10453 %}
10454 
10455 // Cisc-spill variant
10456 // Spill to obtain 24-bit precision
10457 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10458   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10459   match(Set dst (AddF src1 (LoadF src2)));
10460 
10461   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10462   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10463   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10464               set_instruction_start,
10465               OpcP, RMopc_Mem(secondary,src1),
10466               Pop_Mem_FPR(dst) );
10467   ins_pipe( fpu_mem_mem_mem );
10468 %}
10469 
10470 // Spill to obtain 24-bit precision
10471 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10472   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10473   match(Set dst (AddF src1 src2));
10474 
10475   format %{ "FADD   $dst,$src1,$src2" %}
10476   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10477   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10478               set_instruction_start,
10479               OpcP, RMopc_Mem(secondary,src1),
10480               Pop_Mem_FPR(dst) );
10481   ins_pipe( fpu_mem_mem_mem );
10482 %}
10483 
10484 
10485 // Spill to obtain 24-bit precision
10486 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10487   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10488   match(Set dst (AddF src con));
10489   format %{ "FLD    $src\n\t"
10490             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10491             "FSTP_S $dst"  %}
10492   ins_encode %{
10493     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10494     __ fadd_s($constantaddress($con));
10495     __ fstp_s(Address(rsp, $dst$$disp));
10496   %}
10497   ins_pipe(fpu_mem_reg_con);
10498 %}
10499 //
10500 // This instruction does not round to 24-bits
10501 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10502   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10503   match(Set dst (AddF src con));
10504   format %{ "FLD    $src\n\t"
10505             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10506             "FSTP   $dst"  %}
10507   ins_encode %{
10508     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10509     __ fadd_s($constantaddress($con));
10510     __ fstp_d($dst$$reg);
10511   %}
10512   ins_pipe(fpu_reg_reg_con);
10513 %}
10514 
10515 // Spill to obtain 24-bit precision
10516 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10517   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10518   match(Set dst (MulF src1 src2));
10519 
10520   format %{ "FLD    $src1\n\t"
10521             "FMUL   $src2\n\t"
10522             "FSTP_S $dst"  %}
10523   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10524   ins_encode( Push_Reg_FPR(src1),
10525               OpcReg_FPR(src2),
10526               Pop_Mem_FPR(dst) );
10527   ins_pipe( fpu_mem_reg_reg );
10528 %}
10529 //
10530 // This instruction does not round to 24-bits
10531 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10532   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10533   match(Set dst (MulF src1 src2));
10534 
10535   format %{ "FLD    $src1\n\t"
10536             "FMUL   $src2\n\t"
10537             "FSTP_S $dst"  %}
10538   opcode(0xD8, 0x1); /* D8 C8+i */
10539   ins_encode( Push_Reg_FPR(src2),
10540               OpcReg_FPR(src1),
10541               Pop_Reg_FPR(dst) );
10542   ins_pipe( fpu_reg_reg_reg );
10543 %}
10544 
10545 
10546 // Spill to obtain 24-bit precision
10547 // Cisc-alternate to reg-reg multiply
10548 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10549   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10550   match(Set dst (MulF src1 (LoadF src2)));
10551 
10552   format %{ "FLD_S  $src2\n\t"
10553             "FMUL   $src1\n\t"
10554             "FSTP_S $dst"  %}
10555   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10556   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10557               OpcReg_FPR(src1),
10558               Pop_Mem_FPR(dst) );
10559   ins_pipe( fpu_mem_reg_mem );
10560 %}
10561 //
10562 // This instruction does not round to 24-bits
10563 // Cisc-alternate to reg-reg multiply
10564 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10565   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10566   match(Set dst (MulF src1 (LoadF src2)));
10567 
10568   format %{ "FMUL   $dst,$src1,$src2" %}
10569   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10570   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10571               OpcReg_FPR(src1),
10572               Pop_Reg_FPR(dst) );
10573   ins_pipe( fpu_reg_reg_mem );
10574 %}
10575 
10576 // Spill to obtain 24-bit precision
10577 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10578   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10579   match(Set dst (MulF src1 src2));
10580 
10581   format %{ "FMUL   $dst,$src1,$src2" %}
10582   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10583   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10584               set_instruction_start,
10585               OpcP, RMopc_Mem(secondary,src1),
10586               Pop_Mem_FPR(dst) );
10587   ins_pipe( fpu_mem_mem_mem );
10588 %}
10589 
10590 // Spill to obtain 24-bit precision
10591 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10592   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10593   match(Set dst (MulF src con));
10594 
10595   format %{ "FLD    $src\n\t"
10596             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10597             "FSTP_S $dst"  %}
10598   ins_encode %{
10599     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10600     __ fmul_s($constantaddress($con));
10601     __ fstp_s(Address(rsp, $dst$$disp));
10602   %}
10603   ins_pipe(fpu_mem_reg_con);
10604 %}
10605 //
10606 // This instruction does not round to 24-bits
10607 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10608   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10609   match(Set dst (MulF src con));
10610 
10611   format %{ "FLD    $src\n\t"
10612             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10613             "FSTP   $dst"  %}
10614   ins_encode %{
10615     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10616     __ fmul_s($constantaddress($con));
10617     __ fstp_d($dst$$reg);
10618   %}
10619   ins_pipe(fpu_reg_reg_con);
10620 %}
10621 
10622 
10623 //
10624 // MACRO1 -- subsume unshared load into mulFPR
10625 // This instruction does not round to 24-bits
10626 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10627   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10628   match(Set dst (MulF (LoadF mem1) src));
10629 
10630   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10631             "FMUL   ST,$src\n\t"
10632             "FSTP   $dst" %}
10633   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10634   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10635               OpcReg_FPR(src),
10636               Pop_Reg_FPR(dst) );
10637   ins_pipe( fpu_reg_reg_mem );
10638 %}
10639 //
10640 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10641 // This instruction does not round to 24-bits
10642 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10643   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10644   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10645   ins_cost(95);
10646 
10647   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10648             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10649             "FADD   ST,$src2\n\t"
10650             "FSTP   $dst" %}
10651   opcode(0xD9); /* LoadF D9 /0 */
10652   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10653               FMul_ST_reg(src1),
10654               FAdd_ST_reg(src2),
10655               Pop_Reg_FPR(dst) );
10656   ins_pipe( fpu_reg_mem_reg_reg );
10657 %}
10658 
10659 // MACRO3 -- addFPR a mulFPR
10660 // This instruction does not round to 24-bits.  It is a '2-address'
10661 // instruction in that the result goes back to src2.  This eliminates
10662 // a move from the macro; possibly the register allocator will have
10663 // to add it back (and maybe not).
10664 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10665   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10666   match(Set src2 (AddF (MulF src0 src1) src2));
10667 
10668   format %{ "FLD    $src0     ===MACRO3===\n\t"
10669             "FMUL   ST,$src1\n\t"
10670             "FADDP  $src2,ST" %}
10671   opcode(0xD9); /* LoadF D9 /0 */
10672   ins_encode( Push_Reg_FPR(src0),
10673               FMul_ST_reg(src1),
10674               FAddP_reg_ST(src2) );
10675   ins_pipe( fpu_reg_reg_reg );
10676 %}
10677 
10678 // MACRO4 -- divFPR subFPR
10679 // This instruction does not round to 24-bits
10680 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10681   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10682   match(Set dst (DivF (SubF src2 src1) src3));
10683 
10684   format %{ "FLD    $src2   ===MACRO4===\n\t"
10685             "FSUB   ST,$src1\n\t"
10686             "FDIV   ST,$src3\n\t"
10687             "FSTP  $dst" %}
10688   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10689   ins_encode( Push_Reg_FPR(src2),
10690               subFPR_divFPR_encode(src1,src3),
10691               Pop_Reg_FPR(dst) );
10692   ins_pipe( fpu_reg_reg_reg_reg );
10693 %}
10694 
10695 // Spill to obtain 24-bit precision
10696 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10697   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10698   match(Set dst (DivF src1 src2));
10699 
10700   format %{ "FDIV   $dst,$src1,$src2" %}
10701   opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10702   ins_encode( Push_Reg_FPR(src1),
10703               OpcReg_FPR(src2),
10704               Pop_Mem_FPR(dst) );
10705   ins_pipe( fpu_mem_reg_reg );
10706 %}
10707 //
10708 // This instruction does not round to 24-bits
10709 instruct divFPR_reg(regFPR dst, regFPR src) %{
10710   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10711   match(Set dst (DivF dst src));
10712 
10713   format %{ "FDIV   $dst,$src" %}
10714   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10715   ins_encode( Push_Reg_FPR(src),
10716               OpcP, RegOpc(dst) );
10717   ins_pipe( fpu_reg_reg );
10718 %}
10719 
10720 
10721 // Spill to obtain 24-bit precision
10722 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10723   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10724   match(Set dst (ModF src1 src2));
10725   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10726 
10727   format %{ "FMOD   $dst,$src1,$src2" %}
10728   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10729               emitModDPR(),
10730               Push_Result_Mod_DPR(src2),
10731               Pop_Mem_FPR(dst));
10732   ins_pipe( pipe_slow );
10733 %}
10734 //
10735 // This instruction does not round to 24-bits
10736 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10737   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10738   match(Set dst (ModF dst src));
10739   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10740 
10741   format %{ "FMOD   $dst,$src" %}
10742   ins_encode(Push_Reg_Mod_DPR(dst, src),
10743               emitModDPR(),
10744               Push_Result_Mod_DPR(src),
10745               Pop_Reg_FPR(dst));
10746   ins_pipe( pipe_slow );
10747 %}
10748 
10749 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10750   predicate(UseSSE>=1);
10751   match(Set dst (ModF src0 src1));
10752   effect(KILL rax, KILL cr);
10753   format %{ "SUB    ESP,4\t # FMOD\n"
10754           "\tMOVSS  [ESP+0],$src1\n"
10755           "\tFLD_S  [ESP+0]\n"
10756           "\tMOVSS  [ESP+0],$src0\n"
10757           "\tFLD_S  [ESP+0]\n"
10758      "loop:\tFPREM\n"
10759           "\tFWAIT\n"
10760           "\tFNSTSW AX\n"
10761           "\tSAHF\n"
10762           "\tJP     loop\n"
10763           "\tFSTP_S [ESP+0]\n"
10764           "\tMOVSS  $dst,[ESP+0]\n"
10765           "\tADD    ESP,4\n"
10766           "\tFSTP   ST0\t # Restore FPU Stack"
10767     %}
10768   ins_cost(250);
10769   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10770   ins_pipe( pipe_slow );
10771 %}
10772 
10773 
10774 //----------Arithmetic Conversion Instructions---------------------------------
10775 // The conversion operations are all alphabetically sorted.  Please keep it that way!
10776 
10777 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10778   predicate(UseSSE==0);
10779   match(Set dst (RoundFloat src));
10780   ins_cost(125);
10781   format %{ "FST_S  $dst,$src\t# F-round" %}
10782   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10783   ins_pipe( fpu_mem_reg );
10784 %}
10785 
10786 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10787   predicate(UseSSE<=1);
10788   match(Set dst (RoundDouble src));
10789   ins_cost(125);
10790   format %{ "FST_D  $dst,$src\t# D-round" %}
10791   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10792   ins_pipe( fpu_mem_reg );
10793 %}
10794 
10795 // Force rounding to 24-bit precision and 8-bit exponent
10796 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10797   predicate(UseSSE==0);
10798   match(Set dst (ConvD2F src));
10799   format %{ "FST_S  $dst,$src\t# F-round" %}
10800   expand %{
10801     roundFloat_mem_reg(dst,src);
10802   %}
10803 %}
10804 
10805 // Force rounding to 24-bit precision and 8-bit exponent
10806 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10807   predicate(UseSSE==1);
10808   match(Set dst (ConvD2F src));
10809   effect( KILL cr );
10810   format %{ "SUB    ESP,4\n\t"
10811             "FST_S  [ESP],$src\t# F-round\n\t"
10812             "MOVSS  $dst,[ESP]\n\t"
10813             "ADD ESP,4" %}
10814   ins_encode %{
10815     __ subptr(rsp, 4);
10816     if ($src$$reg != FPR1L_enc) {
10817       __ fld_s($src$$reg-1);
10818       __ fstp_s(Address(rsp, 0));
10819     } else {
10820       __ fst_s(Address(rsp, 0));
10821     }
10822     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10823     __ addptr(rsp, 4);
10824   %}
10825   ins_pipe( pipe_slow );
10826 %}
10827 
10828 // Force rounding double precision to single precision
10829 instruct convD2F_reg(regF dst, regD src) %{
10830   predicate(UseSSE>=2);
10831   match(Set dst (ConvD2F src));
10832   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10833   ins_encode %{
10834     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10835   %}
10836   ins_pipe( pipe_slow );
10837 %}
10838 
10839 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10840   predicate(UseSSE==0);
10841   match(Set dst (ConvF2D src));
10842   format %{ "FST_S  $dst,$src\t# D-round" %}
10843   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10844   ins_pipe( fpu_reg_reg );
10845 %}
10846 
10847 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10848   predicate(UseSSE==1);
10849   match(Set dst (ConvF2D src));
10850   format %{ "FST_D  $dst,$src\t# D-round" %}
10851   expand %{
10852     roundDouble_mem_reg(dst,src);
10853   %}
10854 %}
10855 
10856 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10857   predicate(UseSSE==1);
10858   match(Set dst (ConvF2D src));
10859   effect( KILL cr );
10860   format %{ "SUB    ESP,4\n\t"
10861             "MOVSS  [ESP],$src\n\t"
10862             "FLD_S  [ESP]\n\t"
10863             "ADD    ESP,4\n\t"
10864             "FSTP   $dst\t# D-round" %}
10865   ins_encode %{
10866     __ subptr(rsp, 4);
10867     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10868     __ fld_s(Address(rsp, 0));
10869     __ addptr(rsp, 4);
10870     __ fstp_d($dst$$reg);
10871   %}
10872   ins_pipe( pipe_slow );
10873 %}
10874 
10875 instruct convF2D_reg(regD dst, regF src) %{
10876   predicate(UseSSE>=2);
10877   match(Set dst (ConvF2D src));
10878   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10879   ins_encode %{
10880     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10881   %}
10882   ins_pipe( pipe_slow );
10883 %}
10884 
10885 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10886 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10887   predicate(UseSSE<=1);
10888   match(Set dst (ConvD2I src));
10889   effect( KILL tmp, KILL cr );
10890   format %{ "FLD    $src\t# Convert double to int \n\t"
10891             "FLDCW  trunc mode\n\t"
10892             "SUB    ESP,4\n\t"
10893             "FISTp  [ESP + #0]\n\t"
10894             "FLDCW  std/24-bit mode\n\t"
10895             "POP    EAX\n\t"
10896             "CMP    EAX,0x80000000\n\t"
10897             "JNE,s  fast\n\t"
10898             "FLD_D  $src\n\t"
10899             "CALL   d2i_wrapper\n"
10900       "fast:" %}
10901   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10902   ins_pipe( pipe_slow );
10903 %}
10904 
10905 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10906 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10907   predicate(UseSSE>=2);
10908   match(Set dst (ConvD2I src));
10909   effect( KILL tmp, KILL cr );
10910   format %{ "CVTTSD2SI $dst, $src\n\t"
10911             "CMP    $dst,0x80000000\n\t"
10912             "JNE,s  fast\n\t"
10913             "SUB    ESP, 8\n\t"
10914             "MOVSD  [ESP], $src\n\t"
10915             "FLD_D  [ESP]\n\t"
10916             "ADD    ESP, 8\n\t"
10917             "CALL   d2i_wrapper\n"
10918       "fast:" %}
10919   ins_encode %{
10920     Label fast;
10921     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10922     __ cmpl($dst$$Register, 0x80000000);
10923     __ jccb(Assembler::notEqual, fast);
10924     __ subptr(rsp, 8);
10925     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10926     __ fld_d(Address(rsp, 0));
10927     __ addptr(rsp, 8);
10928     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10929     __ bind(fast);
10930   %}
10931   ins_pipe( pipe_slow );
10932 %}
10933 
10934 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10935   predicate(UseSSE<=1);
10936   match(Set dst (ConvD2L src));
10937   effect( KILL cr );
10938   format %{ "FLD    $src\t# Convert double to long\n\t"
10939             "FLDCW  trunc mode\n\t"
10940             "SUB    ESP,8\n\t"
10941             "FISTp  [ESP + #0]\n\t"
10942             "FLDCW  std/24-bit mode\n\t"
10943             "POP    EAX\n\t"
10944             "POP    EDX\n\t"
10945             "CMP    EDX,0x80000000\n\t"
10946             "JNE,s  fast\n\t"
10947             "TEST   EAX,EAX\n\t"
10948             "JNE,s  fast\n\t"
10949             "FLD    $src\n\t"
10950             "CALL   d2l_wrapper\n"
10951       "fast:" %}
10952   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10953   ins_pipe( pipe_slow );
10954 %}
10955 
10956 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10957 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10958   predicate (UseSSE>=2);
10959   match(Set dst (ConvD2L src));
10960   effect( KILL cr );
10961   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10962             "MOVSD  [ESP],$src\n\t"
10963             "FLD_D  [ESP]\n\t"
10964             "FLDCW  trunc mode\n\t"
10965             "FISTp  [ESP + #0]\n\t"
10966             "FLDCW  std/24-bit mode\n\t"
10967             "POP    EAX\n\t"
10968             "POP    EDX\n\t"
10969             "CMP    EDX,0x80000000\n\t"
10970             "JNE,s  fast\n\t"
10971             "TEST   EAX,EAX\n\t"
10972             "JNE,s  fast\n\t"
10973             "SUB    ESP,8\n\t"
10974             "MOVSD  [ESP],$src\n\t"
10975             "FLD_D  [ESP]\n\t"
10976             "ADD    ESP,8\n\t"
10977             "CALL   d2l_wrapper\n"
10978       "fast:" %}
10979   ins_encode %{
10980     Label fast;
10981     __ subptr(rsp, 8);
10982     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10983     __ fld_d(Address(rsp, 0));
10984     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10985     __ fistp_d(Address(rsp, 0));
10986     // Restore the rounding mode, mask the exception
10987     if (Compile::current()->in_24_bit_fp_mode()) {
10988       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10989     } else {
10990       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10991     }
10992     // Load the converted long, adjust CPU stack
10993     __ pop(rax);
10994     __ pop(rdx);
10995     __ cmpl(rdx, 0x80000000);
10996     __ jccb(Assembler::notEqual, fast);
10997     __ testl(rax, rax);
10998     __ jccb(Assembler::notEqual, fast);
10999     __ subptr(rsp, 8);
11000     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11001     __ fld_d(Address(rsp, 0));
11002     __ addptr(rsp, 8);
11003     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11004     __ bind(fast);
11005   %}
11006   ins_pipe( pipe_slow );
11007 %}
11008 
11009 // Convert a float to an int.  Java semantics require we do complex
11010 // manglations in the corner cases.  So we set the rounding mode to
11011 // 'zero', store the darned float down as an int, and reset the
11012 // rounding mode to 'nearest'.  The hardware stores a flag value down
11013 // if we would overflow or convert a NaN; we check for this and
11014 // go the slow path if needed.
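// A rough sketch of the convert-and-check sequence (illustrative only; the
// real bytes come from the DPR2I_encoding used below):
//     FLDCW  [trunc mode]       // round toward zero, as Java requires
//     FISTp  [ESP]              // store the value as a 32-bit int
//     FLDCW  [std/24-bit mode]  // restore the previous rounding mode
//     POP    EAX
//     CMP    EAX,0x80000000     // hardware's "could not convert" indicator
//     JNE,s  fast               // common case: the conversion was valid
//     CALL   d2i_wrapper        // slow path applies the Java corner cases
//                               // (e.g. a NaN input becomes 0)
//   fast: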
11015 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
11016   predicate(UseSSE==0);
11017   match(Set dst (ConvF2I src));
11018   effect( KILL tmp, KILL cr );
11019   format %{ "FLD    $src\t# Convert float to int \n\t"
11020             "FLDCW  trunc mode\n\t"
11021             "SUB    ESP,4\n\t"
11022             "FISTp  [ESP + #0]\n\t"
11023             "FLDCW  std/24-bit mode\n\t"
11024             "POP    EAX\n\t"
11025             "CMP    EAX,0x80000000\n\t"
11026             "JNE,s  fast\n\t"
11027             "FLD    $src\n\t"
11028             "CALL   d2i_wrapper\n"
11029       "fast:" %}
11030   // DPR2I_encoding works for FPR2I
11031   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
11032   ins_pipe( pipe_slow );
11033 %}
11034 
11035 // Convert a float in xmm to an int reg.
11036 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11037   predicate(UseSSE>=1);
11038   match(Set dst (ConvF2I src));
11039   effect( KILL tmp, KILL cr );
11040   format %{ "CVTTSS2SI $dst, $src\n\t"
11041             "CMP    $dst,0x80000000\n\t"
11042             "JNE,s  fast\n\t"
11043             "SUB    ESP, 4\n\t"
11044             "MOVSS  [ESP], $src\n\t"
11045             "FLD    [ESP]\n\t"
11046             "ADD    ESP, 4\n\t"
11047             "CALL   d2i_wrapper\n"
11048       "fast:" %}
11049   ins_encode %{
11050     Label fast;
11051     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
11052     __ cmpl($dst$$Register, 0x80000000);
11053     __ jccb(Assembler::notEqual, fast);
11054     __ subptr(rsp, 4);
11055     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11056     __ fld_s(Address(rsp, 0));
11057     __ addptr(rsp, 4);
11058     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11059     __ bind(fast);
11060   %}
11061   ins_pipe( pipe_slow );
11062 %}
11063 
11064 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
11065   predicate(UseSSE==0);
11066   match(Set dst (ConvF2L src));
11067   effect( KILL cr );
11068   format %{ "FLD    $src\t# Convert float to long\n\t"
11069             "FLDCW  trunc mode\n\t"
11070             "SUB    ESP,8\n\t"
11071             "FISTp  [ESP + #0]\n\t"
11072             "FLDCW  std/24-bit mode\n\t"
11073             "POP    EAX\n\t"
11074             "POP    EDX\n\t"
11075             "CMP    EDX,0x80000000\n\t"
11076             "JNE,s  fast\n\t"
11077             "TEST   EAX,EAX\n\t"
11078             "JNE,s  fast\n\t"
11079             "FLD    $src\n\t"
11080             "CALL   d2l_wrapper\n"
11081       "fast:" %}
11082   // DPR2L_encoding works for FPR2L
11083   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
11084   ins_pipe( pipe_slow );
11085 %}
11086 
11087 // XMM lacks a float/double->long conversion, so use the old FPU stack.
11088 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11089   predicate (UseSSE>=1);
11090   match(Set dst (ConvF2L src));
11091   effect( KILL cr );
11092   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
11093             "MOVSS  [ESP],$src\n\t"
11094             "FLD_S  [ESP]\n\t"
11095             "FLDCW  trunc mode\n\t"
11096             "FISTp  [ESP + #0]\n\t"
11097             "FLDCW  std/24-bit mode\n\t"
11098             "POP    EAX\n\t"
11099             "POP    EDX\n\t"
11100             "CMP    EDX,0x80000000\n\t"
11101             "JNE,s  fast\n\t"
11102             "TEST   EAX,EAX\n\t"
11103             "JNE,s  fast\n\t"
11104             "SUB    ESP,4\t# Convert float to long\n\t"
11105             "MOVSS  [ESP],$src\n\t"
11106             "FLD_S  [ESP]\n\t"
11107             "ADD    ESP,4\n\t"
11108             "CALL   d2l_wrapper\n"
11109       "fast:" %}
11110   ins_encode %{
11111     Label fast;
11112     __ subptr(rsp, 8);
11113     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11114     __ fld_s(Address(rsp, 0));
11115     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11116     __ fistp_d(Address(rsp, 0));
11117     // Restore the rounding mode, mask the exception
11118     if (Compile::current()->in_24_bit_fp_mode()) {
11119       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11120     } else {
11121       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11122     }
11123     // Load the converted long, adjust CPU stack
11124     __ pop(rax);
11125     __ pop(rdx);
11126     __ cmpl(rdx, 0x80000000);
11127     __ jccb(Assembler::notEqual, fast);
11128     __ testl(rax, rax);
11129     __ jccb(Assembler::notEqual, fast);
11130     __ subptr(rsp, 4);
11131     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11132     __ fld_s(Address(rsp, 0));
11133     __ addptr(rsp, 4);
11134     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11135     __ bind(fast);
11136   %}
11137   ins_pipe( pipe_slow );
11138 %}
11139 
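      // Convert an int to a double on the x87 FPU stack (UseSSE<=1).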
11140 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11141   predicate( UseSSE<=1 );
11142   match(Set dst (ConvI2D src));
11143   format %{ "FILD   $src\n\t"
11144             "FSTP   $dst" %}
11145   opcode(0xDB, 0x0);  /* DB /0 */
11146   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11147   ins_pipe( fpu_reg_mem );
11148 %}
11149 
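      // Convert an int to a double in xmm; every int is exactly representable
      // as a double, so no rounding step is needed.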
11150 instruct convI2D_reg(regD dst, rRegI src) %{
11151   predicate( UseSSE>=2 && !UseXmmI2D );
11152   match(Set dst (ConvI2D src));
11153   format %{ "CVTSI2SD $dst,$src" %}
11154   ins_encode %{
11155     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11156   %}
11157   ins_pipe( pipe_slow );
11158 %}
11159 
11160 instruct convI2D_mem(regD dst, memory mem) %{
11161   predicate( UseSSE>=2 );
11162   match(Set dst (ConvI2D (LoadI mem)));
11163   format %{ "CVTSI2SD $dst,$mem" %}
11164   ins_encode %{
11165     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11166   %}
11167   ins_pipe( pipe_slow );
11168 %}
11169 
11170 instruct convXI2D_reg(regD dst, rRegI src)
11171 %{
11172   predicate( UseSSE>=2 && UseXmmI2D );
11173   match(Set dst (ConvI2D src));
11174 
11175   format %{ "MOVD  $dst,$src\n\t"
11176             "CVTDQ2PD $dst,$dst\t# i2d" %}
11177   ins_encode %{
11178     __ movdl($dst$$XMMRegister, $src$$Register);
11179     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11180   %}
11181   ins_pipe(pipe_slow); // XXX
11182 %}
11183 
11184 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11185   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11186   match(Set dst (ConvI2D (LoadI mem)));
11187   format %{ "FILD   $mem\n\t"
11188             "FSTP   $dst" %}
11189   opcode(0xDB);      /* DB /0 */
11190   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11191               Pop_Reg_DPR(dst));
11192   ins_pipe( fpu_reg_mem );
11193 %}
11194 
11195 // Convert a byte to a float; no rounding step needed.
11196 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11197   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11198   match(Set dst (ConvI2F src));
11199   format %{ "FILD   $src\n\t"
11200             "FSTP   $dst" %}
11201 
11202   opcode(0xDB, 0x0);  /* DB /0 */
11203   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11204   ins_pipe( fpu_reg_mem );
11205 %}
11206 
11207 // In 24-bit mode, force rounding to single precision by storing the result back out
11208 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11209   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11210   match(Set dst (ConvI2F src));
11211   ins_cost(200);
11212   format %{ "FILD   $src\n\t"
11213             "FSTP_S $dst" %}
11214   opcode(0xDB, 0x0);  /* DB /0 */
11215   ins_encode( Push_Mem_I(src),
11216               Pop_Mem_FPR(dst));
11217   ins_pipe( fpu_mem_mem );
11218 %}
11219 
11220 // In 24-bit mode, force rounding to single precision by storing the result back out
11221 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11222   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11223   match(Set dst (ConvI2F (LoadI mem)));
11224   ins_cost(200);
11225   format %{ "FILD   $mem\n\t"
11226             "FSTP_S $dst" %}
11227   opcode(0xDB);  /* DB /0 */
11228   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11229               Pop_Mem_FPR(dst));
11230   ins_pipe( fpu_mem_mem );
11231 %}
11232 
11233 // This instruction does not round to 24-bits
11234 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11235   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11236   match(Set dst (ConvI2F src));
11237   format %{ "FILD   $src\n\t"
11238             "FSTP   $dst" %}
11239   opcode(0xDB, 0x0);  /* DB /0 */
11240   ins_encode( Push_Mem_I(src),
11241               Pop_Reg_FPR(dst));
11242   ins_pipe( fpu_reg_mem );
11243 %}
11244 
11245 // This instruction does not round to 24-bits
11246 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11247   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11248   match(Set dst (ConvI2F (LoadI mem)));
11249   format %{ "FILD   $mem\n\t"
11250             "FSTP   $dst" %}
11251   opcode(0xDB);      /* DB /0 */
11252   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11253               Pop_Reg_FPR(dst));
11254   ins_pipe( fpu_reg_mem );
11255 %}
11256 
11257 // Convert an int to a float in xmm; no rounding step needed.
11258 instruct convI2F_reg(regF dst, rRegI src) %{
11259   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11260   match(Set dst (ConvI2F src));
11261   format %{ "CVTSI2SS $dst, $src" %}
11262   ins_encode %{
11263     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11264   %}
11265   ins_pipe( pipe_slow );
11266 %}
11267 
11268 instruct convXI2F_reg(regF dst, rRegI src)
11269 %{
11270   predicate( UseSSE>=2 && UseXmmI2F );
11271   match(Set dst (ConvI2F src));
11272 
11273   format %{ "MOVD  $dst,$src\n\t"
11274             "CVTDQ2PS $dst,$dst\t# i2f" %}
11275   ins_encode %{
11276     __ movdl($dst$$XMMRegister, $src$$Register);
11277     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11278   %}
11279   ins_pipe(pipe_slow); // XXX
11280 %}
11281 
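      // Sign-extend convert int to long: copy the low word and produce the high
      // word with an arithmetic shift right by 31 (e.g. -1 gives lo = hi = 0xFFFFFFFF).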
11282 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11283   match(Set dst (ConvI2L src));
11284   effect(KILL cr);
11285   ins_cost(375);
11286   format %{ "MOV    $dst.lo,$src\n\t"
11287             "MOV    $dst.hi,$src\n\t"
11288             "SAR    $dst.hi,31" %}
11289   ins_encode(convert_int_long(dst,src));
11290   ins_pipe( ialu_reg_reg_long );
11291 %}
11292 
11293 // Zero-extend convert int to long
11294 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11295   match(Set dst (AndL (ConvI2L src) mask) );
11296   effect( KILL flags );
11297   ins_cost(250);
11298   format %{ "MOV    $dst.lo,$src\n\t"
11299             "XOR    $dst.hi,$dst.hi" %}
11300   opcode(0x33); // XOR
11301   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11302   ins_pipe( ialu_reg_reg_long );
11303 %}
11304 
11305 // Zero-extend long
11306 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11307   match(Set dst (AndL src mask) );
11308   effect( KILL flags );
11309   ins_cost(250);
11310   format %{ "MOV    $dst.lo,$src.lo\n\t"
11311             "XOR    $dst.hi,$dst.hi\n\t" %}
11312   opcode(0x33); // XOR
11313   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11314   ins_pipe( ialu_reg_reg_long );
11315 %}
11316 
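      // Convert a long to a double on the x87 FPU stack: push both halves and
      // FILD the 64-bit value from memory.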
11317 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11318   predicate (UseSSE<=1);
11319   match(Set dst (ConvL2D src));
11320   effect( KILL cr );
11321   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11322             "PUSH   $src.lo\n\t"
11323             "FILD   ST,[ESP + #0]\n\t"
11324             "ADD    ESP,8\n\t"
11325             "FSTP_D $dst\t# D-round" %}
11326   opcode(0xDF, 0x5);  /* DF /5 */
11327   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11328   ins_pipe( pipe_slow );
11329 %}
11330 
11331 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11332   predicate (UseSSE>=2);
11333   match(Set dst (ConvL2D src));
11334   effect( KILL cr );
11335   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11336             "PUSH   $src.lo\n\t"
11337             "FILD_D [ESP]\n\t"
11338             "FSTP_D [ESP]\n\t"
11339             "MOVSD  $dst,[ESP]\n\t"
11340             "ADD    ESP,8" %}
11341   opcode(0xDF, 0x5);  /* DF /5 */
11342   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11343   ins_pipe( pipe_slow );
11344 %}
11345 
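      // Convert a long to a float in xmm: FILD the 64-bit value, round to single
      // precision with FSTP_S, then load the result into xmm with MOVSS.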
11346 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11347   predicate (UseSSE>=1);
11348   match(Set dst (ConvL2F src));
11349   effect( KILL cr );
11350   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11351             "PUSH   $src.lo\n\t"
11352             "FILD_D [ESP]\n\t"
11353             "FSTP_S [ESP]\n\t"
11354             "MOVSS  $dst,[ESP]\n\t"
11355             "ADD    ESP,8" %}
11356   opcode(0xDF, 0x5);  /* DF /5 */
11357   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11358   ins_pipe( pipe_slow );
11359 %}
11360 
11361 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11362   match(Set dst (ConvL2F src));
11363   effect( KILL cr );
11364   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11365             "PUSH   $src.lo\n\t"
11366             "FILD   ST,[ESP + #0]\n\t"
11367             "ADD    ESP,8\n\t"
11368             "FSTP_S $dst\t# F-round" %}
11369   opcode(0xDF, 0x5);  /* DF /5 */
11370   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11371   ins_pipe( pipe_slow );
11372 %}
11373 
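      // Convert a long to an int: the low word is the result, the high word is
      // simply dropped.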
11374 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11375   match(Set dst (ConvL2I src));
11376   effect( DEF dst, USE src );
11377   format %{ "MOV    $dst,$src.lo" %}
11378   ins_encode(enc_CopyL_Lo(dst,src));
11379   ins_pipe( ialu_reg_reg );
11380 %}
11381 
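      // The MoveF2I/MoveI2F/MoveD2L/MoveL2D instructions below reinterpret the
      // raw bits between the integer and floating-point views of a value; no
      // numeric conversion is performed.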
11382 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11383   match(Set dst (MoveF2I src));
11384   effect( DEF dst, USE src );
11385   ins_cost(100);
11386   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11387   ins_encode %{
11388     __ movl($dst$$Register, Address(rsp, $src$$disp));
11389   %}
11390   ins_pipe( ialu_reg_mem );
11391 %}
11392 
11393 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11394   predicate(UseSSE==0);
11395   match(Set dst (MoveF2I src));
11396   effect( DEF dst, USE src );
11397 
11398   ins_cost(125);
11399   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11400   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11401   ins_pipe( fpu_mem_reg );
11402 %}
11403 
11404 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11405   predicate(UseSSE>=1);
11406   match(Set dst (MoveF2I src));
11407   effect( DEF dst, USE src );
11408 
11409   ins_cost(95);
11410   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11411   ins_encode %{
11412     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11413   %}
11414   ins_pipe( pipe_slow );
11415 %}
11416 
11417 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11418   predicate(UseSSE>=2);
11419   match(Set dst (MoveF2I src));
11420   effect( DEF dst, USE src );
11421   ins_cost(85);
11422   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11423   ins_encode %{
11424     __ movdl($dst$$Register, $src$$XMMRegister);
11425   %}
11426   ins_pipe( pipe_slow );
11427 %}
11428 
11429 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11430   match(Set dst (MoveI2F src));
11431   effect( DEF dst, USE src );
11432 
11433   ins_cost(100);
11434   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11435   ins_encode %{
11436     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11437   %}
11438   ins_pipe( ialu_mem_reg );
11439 %}
11440 
11441 
11442 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11443   predicate(UseSSE==0);
11444   match(Set dst (MoveI2F src));
11445   effect(DEF dst, USE src);
11446 
11447   ins_cost(125);
11448   format %{ "FLD_S  $src\n\t"
11449             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11450   opcode(0xD9);               /* D9 /0, FLD m32real */
11451   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11452               Pop_Reg_FPR(dst) );
11453   ins_pipe( fpu_reg_mem );
11454 %}
11455 
11456 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11457   predicate(UseSSE>=1);
11458   match(Set dst (MoveI2F src));
11459   effect( DEF dst, USE src );
11460 
11461   ins_cost(95);
11462   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11463   ins_encode %{
11464     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11465   %}
11466   ins_pipe( pipe_slow );
11467 %}
11468 
11469 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11470   predicate(UseSSE>=2);
11471   match(Set dst (MoveI2F src));
11472   effect( DEF dst, USE src );
11473 
11474   ins_cost(85);
11475   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11476   ins_encode %{
11477     __ movdl($dst$$XMMRegister, $src$$Register);
11478   %}
11479   ins_pipe( pipe_slow );
11480 %}
11481 
11482 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11483   match(Set dst (MoveD2L src));
11484   effect(DEF dst, USE src);
11485 
11486   ins_cost(250);
11487   format %{ "MOV    $dst.lo,$src\n\t"
11488             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11489   opcode(0x8B, 0x8B);
11490   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11491   ins_pipe( ialu_mem_long_reg );
11492 %}
11493 
11494 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11495   predicate(UseSSE<=1);
11496   match(Set dst (MoveD2L src));
11497   effect(DEF dst, USE src);
11498 
11499   ins_cost(125);
11500   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11501   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11502   ins_pipe( fpu_mem_reg );
11503 %}
11504 
11505 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11506   predicate(UseSSE>=2);
11507   match(Set dst (MoveD2L src));
11508   effect(DEF dst, USE src);
11509   ins_cost(95);
11510   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11511   ins_encode %{
11512     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11513   %}
11514   ins_pipe( pipe_slow );
11515 %}
11516 
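      // Move a double out of xmm into a register pair: MOVD extracts the low
      // 32 bits, then PSHUFLW with 0x4E swaps the two 32-bit halves so a second
      // MOVD can extract the high 32 bits.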
11517 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11518   predicate(UseSSE>=2);
11519   match(Set dst (MoveD2L src));
11520   effect(DEF dst, USE src, TEMP tmp);
11521   ins_cost(85);
11522   format %{ "MOVD   $dst.lo,$src\n\t"
11523             "PSHUFLW $tmp,$src,0x4E\n\t"
11524             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11525   ins_encode %{
11526     __ movdl($dst$$Register, $src$$XMMRegister);
11527     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11528     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11529   %}
11530   ins_pipe( pipe_slow );
11531 %}
11532 
11533 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11534   match(Set dst (MoveL2D src));
11535   effect(DEF dst, USE src);
11536 
11537   ins_cost(200);
11538   format %{ "MOV    $dst,$src.lo\n\t"
11539             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11540   opcode(0x89, 0x89);
11541   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11542   ins_pipe( ialu_mem_long_reg );
11543 %}
11544 
11545 
11546 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11547   predicate(UseSSE<=1);
11548   match(Set dst (MoveL2D src));
11549   effect(DEF dst, USE src);
11550   ins_cost(125);
11551 
11552   format %{ "FLD_D  $src\n\t"
11553             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11554   opcode(0xDD);               /* DD /0, FLD m64real */
11555   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11556               Pop_Reg_DPR(dst) );
11557   ins_pipe( fpu_reg_mem );
11558 %}
11559 
11560 
11561 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11562   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11563   match(Set dst (MoveL2D src));
11564   effect(DEF dst, USE src);
11565 
11566   ins_cost(95);
11567   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11568   ins_encode %{
11569     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11570   %}
11571   ins_pipe( pipe_slow );
11572 %}
11573 
11574 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11575   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11576   match(Set dst (MoveL2D src));
11577   effect(DEF dst, USE src);
11578 
11579   ins_cost(95);
11580   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11581   ins_encode %{
11582     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11583   %}
11584   ins_pipe( pipe_slow );
11585 %}
11586 
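      // Build a double in xmm from a register pair: MOVD each 32-bit half into
      // xmm and interleave the two halves with PUNPCKLDQ.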
11587 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11588   predicate(UseSSE>=2);
11589   match(Set dst (MoveL2D src));
11590   effect(TEMP dst, USE src, TEMP tmp);
11591   ins_cost(85);
11592   format %{ "MOVD   $dst,$src.lo\n\t"
11593             "MOVD   $tmp,$src.hi\n\t"
11594             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11595   ins_encode %{
11596     __ movdl($dst$$XMMRegister, $src$$Register);
11597     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11598     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11599   %}
11600   ins_pipe( pipe_slow );
11601 %}
11602 
11603 
11604 // =======================================================================
11605 // fast clearing of an array
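      // Emitted via MacroAssembler::clear_mem(): arrays at or below
      // InitArrayShortSize use a short doubleword store loop, larger ones use
      // REP STOSB, XMM stores or REP STOS depending on the CPU flags.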
11606 instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11607   predicate(!((ClearArrayNode*)n)->is_large());
11608   match(Set dummy (ClearArray cnt base));
11609   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11610 
11611   format %{ $$template
11612     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11613     $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11614     $$emit$$"JG     LARGE\n\t"
11615     $$emit$$"SHL    ECX, 1\n\t"
11616     $$emit$$"DEC    ECX\n\t"
11617     $$emit$$"JS     DONE\t# Zero length\n\t"
11618     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11619     $$emit$$"DEC    ECX\n\t"
11620     $$emit$$"JGE    LOOP\n\t"
11621     $$emit$$"JMP    DONE\n\t"
11622     $$emit$$"# LARGE:\n\t"
11623     if (UseFastStosb) {
11624        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11625        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11626     } else if (UseXMMForObjInit) {
11627        $$emit$$"MOV     RDI,RAX\n\t"
11628        $$emit$$"VPXOR    YMM0,YMM0,YMM0\n\t"
11629        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11630        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11631        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11632        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11633        $$emit$$"ADD     0x40,RAX\n\t"
11634        $$emit$$"# L_zero_64_bytes:\n\t"
11635        $$emit$$"SUB     0x8,RCX\n\t"
11636        $$emit$$"JGE     L_loop\n\t"
11637        $$emit$$"ADD     0x4,RCX\n\t"
11638        $$emit$$"JL      L_tail\n\t"
11639        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11640        $$emit$$"ADD     0x20,RAX\n\t"
11641        $$emit$$"SUB     0x4,RCX\n\t"
11642        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11643        $$emit$$"ADD     0x4,RCX\n\t"
11644        $$emit$$"JLE     L_end\n\t"
11645        $$emit$$"DEC     RCX\n\t"
11646        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11647        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11648        $$emit$$"ADD     0x8,RAX\n\t"
11649        $$emit$$"DEC     RCX\n\t"
11650        $$emit$$"JGE     L_sloop\n\t"
11651        $$emit$$"# L_end:\n\t"
11652     } else {
11653        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11654        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11655     }
11656     $$emit$$"# DONE"
11657   %}
11658   ins_encode %{
11659     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11660                  $tmp$$XMMRegister, false);
11661   %}
11662   ins_pipe( pipe_slow );
11663 %}
11664 
11665 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11666   predicate(((ClearArrayNode*)n)->is_large());
11667   match(Set dummy (ClearArray cnt base));
11668   effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
11669   format %{ $$template
11670     if (UseFastStosb) {
11671        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11672        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11673        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11674     } else if (UseXMMForObjInit) {
11675        $$emit$$"MOV     RDI,RAX\t# ClearArray:\n\t"
11676        $$emit$$"VPXOR   YMM0,YMM0,YMM0\n\t"
11677        $$emit$$"JMPQ    L_zero_64_bytes\n\t"
11678        $$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
11679        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11680        $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
11681        $$emit$$"ADD     0x40,RAX\n\t"
11682        $$emit$$"# L_zero_64_bytes:\n\t"
11683        $$emit$$"SUB     0x8,RCX\n\t"
11684        $$emit$$"JGE     L_loop\n\t"
11685        $$emit$$"ADD     0x4,RCX\n\t"
11686        $$emit$$"JL      L_tail\n\t"
11687        $$emit$$"VMOVDQU YMM0,(RAX)\n\t"
11688        $$emit$$"ADD     0x20,RAX\n\t"
11689        $$emit$$"SUB     0x4,RCX\n\t"
11690        $$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
11691        $$emit$$"ADD     0x4,RCX\n\t"
11692        $$emit$$"JLE     L_end\n\t"
11693        $$emit$$"DEC     RCX\n\t"
11694        $$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
11695        $$emit$$"VMOVQ   XMM0,(RAX)\n\t"
11696        $$emit$$"ADD     0x8,RAX\n\t"
11697        $$emit$$"DEC     RCX\n\t"
11698        $$emit$$"JGE     L_sloop\n\t"
11699        $$emit$$"# L_end:\n\t"
11700     } else {
11701        $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11702        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11703        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11704     }
11705     $$emit$$"# DONE"
11706   %}
11707   ins_encode %{
11708     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
11709                  $tmp$$XMMRegister, true);
11710   %}
11711   ins_pipe( pipe_slow );
11712 %}
11713 
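      // String compare intrinsics.  The L/U suffixes name the encodings of the two
      // inputs: L = Latin-1 (byte[] elements), U = UTF-16 (char[] elements).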
11714 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11715                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11716   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11717   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11718   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11719 
11720   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11721   ins_encode %{
11722     __ string_compare($str1$$Register, $str2$$Register,
11723                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11724                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11725   %}
11726   ins_pipe( pipe_slow );
11727 %}
11728 
11729 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11730                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11731   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11732   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11733   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11734 
11735   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11736   ins_encode %{
11737     __ string_compare($str1$$Register, $str2$$Register,
11738                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11739                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11740   %}
11741   ins_pipe( pipe_slow );
11742 %}
11743 
11744 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11745                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11746   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11747   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11748   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11749 
11750   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11751   ins_encode %{
11752     __ string_compare($str1$$Register, $str2$$Register,
11753                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11754                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11755   %}
11756   ins_pipe( pipe_slow );
11757 %}
11758 
11759 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11760                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11761   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11762   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11763   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11764 
11765   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11766   ins_encode %{
11767     __ string_compare($str2$$Register, $str1$$Register,
11768                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11769                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11770   %}
11771   ins_pipe( pipe_slow );
11772 %}
11773 
11774 // fast string equals
11775 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11776                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11777   match(Set result (StrEquals (Binary str1 str2) cnt));
11778   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11779 
11780   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11781   ins_encode %{
11782     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11783                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11784                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11785   %}
11786 
11787   ins_pipe( pipe_slow );
11788 %}
11789 
11790 // fast search of substring with known size.
11791 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11792                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11793   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11794   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11795   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11796 
11797   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11798   ins_encode %{
11799     int icnt2 = (int)$int_cnt2$$constant;
11800     if (icnt2 >= 16) {
11801       // IndexOf for constant substrings with size >= 16 elements
11802       // which don't need to be loaded through the stack.
11803       __ string_indexofC8($str1$$Register, $str2$$Register,
11804                           $cnt1$$Register, $cnt2$$Register,
11805                           icnt2, $result$$Register,
11806                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11807     } else {
11808       // Small strings are loaded through the stack if they cross a page boundary.
11809       __ string_indexof($str1$$Register, $str2$$Register,
11810                         $cnt1$$Register, $cnt2$$Register,
11811                         icnt2, $result$$Register,
11812                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11813     }
11814   %}
11815   ins_pipe( pipe_slow );
11816 %}
11817 
11818 // fast search of substring with known size.
11819 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11820                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11821   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11822   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11823   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11824 
11825   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11826   ins_encode %{
11827     int icnt2 = (int)$int_cnt2$$constant;
11828     if (icnt2 >= 8) {
11829       // IndexOf for constant substrings with size >= 8 elements
11830       // which don't need to be loaded through the stack.
11831       __ string_indexofC8($str1$$Register, $str2$$Register,
11832                           $cnt1$$Register, $cnt2$$Register,
11833                           icnt2, $result$$Register,
11834                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11835     } else {
11836       // Small strings are loaded through the stack if they cross a page boundary.
11837       __ string_indexof($str1$$Register, $str2$$Register,
11838                         $cnt1$$Register, $cnt2$$Register,
11839                         icnt2, $result$$Register,
11840                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11841     }
11842   %}
11843   ins_pipe( pipe_slow );
11844 %}
11845 
11846 // fast search of substring with known size.
11847 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11848                              eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11849   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11850   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11851   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11852 
11853   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec1, $cnt1, $cnt2, $tmp" %}
11854   ins_encode %{
11855     int icnt2 = (int)$int_cnt2$$constant;
11856     if (icnt2 >= 8) {
11857       // IndexOf for constant substrings with size >= 8 elements
11858       // which don't need to be loaded through the stack.
11859       __ string_indexofC8($str1$$Register, $str2$$Register,
11860                           $cnt1$$Register, $cnt2$$Register,
11861                           icnt2, $result$$Register,
11862                           $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11863     } else {
11864       // Small strings are loaded through the stack if they cross a page boundary.
11865       __ string_indexof($str1$$Register, $str2$$Register,
11866                         $cnt1$$Register, $cnt2$$Register,
11867                         icnt2, $result$$Register,
11868                         $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11869     }
11870   %}
11871   ins_pipe( pipe_slow );
11872 %}
11873 
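      // Substring search where the substring length is known only at run time
      // (cnt2 arrives in a register, so -1 is passed as the constant length).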
11874 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11875                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11876   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11877   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11878   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11879 
11880   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11881   ins_encode %{
11882     __ string_indexof($str1$$Register, $str2$$Register,
11883                       $cnt1$$Register, $cnt2$$Register,
11884                       (-1), $result$$Register,
11885                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11886   %}
11887   ins_pipe( pipe_slow );
11888 %}
11889 
11890 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11891                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11892   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11893   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11894   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11895 
11896   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11897   ins_encode %{
11898     __ string_indexof($str1$$Register, $str2$$Register,
11899                       $cnt1$$Register, $cnt2$$Register,
11900                       (-1), $result$$Register,
11901                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11902   %}
11903   ins_pipe( pipe_slow );
11904 %}
11905 
11906 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11907                          eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{
11908   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11909   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11910   effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11911 
11912   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11913   ins_encode %{
11914     __ string_indexof($str1$$Register, $str2$$Register,
11915                       $cnt1$$Register, $cnt2$$Register,
11916                       (-1), $result$$Register,
11917                       $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11918   %}
11919   ins_pipe( pipe_slow );
11920 %}
11921 
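      // Search a UTF-16 char[] for a single character.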
11922 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11923                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11924   predicate(UseSSE42Intrinsics);
11925   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11926   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11927   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11928   ins_encode %{
11929     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11930                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11931   %}
11932   ins_pipe( pipe_slow );
11933 %}
11934 
11935 // fast array equals
11936 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11937                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11938 %{
11939   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11940   match(Set result (AryEq ary1 ary2));
11941   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11942   //ins_cost(300);
11943 
11944   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11945   ins_encode %{
11946     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11947                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11948                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11949   %}
11950   ins_pipe( pipe_slow );
11951 %}
11952 
11953 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11954                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11955 %{
11956   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11957   match(Set result (AryEq ary1 ary2));
11958   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11959   //ins_cost(300);
11960 
11961   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11962   ins_encode %{
11963     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11964                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11965                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11966   %}
11967   ins_pipe( pipe_slow );
11968 %}
11969 
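      // Check a byte[] for any negative byte, i.e. any element with the high bit set.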
11970 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11971                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11972 %{
11973   match(Set result (HasNegatives ary1 len));
11974   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11975 
11976   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11977   ins_encode %{
11978     __ has_negatives($ary1$$Register, $len$$Register,
11979                      $result$$Register, $tmp3$$Register,
11980                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11981   %}
11982   ins_pipe( pipe_slow );
11983 %}
11984 
11985 // fast char[] to byte[] compression
11986 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11987                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11988   match(Set result (StrCompressedCopy src (Binary dst len)));
11989   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11990 
11991   format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11992   ins_encode %{
11993     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11994                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11995                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11996   %}
11997   ins_pipe( pipe_slow );
11998 %}
11999 
12000 // fast byte[] to char[] inflation
12001 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
12002                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
12003   match(Set dummy (StrInflatedCopy src (Binary dst len)));
12004   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
12005 
12006   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
12007   ins_encode %{
12008     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
12009                           $tmp1$$XMMRegister, $tmp2$$Register);
12010   %}
12011   ins_pipe( pipe_slow );
12012 %}
12013 
12014 // encode char[] to byte[] in ISO_8859_1
12015 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
12016                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
12017                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
12018   match(Set result (EncodeISOArray src (Binary dst len)));
12019   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
12020 
12021   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
12022   ins_encode %{
12023     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
12024                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
12025                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
12026   %}
12027   ins_pipe( pipe_slow );
12028 %}
12029 
12030 
12031 //----------Control Flow Instructions------------------------------------------
12032 // Signed compare Instructions
12033 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
12034   match(Set cr (CmpI op1 op2));
12035   effect( DEF cr, USE op1, USE op2 );
12036   format %{ "CMP    $op1,$op2" %}
12037   opcode(0x3B);  /* Opcode 3B /r */
12038   ins_encode( OpcP, RegReg( op1, op2) );
12039   ins_pipe( ialu_cr_reg_reg );
12040 %}
12041 
12042 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
12043   match(Set cr (CmpI op1 op2));
12044   effect( DEF cr, USE op1 );
12045   format %{ "CMP    $op1,$op2" %}
12046   opcode(0x81,0x07);  /* Opcode 81 /7 */
12047   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12048   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12049   ins_pipe( ialu_cr_reg_imm );
12050 %}
12051 
12052 // Cisc-spilled version of cmpI_eReg
12053 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
12054   match(Set cr (CmpI op1 (LoadI op2)));
12055 
12056   format %{ "CMP    $op1,$op2" %}
12057   ins_cost(500);
12058   opcode(0x3B);  /* Opcode 3B /r */
12059   ins_encode( OpcP, RegMem( op1, op2) );
12060   ins_pipe( ialu_cr_reg_mem );
12061 %}
12062 
12063 instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
12064   match(Set cr (CmpI src zero));
12065   effect( DEF cr, USE src );
12066 
12067   format %{ "TEST   $src,$src" %}
12068   opcode(0x85);
12069   ins_encode( OpcP, RegReg( src, src ) );
12070   ins_pipe( ialu_cr_reg_imm );
12071 %}
12072 
12073 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
12074   match(Set cr (CmpI (AndI src con) zero));
12075 
12076   format %{ "TEST   $src,$con" %}
12077   opcode(0xF7,0x00);
12078   ins_encode( OpcP, RegOpc(src), Con32(con) );
12079   ins_pipe( ialu_cr_reg_imm );
12080 %}
12081 
12082 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
12083   match(Set cr (CmpI (AndI src mem) zero));
12084 
12085   format %{ "TEST   $src,$mem" %}
12086   opcode(0x85);
12087   ins_encode( OpcP, RegMem( src, mem ) );
12088   ins_pipe( ialu_cr_reg_mem );
12089 %}
12090 
12091 // Unsigned compare Instructions; really, same as signed except they
12092 // produce an eFlagsRegU instead of eFlagsReg.
12093 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
12094   match(Set cr (CmpU op1 op2));
12095 
12096   format %{ "CMPu   $op1,$op2" %}
12097   opcode(0x3B);  /* Opcode 3B /r */
12098   ins_encode( OpcP, RegReg( op1, op2) );
12099   ins_pipe( ialu_cr_reg_reg );
12100 %}
12101 
12102 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
12103   match(Set cr (CmpU op1 op2));
12104 
12105   format %{ "CMPu   $op1,$op2" %}
12106   opcode(0x81,0x07);  /* Opcode 81 /7 */
12107   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12108   ins_pipe( ialu_cr_reg_imm );
12109 %}
12110 
12111 // Cisc-spilled version of cmpU_eReg
12112 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
12113   match(Set cr (CmpU op1 (LoadI op2)));
12114 
12115   format %{ "CMPu   $op1,$op2" %}
12116   ins_cost(500);
12117   opcode(0x3B);  /* Opcode 3B /r */
12118   ins_encode( OpcP, RegMem( op1, op2) );
12119   ins_pipe( ialu_cr_reg_mem );
12120 %}
12121 
12122 // // Cisc-spilled version of cmpU_eReg
12123 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
12124 //  match(Set cr (CmpU (LoadI op1) op2));
12125 //
12126 //  format %{ "CMPu   $op1,$op2" %}
12127 //  ins_cost(500);
12128 //  opcode(0x39);  /* Opcode 39 /r */
12129 //  ins_encode( OpcP, RegMem( op1, op2) );
12130 //%}
12131 
12132 instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
12133   match(Set cr (CmpU src zero));
12134 
12135   format %{ "TESTu  $src,$src" %}
12136   opcode(0x85);
12137   ins_encode( OpcP, RegReg( src, src ) );
12138   ins_pipe( ialu_cr_reg_imm );
12139 %}
12140 
12141 // Unsigned pointer compare Instructions
12142 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12143   match(Set cr (CmpP op1 op2));
12144 
12145   format %{ "CMPu   $op1,$op2" %}
12146   opcode(0x3B);  /* Opcode 3B /r */
12147   ins_encode( OpcP, RegReg( op1, op2) );
12148   ins_pipe( ialu_cr_reg_reg );
12149 %}
12150 
12151 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12152   match(Set cr (CmpP op1 op2));
12153 
12154   format %{ "CMPu   $op1,$op2" %}
12155   opcode(0x81,0x07);  /* Opcode 81 /7 */
12156   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12157   ins_pipe( ialu_cr_reg_imm );
12158 %}
12159 
12160 // Cisc-spilled version of cmpP_eReg
12161 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12162   match(Set cr (CmpP op1 (LoadP op2)));
12163 
12164   format %{ "CMPu   $op1,$op2" %}
12165   ins_cost(500);
12166   opcode(0x3B);  /* Opcode 3B /r */
12167   ins_encode( OpcP, RegMem( op1, op2) );
12168   ins_pipe( ialu_cr_reg_mem );
12169 %}
12170 
12171 // // Cisc-spilled version of cmpP_eReg
12172 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12173 //  match(Set cr (CmpP (LoadP op1) op2));
12174 //
12175 //  format %{ "CMPu   $op1,$op2" %}
12176 //  ins_cost(500);
12177 //  opcode(0x39);  /* Opcode 39 /r */
12178 //  ins_encode( OpcP, RegMem( op1, op2) );
12179 //%}
12180 
12181 // Compare raw pointer (used in out-of-heap check).
12182 // Only works because non-oop pointers must be raw pointers
12183 // and raw pointers have no anti-dependencies.
12184 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12185   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12186   match(Set cr (CmpP op1 (LoadP op2)));
12187 
12188   format %{ "CMPu   $op1,$op2" %}
12189   opcode(0x3B);  /* Opcode 3B /r */
12190   ins_encode( OpcP, RegMem( op1, op2) );
12191   ins_pipe( ialu_cr_reg_mem );
12192 %}
12193 
12194 //
12195 // This will generate a signed flags result. This should be ok
12196 // since any compare to a zero should be eq/neq.
12197 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12198   match(Set cr (CmpP src zero));
12199 
12200   format %{ "TEST   $src,$src" %}
12201   opcode(0x85);
12202   ins_encode( OpcP, RegReg( src, src ) );
12203   ins_pipe( ialu_cr_reg_imm );
12204 %}
12205 
12206 // Cisc-spilled version of testP_reg
12207 // This will generate a signed flags result. This should be ok
12208 // since any compare to a zero should be eq/neq.
12209 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
12210   match(Set cr (CmpP (LoadP op) zero));
12211 
12212   format %{ "TEST   $op,0xFFFFFFFF" %}
12213   ins_cost(500);
12214   opcode(0xF7);               /* Opcode F7 /0 */
12215   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12216   ins_pipe( ialu_cr_reg_imm );
12217 %}
12218 
12219 // Yanked all unsigned pointer compare operations.
12220 // Pointer compares are done with CmpP which is already unsigned.
12221 
12222 //----------Max and Min--------------------------------------------------------
12223 // Min Instructions
12224 ////
12225 //   *** Min and Max using the conditional move are slower than the
12226 //   *** branch version on a Pentium III.
12227 // // Conditional move for min
12228 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12229 //  effect( USE_DEF op2, USE op1, USE cr );
12230 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12231 //  opcode(0x4C,0x0F);
12232 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12233 //  ins_pipe( pipe_cmov_reg );
12234 //%}
12235 //
12236 //// Min Register with Register (P6 version)
12237 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12238 //  predicate(VM_Version::supports_cmov() );
12239 //  match(Set op2 (MinI op1 op2));
12240 //  ins_cost(200);
12241 //  expand %{
12242 //    eFlagsReg cr;
12243 //    compI_eReg(cr,op1,op2);
12244 //    cmovI_reg_lt(op2,op1,cr);
12245 //  %}
12246 //%}
12247 
12248 // Min Register with Register (generic version)
12249 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12250   match(Set dst (MinI dst src));
12251   effect(KILL flags);
12252   ins_cost(300);
12253 
12254   format %{ "MIN    $dst,$src" %}
12255   opcode(0xCC);
12256   ins_encode( min_enc(dst,src) );
12257   ins_pipe( pipe_slow );
12258 %}
12259 
12260 // Max Register with Register
12261 //   *** Min and Max using the conditional move are slower than the
12262 //   *** branch version on a Pentium III.
12263 // // Conditional move for max
12264 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12265 //  effect( USE_DEF op2, USE op1, USE cr );
12266 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12267 //  opcode(0x4F,0x0F);
12268 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12269 //  ins_pipe( pipe_cmov_reg );
12270 //%}
12271 //
12272 // // Max Register with Register (P6 version)
12273 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12274 //  predicate(VM_Version::supports_cmov() );
12275 //  match(Set op2 (MaxI op1 op2));
12276 //  ins_cost(200);
12277 //  expand %{
12278 //    eFlagsReg cr;
12279 //    compI_eReg(cr,op1,op2);
12280 //    cmovI_reg_gt(op2,op1,cr);
12281 //  %}
12282 //%}
12283 
12284 // Max Register with Register (generic version)
12285 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12286   match(Set dst (MaxI dst src));
12287   effect(KILL flags);
12288   ins_cost(300);
12289 
12290   format %{ "MAX    $dst,$src" %}
12291   opcode(0xCC);
12292   ins_encode( max_enc(dst,src) );
12293   ins_pipe( pipe_slow );
12294 %}
12295 
12296 // ============================================================================
12297 // Counted Loop limit node which represents exact final iterator value.
12298 // Note: the resulting value should fit into the integer range since
12299 // counted loops have a limit check for overflow.
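      // For example (hypothetical values): init=0, limit=10, stride=3 gives
      // 0 + 3*((10 - 0 + 3 - 1)/3) = 0 + 3*4 = 12, the first iterator value
      // that fails the loop condition.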
12300 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12301   match(Set limit (LoopLimit (Binary init limit) stride));
12302   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12303   ins_cost(300);
12304 
12305   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12306   ins_encode %{
12307     int strd = (int)$stride$$constant;
12308     assert(strd != 1 && strd != -1, "sanity");
12309     int m1 = (strd > 0) ? 1 : -1;
12310     // Convert limit to long (EAX:EDX)
12311     __ cdql();
12312     // Convert init to long (init:tmp)
12313     __ movl($tmp$$Register, $init$$Register);
12314     __ sarl($tmp$$Register, 31);
12315     // $limit - $init
12316     __ subl($limit$$Register, $init$$Register);
12317     __ sbbl($limit_hi$$Register, $tmp$$Register);
12318     // + ($stride - 1)
12319     if (strd > 0) {
12320       __ addl($limit$$Register, (strd - 1));
12321       __ adcl($limit_hi$$Register, 0);
12322       __ movl($tmp$$Register, strd);
12323     } else {
12324       __ addl($limit$$Register, (strd + 1));
12325       __ adcl($limit_hi$$Register, -1);
12326       __ lneg($limit_hi$$Register, $limit$$Register);
12327       __ movl($tmp$$Register, -strd);
12328     }
12329     // signed division: (EAX:EDX) / pos_stride
12330     __ idivl($tmp$$Register);
12331     if (strd < 0) {
12332       // restore sign
12333       __ negl($tmp$$Register);
12334     }
12335     // (EAX) * stride
12336     __ mull($tmp$$Register);
12337     // + init (ignore upper bits)
12338     __ addl($limit$$Register, $init$$Register);
12339   %}
12340   ins_pipe( pipe_slow );
12341 %}
12342 
12343 // ============================================================================
12344 // Branch Instructions
12345 // Jump Table
12346 instruct jumpXtnd(rRegI switch_val) %{
12347   match(Jump switch_val);
12348   ins_cost(350);
12349   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12350   ins_encode %{
12351     // Jump to Address(table_base + switch_reg)
12352     Address index(noreg, $switch_val$$Register, Address::times_1);
12353     __ jump(ArrayAddress($constantaddress, index));
12354   %}
12355   ins_pipe(pipe_jmp);
12356 %}
12357 
12358 // Jump Direct - Label defines a relative address from JMP+1
12359 instruct jmpDir(label labl) %{
12360   match(Goto);
12361   effect(USE labl);
12362 
12363   ins_cost(300);
12364   format %{ "JMP    $labl" %}
12365   size(5);
12366   ins_encode %{
12367     Label* L = $labl$$label;
12368     __ jmp(*L, false); // Always long jump
12369   %}
12370   ins_pipe( pipe_jmp );
12371 %}
12372 
12373 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12374 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12375   match(If cop cr);
12376   effect(USE labl);
12377 
12378   ins_cost(300);
12379   format %{ "J$cop    $labl" %}
12380   size(6);
12381   ins_encode %{
12382     Label* L = $labl$$label;
12383     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12384   %}
12385   ins_pipe( pipe_jcc );
12386 %}
12387 
12388 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12389 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12390   predicate(!n->has_vector_mask_set());
12391   match(CountedLoopEnd cop cr);
12392   effect(USE labl);
12393 
12394   ins_cost(300);
12395   format %{ "J$cop    $labl\t# Loop end" %}
12396   size(6);
12397   ins_encode %{
12398     Label* L = $labl$$label;
12399     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12400   %}
12401   ins_pipe( pipe_jcc );
12402 %}
12403 
12404 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12405 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12406   predicate(!n->has_vector_mask_set());
12407   match(CountedLoopEnd cop cmp);
12408   effect(USE labl);
12409 
12410   ins_cost(300);
12411   format %{ "J$cop,u  $labl\t# Loop end" %}
12412   size(6);
12413   ins_encode %{
12414     Label* L = $labl$$label;
12415     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12416   %}
12417   ins_pipe( pipe_jcc );
12418 %}
12419 
12420 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12421   predicate(!n->has_vector_mask_set());
12422   match(CountedLoopEnd cop cmp);
12423   effect(USE labl);
12424 
12425   ins_cost(200);
12426   format %{ "J$cop,u  $labl\t# Loop end" %}
12427   size(6);
12428   ins_encode %{
12429     Label* L = $labl$$label;
12430     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12431   %}
12432   ins_pipe( pipe_jcc );
12433 %}
12434 
12435 // mask version
12436 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12437 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12438   predicate(n->has_vector_mask_set());
12439   match(CountedLoopEnd cop cr);
12440   effect(USE labl);
12441 
12442   ins_cost(400);
12443   format %{ "J$cop    $labl\t# Loop end\n\t"
12444             "restorevectmask \t# vector mask restore for loops" %}
12445   size(10);
12446   ins_encode %{
12447     Label* L = $labl$$label;
12448     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12449     __ restorevectmask();
12450   %}
12451   ins_pipe( pipe_jcc );
12452 %}
12453 
12454 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12455 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12456   predicate(n->has_vector_mask_set());
12457   match(CountedLoopEnd cop cmp);
12458   effect(USE labl);
12459 
12460   ins_cost(400);
12461   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12462             "restorevectmask \t# vector mask restore for loops" %}
12463   size(10);
12464   ins_encode %{
12465     Label* L = $labl$$label;
12466     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12467     __ restorevectmask();
12468   %}
12469   ins_pipe( pipe_jcc );
12470 %}
12471 
12472 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12473   predicate(n->has_vector_mask_set());
12474   match(CountedLoopEnd cop cmp);
12475   effect(USE labl);
12476 
12477   ins_cost(300);
12478   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12479             "restorevectmask \t# vector mask restore for loops" %}
12480   size(10);
12481   ins_encode %{
12482     Label* L = $labl$$label;
12483     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12484     __ restorevectmask();
12485   %}
12486   ins_pipe( pipe_jcc );
12487 %}
12488 
12489 // Jump Direct Conditional - using unsigned comparison
12490 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12491   match(If cop cmp);
12492   effect(USE labl);
12493 
12494   ins_cost(300);
12495   format %{ "J$cop,u  $labl" %}
12496   size(6);
12497   ins_encode %{
12498     Label* L = $labl$$label;
12499     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12500   %}
12501   ins_pipe(pipe_jcc);
12502 %}
12503 
12504 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12505   match(If cop cmp);
12506   effect(USE labl);
12507 
12508   ins_cost(200);
12509   format %{ "J$cop,u  $labl" %}
12510   size(6);
12511   ins_encode %{
12512     Label* L = $labl$$label;
12513     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12514   %}
12515   ins_pipe(pipe_jcc);
12516 %}
12517 
12518 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12519   match(If cop cmp);
12520   effect(USE labl);
12521 
12522   ins_cost(200);
12523   format %{ $$template
12524     if ($cop$$cmpcode == Assembler::notEqual) {
12525       $$emit$$"JP,u   $labl\n\t"
12526       $$emit$$"J$cop,u   $labl"
12527     } else {
12528       $$emit$$"JP,u   done\n\t"
12529       $$emit$$"J$cop,u   $labl\n\t"
12530       $$emit$$"done:"
12531     }
12532   %}
12533   ins_encode %{
12534     Label* l = $labl$$label;
12535     if ($cop$$cmpcode == Assembler::notEqual) {
12536       __ jcc(Assembler::parity, *l, false);
12537       __ jcc(Assembler::notEqual, *l, false);
12538     } else if ($cop$$cmpcode == Assembler::equal) {
12539       Label done;
12540       __ jccb(Assembler::parity, done);
12541       __ jcc(Assembler::equal, *l, false);
12542       __ bind(done);
12543     } else {
12544        ShouldNotReachHere();
12545     }
12546   %}
12547   ins_pipe(pipe_jcc);
12548 %}
12549 
12550 // ============================================================================
12551 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12552 // array for an instance of the superklass.  Set a hidden internal cache on a
12553 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12554 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
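//
// For orientation, a rough C++-style sketch of what the emitted scan computes
// (a sketch only; the accessor names below are illustrative, not necessarily
// the exact runtime API):
//
//   Klass** s   = sub->secondary_supers()->data();     // EDI
//   int     len = sub->secondary_supers()->length();   // ECX
//   bool hit = false;
//   for (int i = 0; i < len; i++) {                    // REPNE SCASD
//     if (s[i] == super) { hit = true; break; }
//   }
//   if (hit) sub->set_secondary_super_cache(super);    // hit: flags Z, result zeroed
//   else     /* miss */;                               // miss: flags NZ, result non-zero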
12555 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12556   match(Set result (PartialSubtypeCheck sub super));
12557   effect( KILL rcx, KILL cr );
12558 
12559   ins_cost(1100);  // slightly larger than the next version
12560   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12561             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12562             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12563             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12564             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12565             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12566             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12567      "miss:\t" %}
12568 
12569   opcode(0x1); // Force a XOR of EDI
12570   ins_encode( enc_PartialSubtypeCheck() );
12571   ins_pipe( pipe_slow );
12572 %}
12573 
12574 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12575   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12576   effect( KILL rcx, KILL result );
12577 
12578   ins_cost(1000);
12579   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12580             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12581             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12582             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12583             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12584             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12585      "miss:\t" %}
12586 
12587   opcode(0x0);  // No need to XOR EDI
12588   ins_encode( enc_PartialSubtypeCheck() );
12589   ins_pipe( pipe_slow );
12590 %}
12591 
12592 // ============================================================================
12593 // Branch Instructions -- short offset versions
12594 //
12595 // These instructions are used to replace jumps of a long offset (the default
12596 // match) with jumps of a shorter offset.  These instructions are all tagged
12597 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12598 // match rules in general matching.  Instead, the ADLC generates a conversion
12599 // method in the MachNode which can be used to do in-place replacement of the
12600 // long variant with the shorter variant.  The compiler determines whether the
12601 // short form reaches its target via the is_short_branch_offset() predicate in the
12602 // machine-specific code section of the file.
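//
// Conceptually, the branch-shortening pass in the output phase does something
// like the following (a simplified sketch, not the exact compiler code; the
// MachNode/MachBranchNode method names are assumed from the shared code):
//
//   if (mach->may_be_short_branch() &&
//       is_short_branch_offset(mach->rule(), branch_size, offset)) {
//     mach = mach->as_MachBranch()->short_branch_version();  // e.g. jmpCon -> jmpCon_short
//   }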
12603 
12604 // Jump Direct - Label defines a relative address from JMP+1
12605 instruct jmpDir_short(label labl) %{
12606   match(Goto);
12607   effect(USE labl);
12608 
12609   ins_cost(300);
12610   format %{ "JMP,s  $labl" %}
12611   size(2);
12612   ins_encode %{
12613     Label* L = $labl$$label;
12614     __ jmpb(*L);
12615   %}
12616   ins_pipe( pipe_jmp );
12617   ins_short_branch(1);
12618 %}
12619 
12620 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12621 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12622   match(If cop cr);
12623   effect(USE labl);
12624 
12625   ins_cost(300);
12626   format %{ "J$cop,s  $labl" %}
12627   size(2);
12628   ins_encode %{
12629     Label* L = $labl$$label;
12630     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12631   %}
12632   ins_pipe( pipe_jcc );
12633   ins_short_branch(1);
12634 %}
12635 
12636 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12637 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12638   match(CountedLoopEnd cop cr);
12639   effect(USE labl);
12640 
12641   ins_cost(300);
12642   format %{ "J$cop,s  $labl\t# Loop end" %}
12643   size(2);
12644   ins_encode %{
12645     Label* L = $labl$$label;
12646     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12647   %}
12648   ins_pipe( pipe_jcc );
12649   ins_short_branch(1);
12650 %}
12651 
12652 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12653 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12654   match(CountedLoopEnd cop cmp);
12655   effect(USE labl);
12656 
12657   ins_cost(300);
12658   format %{ "J$cop,us $labl\t# Loop end" %}
12659   size(2);
12660   ins_encode %{
12661     Label* L = $labl$$label;
12662     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12663   %}
12664   ins_pipe( pipe_jcc );
12665   ins_short_branch(1);
12666 %}
12667 
12668 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12669   match(CountedLoopEnd cop cmp);
12670   effect(USE labl);
12671 
12672   ins_cost(300);
12673   format %{ "J$cop,us $labl\t# Loop end" %}
12674   size(2);
12675   ins_encode %{
12676     Label* L = $labl$$label;
12677     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12678   %}
12679   ins_pipe( pipe_jcc );
12680   ins_short_branch(1);
12681 %}
12682 
12683 // Jump Direct Conditional - using unsigned comparison
12684 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12685   match(If cop cmp);
12686   effect(USE labl);
12687 
12688   ins_cost(300);
12689   format %{ "J$cop,us $labl" %}
12690   size(2);
12691   ins_encode %{
12692     Label* L = $labl$$label;
12693     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12694   %}
12695   ins_pipe( pipe_jcc );
12696   ins_short_branch(1);
12697 %}
12698 
12699 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12700   match(If cop cmp);
12701   effect(USE labl);
12702 
12703   ins_cost(300);
12704   format %{ "J$cop,us $labl" %}
12705   size(2);
12706   ins_encode %{
12707     Label* L = $labl$$label;
12708     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12709   %}
12710   ins_pipe( pipe_jcc );
12711   ins_short_branch(1);
12712 %}
12713 
12714 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12715   match(If cop cmp);
12716   effect(USE labl);
12717 
12718   ins_cost(300);
12719   format %{ $$template
12720     if ($cop$$cmpcode == Assembler::notEqual) {
12721       $$emit$$"JP,u,s   $labl\n\t"
12722       $$emit$$"J$cop,u,s   $labl"
12723     } else {
12724       $$emit$$"JP,u,s   done\n\t"
12725       $$emit$$"J$cop,u,s  $labl\n\t"
12726       $$emit$$"done:"
12727     }
12728   %}
12729   size(4);
12730   ins_encode %{
12731     Label* l = $labl$$label;
12732     if ($cop$$cmpcode == Assembler::notEqual) {
12733       __ jccb(Assembler::parity, *l);
12734       __ jccb(Assembler::notEqual, *l);
12735     } else if ($cop$$cmpcode == Assembler::equal) {
12736       Label done;
12737       __ jccb(Assembler::parity, done);
12738       __ jccb(Assembler::equal, *l);
12739       __ bind(done);
12740     } else {
12741        ShouldNotReachHere();
12742     }
12743   %}
12744   ins_pipe(pipe_jcc);
12745   ins_short_branch(1);
12746 %}
12747 
12748 // ============================================================================
12749 // Long Compare
12750 //
12751 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12752 // is tricky.  The flavor of compare used depends on whether we are testing
12753 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12754 // The GE test is the negated LT test.  The LE test can be had by commuting
12755 // the operands (yielding a GE test) and then negating; negate again for the
12756 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12757 // NE test is negated from that.
12758 
12759 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12760 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12761 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12762 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12763 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12764 // foo match ends up with the wrong leaf.  One fix is to not match both
12765 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12766 // both forms beat the trinary form of long-compare and both are very useful
12767 // on Intel which has so few registers.
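//
// As a concrete illustration, the flag-setting sequences used below look
// roughly like this (schematic IA-32 assembly mirroring the format strings of
// the cmpL_reg_flags_LTGE and cmpL_zero_flags_EQNE rules):
//
//   // signed x < y, with each long split into hi:lo register halves
//   CMP  x.lo, y.lo      // compare low halves; leaves a borrow for the high halves
//   MOV  tmp, x.hi
//   SBB  tmp, y.hi       // fold the borrow in; LT/GE are now readable from the flags
//
//   // x == 0
//   MOV  tmp, x.lo
//   OR   tmp, x.hi       // Z is set iff both halves are zero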
12768 
12769 // Manifest a CmpL result in an integer register.  Very painful.
12770 // This is the test to avoid.
12771 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12772   match(Set dst (CmpL3 src1 src2));
12773   effect( KILL flags );
12774   ins_cost(1000);
12775   format %{ "XOR    $dst,$dst\n\t"
12776             "CMP    $src1.hi,$src2.hi\n\t"
12777             "JLT,s  m_one\n\t"
12778             "JGT,s  p_one\n\t"
12779             "CMP    $src1.lo,$src2.lo\n\t"
12780             "JB,s   m_one\n\t"
12781             "JEQ,s  done\n"
12782     "p_one:\tINC    $dst\n\t"
12783             "JMP,s  done\n"
12784     "m_one:\tDEC    $dst\n"
12785      "done:" %}
12786   ins_encode %{
12787     Label p_one, m_one, done;
12788     __ xorptr($dst$$Register, $dst$$Register);
12789     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12790     __ jccb(Assembler::less,    m_one);
12791     __ jccb(Assembler::greater, p_one);
12792     __ cmpl($src1$$Register, $src2$$Register);
12793     __ jccb(Assembler::below,   m_one);
12794     __ jccb(Assembler::equal,   done);
12795     __ bind(p_one);
12796     __ incrementl($dst$$Register);
12797     __ jmpb(done);
12798     __ bind(m_one);
12799     __ decrementl($dst$$Register);
12800     __ bind(done);
12801   %}
12802   ins_pipe( pipe_slow );
12803 %}
12804 
12805 //======
12806 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12807 // compares.  Can be used for LE or GT compares by reversing arguments.
12808 // NOT GOOD FOR EQ/NE tests.
12809 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12810   match( Set flags (CmpL src zero ));
12811   ins_cost(100);
12812   format %{ "TEST   $src.hi,$src.hi" %}
12813   opcode(0x85);
12814   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12815   ins_pipe( ialu_cr_reg_reg );
12816 %}
12817 
12818 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12819 // compares.  Can be used for LE or GT compares by reversing arguments.
12820 // NOT GOOD FOR EQ/NE tests.
12821 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12822   match( Set flags (CmpL src1 src2 ));
12823   effect( TEMP tmp );
12824   ins_cost(300);
12825   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12826             "MOV    $tmp,$src1.hi\n\t"
12827             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12828   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12829   ins_pipe( ialu_cr_reg_reg );
12830 %}
12831 
12832 // Long compares reg < zero/reg OR reg >= zero/reg.
12833 // Just a wrapper for a normal branch, plus the predicate test.
12834 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12835   match(If cmp flags);
12836   effect(USE labl);
12837   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12838   expand %{
12839     jmpCon(cmp,flags,labl);    // JLT or JGE...
12840   %}
12841 %}
12842 
12843 //======
12844 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12845 // compares.  Can be used for LE or GT compares by reversing arguments.
12846 // NOT GOOD FOR EQ/NE tests.
12847 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
12848   match(Set flags (CmpUL src zero));
12849   ins_cost(100);
12850   format %{ "TEST   $src.hi,$src.hi" %}
12851   opcode(0x85);
12852   ins_encode(OpcP, RegReg_Hi2(src, src));
12853   ins_pipe(ialu_cr_reg_reg);
12854 %}
12855 
12856 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12857 // compares.  Can be used for LE or GT compares by reversing arguments.
12858 // NOT GOOD FOR EQ/NE tests.
12859 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
12860   match(Set flags (CmpUL src1 src2));
12861   effect(TEMP tmp);
12862   ins_cost(300);
12863   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12864             "MOV    $tmp,$src1.hi\n\t"
12865             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12866   ins_encode(long_cmp_flags2(src1, src2, tmp));
12867   ins_pipe(ialu_cr_reg_reg);
12868 %}
12869 
12870 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
12871 // Just a wrapper for a normal branch, plus the predicate test.
12872 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
12873   match(If cmp flags);
12874   effect(USE labl);
12875   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12876   expand %{
12877     jmpCon(cmp, flags, labl);    // JLT or JGE...
12878   %}
12879 %}
12880 
12881 // Compare 2 longs and CMOVE longs.
12882 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12883   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12884   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12885   ins_cost(400);
12886   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12887             "CMOV$cmp $dst.hi,$src.hi" %}
12888   opcode(0x0F,0x40);
12889   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12890   ins_pipe( pipe_cmov_reg_long );
12891 %}
12892 
12893 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12894   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12895   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12896   ins_cost(500);
12897   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12898             "CMOV$cmp $dst.hi,$src.hi" %}
12899   opcode(0x0F,0x40);
12900   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12901   ins_pipe( pipe_cmov_reg_long );
12902 %}
12903 
12904 // Compare 2 longs and CMOVE ints.
12905 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12906   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12907   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12908   ins_cost(200);
12909   format %{ "CMOV$cmp $dst,$src" %}
12910   opcode(0x0F,0x40);
12911   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12912   ins_pipe( pipe_cmov_reg );
12913 %}
12914 
12915 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12916   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12917   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12918   ins_cost(250);
12919   format %{ "CMOV$cmp $dst,$src" %}
12920   opcode(0x0F,0x40);
12921   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12922   ins_pipe( pipe_cmov_mem );
12923 %}
12924 
12925 // Compare 2 longs and CMOVE ptrs.
12926 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12927   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12928   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12929   ins_cost(200);
12930   format %{ "CMOV$cmp $dst,$src" %}
12931   opcode(0x0F,0x40);
12932   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12933   ins_pipe( pipe_cmov_reg );
12934 %}
12935 
12936 // Compare 2 longs and CMOVE doubles
12937 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12938   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12939   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12940   ins_cost(200);
12941   expand %{
12942     fcmovDPR_regS(cmp,flags,dst,src);
12943   %}
12944 %}
12945 
12946 // Compare 2 longs and CMOVE doubles
12947 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12948   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12949   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12950   ins_cost(200);
12951   expand %{
12952     fcmovD_regS(cmp,flags,dst,src);
12953   %}
12954 %}
12955 
12956 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12957   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12958   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12959   ins_cost(200);
12960   expand %{
12961     fcmovFPR_regS(cmp,flags,dst,src);
12962   %}
12963 %}
12964 
12965 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12966   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12967   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12968   ins_cost(200);
12969   expand %{
12970     fcmovF_regS(cmp,flags,dst,src);
12971   %}
12972 %}
12973 
12974 //======
12975 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12976 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12977   match( Set flags (CmpL src zero ));
12978   effect(TEMP tmp);
12979   ins_cost(200);
12980   format %{ "MOV    $tmp,$src.lo\n\t"
12981             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12982   ins_encode( long_cmp_flags0( src, tmp ) );
12983   ins_pipe( ialu_reg_reg_long );
12984 %}
12985 
12986 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12987 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12988   match( Set flags (CmpL src1 src2 ));
12989   ins_cost(200+300);
12990   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12991             "JNE,s  skip\n\t"
12992             "CMP    $src1.hi,$src2.hi\n\t"
12993      "skip:\t" %}
12994   ins_encode( long_cmp_flags1( src1, src2 ) );
12995   ins_pipe( ialu_cr_reg_reg );
12996 %}
12997 
12998 // Long compare reg == zero/reg OR reg != zero/reg
12999 // Just a wrapper for a normal branch, plus the predicate test.
13000 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
13001   match(If cmp flags);
13002   effect(USE labl);
13003   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13004   expand %{
13005     jmpCon(cmp,flags,labl);    // JEQ or JNE...
13006   %}
13007 %}
13008 
13009 //======
13010 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13011 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
13012   match(Set flags (CmpUL src zero));
13013   effect(TEMP tmp);
13014   ins_cost(200);
13015   format %{ "MOV    $tmp,$src.lo\n\t"
13016             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
13017   ins_encode(long_cmp_flags0(src, tmp));
13018   ins_pipe(ialu_reg_reg_long);
13019 %}
13020 
13021 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
13022 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
13023   match(Set flags (CmpUL src1 src2));
13024   ins_cost(200+300);
13025   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
13026             "JNE,s  skip\n\t"
13027             "CMP    $src1.hi,$src2.hi\n\t"
13028      "skip:\t" %}
13029   ins_encode(long_cmp_flags1(src1, src2));
13030   ins_pipe(ialu_cr_reg_reg);
13031 %}
13032 
13033 // Unsigned long compare reg == zero/reg OR reg != zero/reg
13034 // Just a wrapper for a normal branch, plus the predicate test.
13035 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
13036   match(If cmp flags);
13037   effect(USE labl);
13038   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
13039   expand %{
13040     jmpCon(cmp, flags, labl);    // JEQ or JNE...
13041   %}
13042 %}
13043 
13044 // Compare 2 longs and CMOVE longs.
13045 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
13046   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13047   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13048   ins_cost(400);
13049   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13050             "CMOV$cmp $dst.hi,$src.hi" %}
13051   opcode(0x0F,0x40);
13052   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13053   ins_pipe( pipe_cmov_reg_long );
13054 %}
13055 
13056 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
13057   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13058   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13059   ins_cost(500);
13060   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13061             "CMOV$cmp $dst.hi,$src.hi" %}
13062   opcode(0x0F,0x40);
13063   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13064   ins_pipe( pipe_cmov_reg_long );
13065 %}
13066 
13067 // Compare 2 longs and CMOVE ints.
13068 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
13069   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13070   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13071   ins_cost(200);
13072   format %{ "CMOV$cmp $dst,$src" %}
13073   opcode(0x0F,0x40);
13074   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13075   ins_pipe( pipe_cmov_reg );
13076 %}
13077 
13078 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
13079   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13080   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13081   ins_cost(250);
13082   format %{ "CMOV$cmp $dst,$src" %}
13083   opcode(0x0F,0x40);
13084   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13085   ins_pipe( pipe_cmov_mem );
13086 %}
13087 
13088 // Compare 2 longs and CMOVE ptrs.
13089 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
13090   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
13091   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13092   ins_cost(200);
13093   format %{ "CMOV$cmp $dst,$src" %}
13094   opcode(0x0F,0x40);
13095   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13096   ins_pipe( pipe_cmov_reg );
13097 %}
13098 
13099 // Compare 2 longs and CMOVE doubles
13100 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13101   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13102   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13103   ins_cost(200);
13104   expand %{
13105     fcmovDPR_regS(cmp,flags,dst,src);
13106   %}
13107 %}
13108 
13109 // Compare 2 longs and CMOVE doubles
13110 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13111   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13112   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13113   ins_cost(200);
13114   expand %{
13115     fcmovD_regS(cmp,flags,dst,src);
13116   %}
13117 %}
13118 
13119 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13120   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13121   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13122   ins_cost(200);
13123   expand %{
13124     fcmovFPR_regS(cmp,flags,dst,src);
13125   %}
13126 %}
13127 
13128 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13129   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
13130   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13131   ins_cost(200);
13132   expand %{
13133     fcmovF_regS(cmp,flags,dst,src);
13134   %}
13135 %}
13136 
13137 //======
13138 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13139 // Same as cmpL_reg_flags_LEGT except must negate src
13140 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
13141   match( Set flags (CmpL src zero ));
13142   effect( TEMP tmp );
13143   ins_cost(300);
13144   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13145             "CMP    $tmp,$src.lo\n\t"
13146             "SBB    $tmp,$src.hi\n\t" %}
13147   ins_encode( long_cmp_flags3(src, tmp) );
13148   ins_pipe( ialu_reg_reg_long );
13149 %}
13150 
13151 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13152 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13153 // requires a commuted test to get the same result.
13154 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
13155   match( Set flags (CmpL src1 src2 ));
13156   effect( TEMP tmp );
13157   ins_cost(300);
13158   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13159             "MOV    $tmp,$src2.hi\n\t"
13160             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13161   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13162   ins_pipe( ialu_cr_reg_reg );
13163 %}
13164 
13165 // Long compares reg < zero/reg OR reg >= zero/reg.
13166 // Just a wrapper for a normal branch, plus the predicate test
13167 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13168   match(If cmp flags);
13169   effect(USE labl);
13170   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13171   ins_cost(300);
13172   expand %{
13173     jmpCon(cmp,flags,labl);    // JGT or JLE...
13174   %}
13175 %}
13176 
13177 //======
13178 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13179 // Same as cmpUL_reg_flags_LEGT except must negate src
13180 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13181   match(Set flags (CmpUL src zero));
13182   effect(TEMP tmp);
13183   ins_cost(300);
13184   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13185             "CMP    $tmp,$src.lo\n\t"
13186             "SBB    $tmp,$src.hi\n\t" %}
13187   ins_encode(long_cmp_flags3(src, tmp));
13188   ins_pipe(ialu_reg_reg_long);
13189 %}
13190 
13191 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13192 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13193 // requires a commuted test to get the same result.
13194 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13195   match(Set flags (CmpUL src1 src2));
13196   effect(TEMP tmp);
13197   ins_cost(300);
13198   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13199             "MOV    $tmp,$src2.hi\n\t"
13200             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13201   ins_encode(long_cmp_flags2( src2, src1, tmp));
13202   ins_pipe(ialu_cr_reg_reg);
13203 %}
13204 
13205 // Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13206 // Just a wrapper for a normal branch, plus the predicate test
13207 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13208   match(If cmp flags);
13209   effect(USE labl);
13210   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13211   ins_cost(300);
13212   expand %{
13213     jmpCon(cmp, flags, labl);    // JGT or JLE...
13214   %}
13215 %}
13216 
13217 // Compare 2 longs and CMOVE longs.
13218 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13219   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13220   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13221   ins_cost(400);
13222   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13223             "CMOV$cmp $dst.hi,$src.hi" %}
13224   opcode(0x0F,0x40);
13225   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13226   ins_pipe( pipe_cmov_reg_long );
13227 %}
13228 
13229 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13230   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13231   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13232   ins_cost(500);
13233   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13234             "CMOV$cmp $dst.hi,$src.hi+4" %}
13235   opcode(0x0F,0x40);
13236   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13237   ins_pipe( pipe_cmov_reg_long );
13238 %}
13239 
13240 // Compare 2 longs and CMOVE ints.
13241 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13242   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13243   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13244   ins_cost(200);
13245   format %{ "CMOV$cmp $dst,$src" %}
13246   opcode(0x0F,0x40);
13247   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13248   ins_pipe( pipe_cmov_reg );
13249 %}
13250 
13251 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13252   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13253   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13254   ins_cost(250);
13255   format %{ "CMOV$cmp $dst,$src" %}
13256   opcode(0x0F,0x40);
13257   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13258   ins_pipe( pipe_cmov_mem );
13259 %}
13260 
13261 // Compare 2 longs and CMOVE ptrs.
13262 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13263   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13264   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13265   ins_cost(200);
13266   format %{ "CMOV$cmp $dst,$src" %}
13267   opcode(0x0F,0x40);
13268   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13269   ins_pipe( pipe_cmov_reg );
13270 %}
13271 
13272 // Compare 2 longs and CMOVE doubles
13273 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13274   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13275   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13276   ins_cost(200);
13277   expand %{
13278     fcmovDPR_regS(cmp,flags,dst,src);
13279   %}
13280 %}
13281 
13282 // Compare 2 longs and CMOVE doubles
13283 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13284   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13285   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13286   ins_cost(200);
13287   expand %{
13288     fcmovD_regS(cmp,flags,dst,src);
13289   %}
13290 %}
13291 
13292 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13293   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13294   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13295   ins_cost(200);
13296   expand %{
13297     fcmovFPR_regS(cmp,flags,dst,src);
13298   %}
13299 %}
13300 
13301 
13302 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13303   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13304   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13305   ins_cost(200);
13306   expand %{
13307     fcmovF_regS(cmp,flags,dst,src);
13308   %}
13309 %}
13310 
13311 
13312 // ============================================================================
13313 // Procedure Call/Return Instructions
13314 // Call Java Static Instruction
13315 // Note: If this code changes, the corresponding ret_addr_offset() and
13316 //       compute_padding() functions will have to be adjusted.
13317 instruct CallStaticJavaDirect(method meth) %{
13318   match(CallStaticJava);
13319   effect(USE meth);
13320 
13321   ins_cost(300);
13322   format %{ "CALL,static " %}
13323   opcode(0xE8); /* E8 cd */
13324   ins_encode( pre_call_resets,
13325               Java_Static_Call( meth ),
13326               call_epilog,
13327               post_call_FPU );
13328   ins_pipe( pipe_slow );
13329   ins_alignment(4);
13330 %}
13331 
13332 // Call Java Dynamic Instruction
13333 // Note: If this code changes, the corresponding ret_addr_offset() and
13334 //       compute_padding() functions will have to be adjusted.
13335 instruct CallDynamicJavaDirect(method meth) %{
13336   match(CallDynamicJava);
13337   effect(USE meth);
13338 
13339   ins_cost(300);
13340   format %{ "MOV    EAX,(oop)-1\n\t"
13341             "CALL,dynamic" %}
13342   opcode(0xE8); /* E8 cd */
13343   ins_encode( pre_call_resets,
13344               Java_Dynamic_Call( meth ),
13345               call_epilog,
13346               post_call_FPU );
13347   ins_pipe( pipe_slow );
13348   ins_alignment(4);
13349 %}
13350 
13351 // Call Runtime Instruction
13352 instruct CallRuntimeDirect(method meth) %{
13353   match(CallRuntime );
13354   effect(USE meth);
13355 
13356   ins_cost(300);
13357   format %{ "CALL,runtime " %}
13358   opcode(0xE8); /* E8 cd */
13359   // Use FFREEs to clear entries in float stack
13360   ins_encode( pre_call_resets,
13361               FFree_Float_Stack_All,
13362               Java_To_Runtime( meth ),
13363               post_call_FPU );
13364   ins_pipe( pipe_slow );
13365 %}
13366 
13367 // Call runtime without safepoint
13368 instruct CallLeafDirect(method meth) %{
13369   match(CallLeaf);
13370   effect(USE meth);
13371 
13372   ins_cost(300);
13373   format %{ "CALL_LEAF,runtime " %}
13374   opcode(0xE8); /* E8 cd */
13375   ins_encode( pre_call_resets,
13376               FFree_Float_Stack_All,
13377               Java_To_Runtime( meth ),
13378               Verify_FPU_For_Leaf, post_call_FPU );
13379   ins_pipe( pipe_slow );
13380 %}
13381 
13382 instruct CallLeafNoFPDirect(method meth) %{
13383   match(CallLeafNoFP);
13384   effect(USE meth);
13385 
13386   ins_cost(300);
13387   format %{ "CALL_LEAF_NOFP,runtime " %}
13388   opcode(0xE8); /* E8 cd */
13389   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13390   ins_pipe( pipe_slow );
13391 %}
13392 
13393 
13394 // Return Instruction
13395 // Remove the return address & jump to it.
13396 instruct Ret() %{
13397   match(Return);
13398   format %{ "RET" %}
13399   opcode(0xC3);
13400   ins_encode(OpcP);
13401   ins_pipe( pipe_jmp );
13402 %}
13403 
13404 // Tail Call; Jump from runtime stub to Java code.
13405 // Also known as an 'interprocedural jump'.
13406 // Target of jump will eventually return to caller.
13407 // TailJump below removes the return address.
13408 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13409   match(TailCall jump_target method_oop );
13410   ins_cost(300);
13411   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13412   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13413   ins_encode( OpcP, RegOpc(jump_target) );
13414   ins_pipe( pipe_jmp );
13415 %}
13416 
13417 
13418 // Tail Jump; remove the return address; jump to target.
13419 // TailCall above leaves the return address around.
13420 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13421   match( TailJump jump_target ex_oop );
13422   ins_cost(300);
13423   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13424             "JMP    $jump_target " %}
13425   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13426   ins_encode( enc_pop_rdx,
13427               OpcP, RegOpc(jump_target) );
13428   ins_pipe( pipe_jmp );
13429 %}
13430 
13431 // Create exception oop: created by stack-crawling runtime code.
13432 // Created exception is now available to this handler, and is set up
13433 // just prior to jumping to this handler.  No code emitted.
13434 instruct CreateException( eAXRegP ex_oop )
13435 %{
13436   match(Set ex_oop (CreateEx));
13437 
13438   size(0);
13439   // use the following format syntax
13440   format %{ "# exception oop is in EAX; no code emitted" %}
13441   ins_encode();
13442   ins_pipe( empty );
13443 %}
13444 
13445 
13446 // Rethrow exception:
13447 // The exception oop will come in the first argument position.
13448 // Then JUMP (not call) to the rethrow stub code.
13449 instruct RethrowException()
13450 %{
13451   match(Rethrow);
13452 
13453   // use the following format syntax
13454   format %{ "JMP    rethrow_stub" %}
13455   ins_encode(enc_rethrow);
13456   ins_pipe( pipe_jmp );
13457 %}
13458 
13459 // inlined locking and unlocking
13460 
13461 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13462   predicate(Compile::current()->use_rtm());
13463   match(Set cr (FastLock object box));
13464   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13465   ins_cost(300);
13466   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13467   ins_encode %{
13468     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13469                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13470                  _counters, _rtm_counters, _stack_rtm_counters,
13471                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13472                  true, ra_->C->profile_rtm());
13473   %}
13474   ins_pipe(pipe_slow);
13475 %}
13476 
13477 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13478   predicate(!Compile::current()->use_rtm());
13479   match(Set cr (FastLock object box));
13480   effect(TEMP tmp, TEMP scr, USE_KILL box);
13481   ins_cost(300);
13482   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13483   ins_encode %{
13484     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13485                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13486   %}
13487   ins_pipe(pipe_slow);
13488 %}
13489 
13490 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13491   match(Set cr (FastUnlock object box));
13492   effect(TEMP tmp, USE_KILL box);
13493   ins_cost(300);
13494   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13495   ins_encode %{
13496     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13497   %}
13498   ins_pipe(pipe_slow);
13499 %}
13500 
13501 
13502 
13503 // ============================================================================
13504 // Safepoint Instruction
13505 instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{
13506   match(SafePoint poll);
13507   effect(KILL cr, USE poll);
13508 
13509   format %{ "TSTL   #EAX,[$poll]\t! Safepoint: poll for GC" %}
13510   ins_cost(125);
13511   // EBP would need size(3)
13512   size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */
13513   ins_encode %{
13514     __ relocate(relocInfo::poll_type);
13515     address pre_pc = __ pc();
13516     __ testl(rax, Address($poll$$Register, 0));
13517     address post_pc = __ pc();
13518     guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]");
13519   %}
13520   ins_pipe(ialu_reg_mem);
13521 %}
13522 
13523 
13524 // ============================================================================
13525 // This name is KNOWN by the ADLC and cannot be changed.
13526 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13527 // for this guy.
13528 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13529   match(Set dst (ThreadLocal));
13530   effect(DEF dst, KILL cr);
13531 
13532   format %{ "MOV    $dst, Thread::current()" %}
13533   ins_encode %{
13534     Register dstReg = as_Register($dst$$reg);
13535     __ get_thread(dstReg);
13536   %}
13537   ins_pipe( ialu_reg_fat );
13538 %}
13539 
13540 
13541 
13542 //----------PEEPHOLE RULES-----------------------------------------------------
13543 // These must follow all instruction definitions as they use the names
13544 // defined in the instruction definitions.
13545 //
13546 // peepmatch ( root_instr_name [preceding_instruction]* );
13547 //
13548 // peepconstraint %{
13549 // (instruction_number.operand_name relational_op instruction_number.operand_name
13550 //  [, ...] );
13551 // // instruction numbers are zero-based using left to right order in peepmatch
13552 //
13553 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13554 // // provide an instruction_number.operand_name for each operand that appears
13555 // // in the replacement instruction's match rule
13556 //
13557 // ---------VM FLAGS---------------------------------------------------------
13558 //
13559 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13560 //
13561 // Each peephole rule is given an identifying number starting with zero and
13562 // increasing by one in the order seen by the parser.  An individual peephole
13563 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13564 // on the command-line.
13565 //
13566 // ---------CURRENT LIMITATIONS----------------------------------------------
13567 //
13568 // Only match adjacent instructions in same basic block
13569 // Only equality constraints
13570 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13571 // Only one replacement instruction
13572 //
13573 // ---------EXAMPLE----------------------------------------------------------
13574 //
13575 // // pertinent parts of existing instructions in architecture description
13576 // instruct movI(rRegI dst, rRegI src) %{
13577 //   match(Set dst (CopyI src));
13578 // %}
13579 //
13580 // instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
13581 //   match(Set dst (AddI dst src));
13582 //   effect(KILL cr);
13583 // %}
13584 //
13585 // // Change (inc mov) to lea
13586 // peephole %{
13587 //   // increment preceded by register-register move
13588 //   peepmatch ( incI_eReg movI );
13589 //   // require that the destination register of the increment
13590 //   // match the destination register of the move
13591 //   peepconstraint ( 0.dst == 1.dst );
13592 //   // construct a replacement instruction that sets
13593 //   // the destination to ( move's source register + one )
13594 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13595 // %}
13596 //
13597 // Implementation no longer uses movX instructions since
13598 // machine-independent system no longer uses CopyX nodes.
13599 //
13600 // peephole %{
13601 //   peepmatch ( incI_eReg movI );
13602 //   peepconstraint ( 0.dst == 1.dst );
13603 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13604 // %}
13605 //
13606 // peephole %{
13607 //   peepmatch ( decI_eReg movI );
13608 //   peepconstraint ( 0.dst == 1.dst );
13609 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13610 // %}
13611 //
13612 // peephole %{
13613 //   peepmatch ( addI_eReg_imm movI );
13614 //   peepconstraint ( 0.dst == 1.dst );
13615 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13616 // %}
13617 //
13618 // peephole %{
13619 //   peepmatch ( addP_eReg_imm movP );
13620 //   peepconstraint ( 0.dst == 1.dst );
13621 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13622 // %}
13623 
13624 // // Change load of spilled value to only a spill
13625 // instruct storeI(memory mem, rRegI src) %{
13626 //   match(Set mem (StoreI mem src));
13627 // %}
13628 //
13629 // instruct loadI(rRegI dst, memory mem) %{
13630 //   match(Set dst (LoadI mem));
13631 // %}
13632 //
13633 peephole %{
13634   peepmatch ( loadI storeI );
13635   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13636   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13637 %}
13638 
13639 //----------SMARTSPILL RULES---------------------------------------------------
13640 // These must follow all instruction definitions as they use the names
13641 // defined in the instruction definitions.