1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
// EBX, ESI, and EDI were previously set as save-on-entry for Java code.
// SOE was then turned off in Java code due to the frequent use of uncommon traps.
// Now that the allocator is better, turn ESI and EDI back on as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
// Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Here is the trick: FPR1 is really st(0), except in the midst of emitting
// assembly for a machnode.  During emission the FPU stack is pushed, making
// FPR1 == st(1) temporarily.  However, at any safepoint the stack does not
// hold this extra element, so FPR1 == st(0) from the oopMap viewpoint.  This
// numbering quirk forces the instruction encodings to play games with the
// register encode to correct for the 0/1 offset.  See
// MachSpillCopyNode::implementation, where the flt->flt moves show an example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
 108 reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between register classes
 146 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
 147 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI).
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and not EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
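// With the x86 encodings defined above (EAX=0, ECX=1, EDX=2, EBX=3, EBP=5,
// EDI=7), each long pair's high half is simply the low half's encoding plus
// two:
//   HIGH_FROM_LOW(EAX_enc) == EDX_enc   // pair EDX:EAX
//   HIGH_FROM_LOW(ECX_enc) == EBX_enc   // pair EBX:ECX
//   HIGH_FROM_LOW(EBP_enc) == EDI_enc   // pair EDI:EBP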
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to compute a 128-bit-aligned address
  // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value into the 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
// Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
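// Illustrative use of these pools (a sketch; the real instruct encodings
// appear later in this file): AbsF/AbsD clear the sign bit by ANDing with a
// signmask constant, while NegF/NegD flip it by XORing with a signflip
// constant, e.g.
//   __ andps(dst, ExternalAddress((address)float_signmask_pool));   // AbsF
//   __ xorpd(dst, ExternalAddress((address)double_signflip_pool));  // NegD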
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (C->max_vector_size() > 16) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
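// (fldcw through a 32-bit absolute address is opcode + ModRM + disp32 = 6
// bytes; vzeroupper is a 3-byte VEX-encoded instruction.)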
 298 
// !!!!! Special hack to get all types of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
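// For reference: a direct call is 1 opcode byte (0xE8) plus a 4-byte
// displacement = 5 bytes; the dynamic (inline cache) call is preceded by a
// 5-byte MOV that loads the inline-cache constant into EAX, giving 10 bytes.
// Any fldcw/vzeroupper counted by pre_call_resets_size() is emitted before
// the call and is added on top.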
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 // Indicate if the safepoint node needs the polling page as an input.
 318 // Since x86 does have absolute addressing, it doesn't.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return false;
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
// The displacement of the call instruction needs to be 4-byte aligned so
// that it does not span a cache line and can be patched atomically.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   current_offset += pre_call_resets_size();  // skip fldcw, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return round_to(current_offset, alignment_required()) - current_offset;
 333 }
 334 
// The displacement of the call instruction needs to be 4-byte aligned so
// that it does not span a cache line and can be patched atomically.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_resets_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return round_to(current_offset, alignment_required()) - current_offset;
 342 }
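// Worked example (assuming alignment_required() == 4): if the code emitted so
// far ends at offset 14 and no FPU/vector reset is needed, a static call's
// displacement would start at 14 + 1 = 15, so 1 byte of padding is inserted to
// move it to 16; at offset 15 the displacement already starts at 16 and no
// padding is needed.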
 343 
 344 // EMIT_RM()
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 407     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 411     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 412   }
 413 }
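// For example, FILD dword [ESP+8] is emitted as the bytes DB 44 24 08:
// opcode 0xDB, ModRM 0x44 (mod=01, reg=000, rm=100 -> SIB follows),
// SIB 0x24 (scale=00, index=100=none, base=ESP), then the 8-bit displacement.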
 414 
// Emits the ModRM/SIB/displacement bytes for a (rRegI ereg, memory mem) operand pair; used by the emit_reg_mem style encoding classes.
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // If there is no index and no scale, use the form without a SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
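// For example, with reg_encoding = EAX (0), base = ECX (1), no index (0x4),
// scale = 0 and displace = 16, the no-SIB, 8-bit-displacement path above emits
// the bytes 41 10 (ModRM mod=01 reg=000 rm=001, then disp8); together with a
// preceding 0x8B opcode that is MOV EAX, [ECX+16].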
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
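// emit_cmpfp3 turns the flags of an FP compare (e.g. ucomiss/ucomisd) into a
// three-way integer result in 'dst': -1 if the operands were unordered (PF
// set) or the compare was 'below' (CF set), 0 if equal, and 1 if greater.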
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because constant-table users
    // might be emitted before the MachConstantBaseNode.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
 649   framesize -= 2*wordSize;
 650 
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674 
 675   if (C->max_vector_size() > 16) {
 676     // Clear upper bits of YMM registers when current compiled code uses
 677     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 678     _masm.vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684 
 685   int framesize = C->frame_size_in_bytes();
 686   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
 688   framesize -= 2*wordSize;
 689 
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691 
 692   if (framesize >= 128) {
 693     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 695     emit_d32(cbuf, framesize);
 696   } else if (framesize) {
 697     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 698     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 699     emit_d8(cbuf, framesize);
 700   }
 701 
 702   emit_opcode(cbuf, 0x58 | EBP_enc);
 703 
 704   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 705     __ reserved_stack_check();
 706   }
 707 
 708   if (do_polling() && C->is_method_compilation()) {
 709     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 710     emit_opcode(cbuf,0x85);
 711     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 712     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 713   }
 714 }
 715 
 716 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 717   Compile *C = ra_->C;
 718   // If method set FPU control word, restore to standard control word
 719   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 720   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 721   if (do_polling() && C->is_method_compilation()) size += 6;
 722 
 723   int framesize = C->frame_size_in_bytes();
 724   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
 726   framesize -= 2*wordSize;
 727 
  size++; // popl rbp
 729 
 730   if (framesize >= 128) {
 731     size += 6;
 732   } else {
 733     size += framesize ? 3 : 0;
 734   }
 735   size += 64; // added to support ReservedStackAccess
 736   return size;
 737 }
 738 
 739 int MachEpilogNode::reloc() const {
 740   return 0; // a large enough number
 741 }
 742 
 743 const Pipeline * MachEpilogNode::pipeline() const {
 744   return MachNode::pipeline_class();
 745 }
 746 
 747 int MachEpilogNode::safepoint_offset() const { return 0; }
 748 
 749 //=============================================================================
 750 
 751 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 752 static enum RC rc_class( OptoReg::Name reg ) {
 753 
 754   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 755   if (OptoReg::is_stack(reg)) return rc_stack;
 756 
 757   VMReg r = OptoReg::as_VMReg(reg);
 758   if (r->is_Register()) return rc_int;
 759   if (r->is_FloatRegister()) {
 760     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 761     return rc_float;
 762   }
 763   assert(r->is_XMMRegister(), "must be");
 764   return rc_xmm;
 765 }
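// For example: EAX falls in rc_int, an x87 register such as FPR2L falls in
// rc_float (only legal when UseSSE < 2), an XMM register falls in rc_xmm, and
// any spill slot falls in rc_stack.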
 766 
 767 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 768                         int opcode, const char *op_str, int size, outputStream* st ) {
 769   if( cbuf ) {
 770     emit_opcode  (*cbuf, opcode );
 771     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 772 #ifndef PRODUCT
 773   } else if( !do_size ) {
 774     if( size != 0 ) st->print("\n\t");
 775     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 776       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 777       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 778     } else { // FLD, FST, PUSH, POP
 779       st->print("%s [ESP + #%d]",op_str,offset);
 780     }
 781 #endif
 782   }
 783   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 784   return size+3+offset_size;
 785 }
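// The returned size assumes the ESP-based addressing form: opcode + ModRM +
// SIB (3 bytes), plus 0, 1 or 4 bytes of displacement depending on the offset.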
 786 
 787 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 788 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 789                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 790   int in_size_in_bits = Assembler::EVEX_32bit;
 791   int evex_encoding = 0;
 792   if (reg_lo+1 == reg_hi) {
 793     in_size_in_bits = Assembler::EVEX_64bit;
 794     evex_encoding = Assembler::VEX_W;
 795   }
 796   if (cbuf) {
 797     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: compressed displacement is better than AVX for spill
    //                          memory operations, since it maps more cases to a single-byte displacement.
 800     _masm.set_managed();
 801     if (reg_lo+1 == reg_hi) { // double move?
 802       if (is_load) {
 803         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 804       } else {
 805         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 806       }
 807     } else {
 808       if (is_load) {
 809         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 810       } else {
 811         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 812       }
 813     }
 814 #ifndef PRODUCT
 815   } else if (!do_size) {
 816     if (size != 0) st->print("\n\t");
 817     if (reg_lo+1 == reg_hi) { // double move?
 818       if (is_load) st->print("%s %s,[ESP + #%d]",
 819                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 820                               Matcher::regName[reg_lo], offset);
 821       else         st->print("MOVSD  [ESP + #%d],%s",
 822                               offset, Matcher::regName[reg_lo]);
 823     } else {
 824       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 825                               Matcher::regName[reg_lo], offset);
 826       else         st->print("MOVSS  [ESP + #%d],%s",
 827                               offset, Matcher::regName[reg_lo]);
 828     }
 829 #endif
 830   }
 831   bool is_single_byte = false;
 832   if ((UseAVX > 2) && (offset != 0)) {
 833     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 834   }
 835   int offset_size = 0;
 836   if (UseAVX > 2 ) {
 837     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 838   } else {
 839     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 840   }
 841   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 842   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 843   return size+5+offset_size;
 844 }
 845 
 846 
 847 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 848                             int src_hi, int dst_hi, int size, outputStream* st ) {
 849   if (cbuf) {
 850     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic for mixing full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
 852     _masm.set_managed();
 853     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 854       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 855                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 856     } else {
 857       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 858                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 859     }
 860 #ifndef PRODUCT
 861   } else if (!do_size) {
 862     if (size != 0) st->print("\n\t");
 863     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 864       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 865         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 866       } else {
 867         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 868       }
 869     } else {
 870       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 871         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 872       } else {
 873         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 874       }
 875     }
 876 #endif
 877   }
 878   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 879   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 880   int sz = (UseAVX > 2) ? 6 : 4;
 881   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 882       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 883   return size + sz;
 884 }
 885 
 886 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 887                             int src_hi, int dst_hi, int size, outputStream* st ) {
 888   // 32-bit
 889   if (cbuf) {
 890     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic for mixing full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
 892     _masm.set_managed();
 893     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 894              as_Register(Matcher::_regEncode[src_lo]));
 895 #ifndef PRODUCT
 896   } else if (!do_size) {
 897     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 898 #endif
 899   }
 900   return (UseAVX> 2) ? 6 : 4;
 901 }
 902 
 903 
 904 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 905                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 906   // 32-bit
 907   if (cbuf) {
 908     MacroAssembler _masm(cbuf);
    // EVEX spills remain EVEX: the logic for mixing full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
 910     _masm.set_managed();
 911     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 912              as_XMMRegister(Matcher::_regEncode[src_lo]));
 913 #ifndef PRODUCT
 914   } else if (!do_size) {
 915     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 916 #endif
 917   }
 918   return (UseAVX> 2) ? 6 : 4;
 919 }
 920 
 921 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 922   if( cbuf ) {
 923     emit_opcode(*cbuf, 0x8B );
 924     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 925 #ifndef PRODUCT
 926   } else if( !do_size ) {
 927     if( size != 0 ) st->print("\n\t");
 928     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 929 #endif
 930   }
 931   return size+2;
 932 }
 933 
 934 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 935                                  int offset, int size, outputStream* st ) {
 936   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 937     if( cbuf ) {
 938       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 939       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 940 #ifndef PRODUCT
 941     } else if( !do_size ) {
 942       if( size != 0 ) st->print("\n\t");
 943       st->print("FLD    %s",Matcher::regName[src_lo]);
 944 #endif
 945     }
 946     size += 2;
 947   }
 948 
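  // EBX_num and EDX_num are borrowed purely for their x86 encodings (3 and 2),
  // which impl_helper() feeds into the ModRM reg field: /3 selects FSTP
  // (store & pop) while /2 selects FST (store, no pop) for both 0xD9 and 0xDD.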
 949   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 950   const char *op_str;
 951   int op;
 952   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 953     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 954     op = 0xDD;
 955   } else {                   // 32-bit store
 956     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 957     op = 0xD9;
 958     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 959   }
 960 
 961   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 962 }
 963 
 964 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 965 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 966                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 967 
 968 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 969                             int stack_offset, int reg, uint ireg, outputStream* st);
 970 
 971 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 972                                      int dst_offset, uint ireg, outputStream* st) {
 973   int calc_size = 0;
 974   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 975   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 976   switch (ireg) {
 977   case Op_VecS:
 978     calc_size = 3+src_offset_size + 3+dst_offset_size;
 979     break;
 980   case Op_VecD: {
 981     calc_size = 3+src_offset_size + 3+dst_offset_size;
 982     int tmp_src_offset = src_offset + 4;
 983     int tmp_dst_offset = dst_offset + 4;
 984     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 985     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 986     calc_size += 3+src_offset_size + 3+dst_offset_size;
 987     break;
 988   }   
 989   case Op_VecX:
 990   case Op_VecY:
 991   case Op_VecZ:
 992     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 993     break;
 994   default:
 995     ShouldNotReachHere();
 996   }
 997   if (cbuf) {
 998     MacroAssembler _masm(cbuf);
 999     int offset = __ offset();
1000     switch (ireg) {
1001     case Op_VecS:
1002       __ pushl(Address(rsp, src_offset));
1003       __ popl (Address(rsp, dst_offset));
1004       break;
1005     case Op_VecD:
1006       __ pushl(Address(rsp, src_offset));
1007       __ popl (Address(rsp, dst_offset));
1008       __ pushl(Address(rsp, src_offset+4));
1009       __ popl (Address(rsp, dst_offset+4));
1010       break;
1011     case Op_VecX:
1012       __ movdqu(Address(rsp, -16), xmm0);
1013       __ movdqu(xmm0, Address(rsp, src_offset));
1014       __ movdqu(Address(rsp, dst_offset), xmm0);
1015       __ movdqu(xmm0, Address(rsp, -16));
1016       break;
1017     case Op_VecY:
1018       __ vmovdqu(Address(rsp, -32), xmm0);
1019       __ vmovdqu(xmm0, Address(rsp, src_offset));
1020       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1021       __ vmovdqu(xmm0, Address(rsp, -32));
1022       break;
1023     case Op_VecZ:
1024       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1025       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1026       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1027       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1028       break;
1029     default:
1030       ShouldNotReachHere();
1031     }
1032     int size = __ offset() - offset;
1033     assert(size == calc_size, "incorrect size calculation");
1034     return size;
1035 #ifndef PRODUCT
1036   } else if (!do_size) {
1037     switch (ireg) {
1038     case Op_VecS:
1039       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1040                 "popl    [rsp + #%d]",
1041                 src_offset, dst_offset);
1042       break;
1043     case Op_VecD:
1044       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
1046                 "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
1048                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1049       break;
1050      case Op_VecX:
1051       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1052                 "movdqu  xmm0, [rsp + #%d]\n\t"
1053                 "movdqu  [rsp + #%d], xmm0\n\t"
1054                 "movdqu  xmm0, [rsp - #16]",
1055                 src_offset, dst_offset);
1056       break;
1057     case Op_VecY:
1058       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1059                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1060                 "vmovdqu [rsp + #%d], xmm0\n\t"
1061                 "vmovdqu xmm0, [rsp - #32]",
1062                 src_offset, dst_offset);
1063       break;
1064     case Op_VecZ:
1065       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1066                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1067                 "vmovdqu [rsp + #%d], xmm0\n\t"
1068                 "vmovdqu xmm0, [rsp - #64]",
1069                 src_offset, dst_offset);
1070       break;
1071     default:
1072       ShouldNotReachHere();
1073     }
1074 #endif
1075   }
1076   return calc_size;
1077 }
1078 
1079 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1080   // Get registers to move
1081   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1082   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1083   OptoReg::Name dst_second = ra_->get_reg_second(this );
1084   OptoReg::Name dst_first = ra_->get_reg_first(this );
1085 
1086   enum RC src_second_rc = rc_class(src_second);
1087   enum RC src_first_rc = rc_class(src_first);
1088   enum RC dst_second_rc = rc_class(dst_second);
1089   enum RC dst_first_rc = rc_class(dst_first);
1090 
1091   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1092 
1093   // Generate spill code!
1094   int size = 0;
1095 
1096   if( src_first == dst_first && src_second == dst_second )
1097     return size;            // Self copy, no move
1098 
1099   if (bottom_type()->isa_vect() != NULL) {
1100     uint ireg = ideal_reg();
1101     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1102     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1103     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1104     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1105       // mem -> mem
1106       int src_offset = ra_->reg2offset(src_first);
1107       int dst_offset = ra_->reg2offset(dst_first);
1108       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1109     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1110       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1111     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1112       int stack_offset = ra_->reg2offset(dst_first);
1113       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1114     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1115       int stack_offset = ra_->reg2offset(src_first);
1116       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1117     } else {
1118       ShouldNotReachHere();
1119     }
1120   }
1121 
1122   // --------------------------------------
1123   // Check for mem-mem move.  push/pop to move.
1124   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1125     if( src_second == dst_first ) { // overlapping stack copy ranges
1126       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1127       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1128       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1129       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1130     }
1131     // move low bits
1132     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1133     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1134     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1135       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1136       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1137     }
1138     return size;
1139   }
1140 
1141   // --------------------------------------
1142   // Check for integer reg-reg copy
1143   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1144     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1145 
1146   // Check for integer store
1147   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1148     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1149 
1150   // Check for integer load
1151   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1152     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1153 
1154   // Check for integer reg-xmm reg copy
1155   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1156     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1157             "no 64 bit integer-float reg moves" );
1158     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1159   }
1160   // --------------------------------------
1161   // Check for float reg-reg copy
1162   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1163     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1164             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1165     if( cbuf ) {
1166 
1167       // Note the mucking with the register encode to compensate for the 0/1
1168       // indexing issue mentioned in a comment in the reg_def sections
1169       // for FPR registers many lines above here.
1170 
1171       if( src_first != FPR1L_num ) {
1172         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1173         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1174         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1175         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1176      } else {
1177         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1178         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1179      }
1180 #ifndef PRODUCT
1181     } else if( !do_size ) {
1182       if( size != 0 ) st->print("\n\t");
1183       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1184       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1185 #endif
1186     }
1187     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1188   }
1189 
1190   // Check for float store
1191   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1192     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1193   }
1194 
1195   // Check for float load
1196   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1197     int offset = ra_->reg2offset(src_first);
1198     const char *op_str;
1199     int op;
1200     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1201       op_str = "FLD_D";
1202       op = 0xDD;
1203     } else {                   // 32-bit load
1204       op_str = "FLD_S";
1205       op = 0xD9;
1206       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1207     }
1208     if( cbuf ) {
1209       emit_opcode  (*cbuf, op );
1210       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1211       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1212       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1213 #ifndef PRODUCT
1214     } else if( !do_size ) {
1215       if( size != 0 ) st->print("\n\t");
1216       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1217 #endif
1218     }
1219     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1220     return size + 3+offset_size+2;
1221   }
1222 
1223   // Check for xmm reg-reg copy
1224   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1225     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1226             (src_first+1 == src_second && dst_first+1 == dst_second),
1227             "no non-adjacent float-moves" );
1228     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1229   }
1230 
1231   // Check for xmm reg-integer reg copy
1232   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1233     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1234             "no 64 bit float-integer reg moves" );
1235     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1236   }
1237 
1238   // Check for xmm store
1239   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1240     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1241   }
1242 
1243   // Check for float xmm load
1244   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1245     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1246   }
1247 
1248   // Copy from float reg to xmm reg
1249   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1250     // copy to the top of stack from floating point reg
1251     // and use LEA to preserve flags
1252     if( cbuf ) {
1253       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1254       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1255       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1256       emit_d8(*cbuf,0xF8);
1257 #ifndef PRODUCT
1258     } else if( !do_size ) {
1259       if( size != 0 ) st->print("\n\t");
1260       st->print("LEA    ESP,[ESP-8]");
1261 #endif
1262     }
1263     size += 4;
1264 
1265     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1266 
1267     // Copy from the temp memory to the xmm reg.
1268     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1269 
1270     if( cbuf ) {
1271       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1272       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1273       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1274       emit_d8(*cbuf,0x08);
1275 #ifndef PRODUCT
1276     } else if( !do_size ) {
1277       if( size != 0 ) st->print("\n\t");
1278       st->print("LEA    ESP,[ESP+8]");
1279 #endif
1280     }
1281     size += 4;
1282     return size;
1283   }
1284 
1285   assert( size > 0, "missed a case" );
1286 
1287   // --------------------------------------------------------------------
1288   // Check for second bits still needing moving.
1289   if( src_second == dst_second )
1290     return size;               // Self copy; no move
1291   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1292 
1293   // Check for second word int-int move
1294   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1295     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1296 
1297   // Check for second word integer store
1298   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1299     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1300 
1301   // Check for second word integer load
1302   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1303     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1304 
1305 
1306   Unimplemented();
1307   return 0; // Mute compiler
1308 }
1309 
1310 #ifndef PRODUCT
1311 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1312   implementation( NULL, ra_, false, st );
1313 }
1314 #endif
1315 
1316 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1317   implementation( &cbuf, ra_, false, NULL );
1318 }
1319 
1320 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1321   return implementation( NULL, ra_, true, NULL );
1322 }
1323 
1324 
1325 //=============================================================================
1326 #ifndef PRODUCT
1327 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1328   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1329   int reg = ra_->get_reg_first(this);
1330   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1331 }
1332 #endif
1333 
1334 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1335   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1336   int reg = ra_->get_encode(this);
1337   if( offset >= 128 ) {
1338     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1339     emit_rm(cbuf, 0x2, reg, 0x04);
1340     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1341     emit_d32(cbuf, offset);
1342   }
1343   else {
1344     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1345     emit_rm(cbuf, 0x1, reg, 0x04);
1346     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1347     emit_d8(cbuf, offset);
1348   }
1349 }
1350 
1351 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1352   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1353   if( offset >= 128 ) {
1354     return 7;
1355   }
1356   else {
1357     return 4;
1358   }
1359 }
1360 
1361 //=============================================================================
1362 #ifndef PRODUCT
1363 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1364   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1365   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1366   st->print_cr("\tNOP");
1367   st->print_cr("\tNOP");
1368   if( !OptoBreakpoint )
1369     st->print_cr("\tNOP");
1370 }
1371 #endif
1372 
1373 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1374   MacroAssembler masm(&cbuf);
1375 #ifdef ASSERT
1376   uint insts_size = cbuf.insts_size();
1377 #endif
1378   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1379   masm.jump_cc(Assembler::notEqual,
1380                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING: these NOPs are critical so that the verified entry point is
     properly aligned for patching by NativeJump::patch_verified_entry() */
1383   int nops_cnt = 2;
1384   if( !OptoBreakpoint ) // Leave space for int3
1385      nops_cnt += 1;
1386   masm.nop(nops_cnt);
1387 
1388   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1389 }
1390 
1391 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1392   return OptoBreakpoint ? 11 : 12;
1393 }
1394 
1395 
1396 //=============================================================================
1397 
1398 int Matcher::regnum_to_fpu_offset(int regnum) {
1399   return regnum - 32; // The FP registers are in the second chunk
1400 }
1401 
// This is UltraSparc-specific; returning true just means we have fast l2f conversion
1403 const bool Matcher::convL2FSupported(void) {
1404   return true;
1405 }
1406 
1407 // Is this branch offset short enough that a short branch can be used?
1408 //
1409 // NOTE: If the platform does not provide any short branch variants, then
1410 //       this method should return false for offset 0.
1411 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1415   offset -= br_size;
1416 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
1419   if (rule == jmpConUCF2_rule)
1420     return (-126 <= offset && offset <= 125);
1421   return (-128 <= offset && offset <= 127);
1422 }
1423 
1424 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1426   return false;
1427 }
1428 
1429 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1430 const bool Matcher::init_array_count_is_in_bytes = false;
1431 
1432 // Needs 2 CMOV's for longs.
1433 const int Matcher::long_cmove_cost() { return 1; }
1434 
1435 // No CMOVF/CMOVD with SSE/SSE2
1436 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1437 
1438 // Does the CPU require late expand (see block.cpp for description of late expand)?
1439 const bool Matcher::require_postalloc_expand = false;
1440 
1441 // Should the Matcher clone shifts on addressing modes, expecting them to
1442 // be subsumed into complex addressing expressions or compute them into
1443 // registers?  True for Intel but false for most RISCs
1444 const bool Matcher::clone_shift_expressions = true;
1445 
1446 // Do we need to mask the count passed to shift instructions or does
// the cpu only looks at the lower 5/6 bits anyway?
1448 const bool Matcher::need_masked_shift_count = false;
1449 
1450 bool Matcher::narrow_oop_use_complex_address() {
1451   ShouldNotCallThis();
1452   return true;
1453 }
1454 
1455 bool Matcher::narrow_klass_use_complex_address() {
1456   ShouldNotCallThis();
1457   return true;
1458 }
1459 
1460 
1461 // Is it better to copy float constants, or load them directly from memory?
1462 // Intel can load a float constant from a direct address, requiring no
1463 // extra registers.  Most RISCs will have to materialize an address into a
1464 // register first, so they would do better to copy the constant from stack.
1465 const bool Matcher::rematerialize_float_constants = true;
1466 
1467 // If CPU can load and store mis-aligned doubles directly then no fixup is
1468 // needed.  Else we split the double into 2 integer pieces and move it
1469 // piece-by-piece.  Only happens when passing doubles into C code as the
1470 // Java calling convention forces doubles to be aligned.
1471 const bool Matcher::misaligned_doubles_ok = true;
1472 
1473 
1474 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1475   // Get the memory operand from the node
1476   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1477   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1478   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1479   uint opcnt     = 1;                 // First operand
1480   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1481   while( idx >= skipped+num_edges ) {
1482     skipped += num_edges;
1483     opcnt++;                          // Bump operand count
1484     assert( opcnt < numopnds, "Accessing non-existent operand" );
1485     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1486   }
1487 
1488   MachOper *memory = node->_opnds[opcnt];
1489   MachOper *new_memory = NULL;
1490   switch (memory->opcode()) {
1491   case DIRECT:
1492   case INDOFFSET32X:
1493     // No transformation necessary.
1494     return;
1495   case INDIRECT:
1496     new_memory = new indirect_win95_safeOper( );
1497     break;
1498   case INDOFFSET8:
1499     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1500     break;
1501   case INDOFFSET32:
1502     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1503     break;
1504   case INDINDEXOFFSET:
1505     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1506     break;
1507   case INDINDEXSCALE:
1508     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1509     break;
1510   case INDINDEXSCALEOFFSET:
1511     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1512     break;
1513   case LOAD_LONG_INDIRECT:
1514   case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as the address register; uses { EDX, EBX, EDI, ESI }
1516     return;
1517   default:
1518     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1519     return;
1520   }
1521   node->_opnds[opcnt] = new_memory;
1522 }
1523 
1524 // Advertise here if the CPU requires explicit rounding operations
1525 // to implement the UseStrictFP mode.
1526 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1527 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On 32-bit x86 they are stored with conversion only when the FPU is used for floats.
1530 bool Matcher::float_in_double() { return (UseSSE == 0); }
1531 
1532 // Do ints take an entire long register or just half?
1533 const bool Matcher::int_in_long = false;
1534 
1535 // Return whether or not this register is ever used as an argument.  This
1536 // function is used on startup to build the trampoline stubs in generateOptoStub.
1537 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1539 bool Matcher::can_be_java_arg( int reg ) {
1540   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1541   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1542   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1543   return false;
1544 }
1545 
1546 bool Matcher::is_spillable_arg( int reg ) {
1547   return can_be_java_arg(reg);
1548 }
1549 
1550 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses a multiply.
  // Only do so when the constant divisor fits into 32 bits
  // (min_jint is excluded because negating it does not
  // yield a correct positive 32-bit value).
1556   return VM_Version::has_fast_idiv() &&
1557          (divisor == (int)divisor && divisor != min_jint);
1558 }
1559 
1560 // Register for DIVI projection of divmodI
1561 RegMask Matcher::divI_proj_mask() {
1562   return EAX_REG_mask();
1563 }
1564 
1565 // Register for MODI projection of divmodI
1566 RegMask Matcher::modI_proj_mask() {
1567   return EDX_REG_mask();
1568 }
1569 
1570 // Register for DIVL projection of divmodL
1571 RegMask Matcher::divL_proj_mask() {
1572   ShouldNotReachHere();
1573   return RegMask();
1574 }
1575 
1576 // Register for MODL projection of divmodL
1577 RegMask Matcher::modL_proj_mask() {
1578   ShouldNotReachHere();
1579   return RegMask();
1580 }
1581 
1582 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1583   return NO_REG_mask();
1584 }
1585 
1586 // Returns true if the high 32 bits of the value is known to be zero.
1587 bool is_operand_hi32_zero(Node* n) {
1588   int opc = n->Opcode();
1589   if (opc == Op_AndL) {
1590     Node* o2 = n->in(2);
1591     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1592       return true;
1593     }
1594   }
1595   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1596     return true;
1597   }
1598   return false;
1599 }
1600 
1601 %}
1602 
1603 //----------ENCODING BLOCK-----------------------------------------------------
1604 // This block specifies the encoding classes used by the compiler to output
1605 // byte streams.  Encoding classes generate functions which are called by
1606 // Machine Instruction Nodes in order to generate the bit encoding of the
1607 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
1609 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1610 // operand to generate a function which returns its register number when
1611 // queried.   CONST_INTER causes an operand to generate a function which
1612 // returns the value of the constant when queried.  MEMORY_INTER causes an
1613 // operand to generate four functions which return the Base Register, the
1614 // Index Register, the Scale Value, and the Offset Value of the operand when
1615 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1617 // associated with each basic boolean condition for a conditional instruction.
1618 // Instructions specify two basic values for encoding.  They use the
1619 // ins_encode keyword to specify their encoding class (which must be one of
1620 // the class names specified in the encoding block), and they use the
1621 // opcode keyword to specify, in order, their primary, secondary, and
1622 // tertiary opcode.  Only the opcode sections which a particular instruction
1623 // needs for encoding need to be specified.
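// As a rough sketch of the shape an instruct rule takes (an illustration only,
// the actual rules appear later in this file):
//
//   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     effect(KILL cr);
//     opcode(0x03);                            // primary opcode: ADD r32,r/m32
//     ins_encode( OpcP, RegReg( dst, src ) );
//     ins_pipe( ialu_reg_reg );
//   %}
//
// Here the OpcP encoding class emits the primary opcode byte and RegReg emits
// the mod/rm byte naming the two registers.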
1624 encode %{
1625   // Build emit functions for each basic byte or larger field in the intel
1626   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1627   // code in the enc_class source block.  Emit functions will live in the
1628   // main source block for now.  In future, we can generalize this by
1629   // adding a syntax that specifies the sizes of fields in an order,
1630   // so that the adlc can build the emit functions automagically
1631 
1632   // Emit primary opcode
1633   enc_class OpcP %{
1634     emit_opcode(cbuf, $primary);
1635   %}
1636 
1637   // Emit secondary opcode
1638   enc_class OpcS %{
1639     emit_opcode(cbuf, $secondary);
1640   %}
1641 
1642   // Emit opcode directly
1643   enc_class Opcode(immI d8) %{
1644     emit_opcode(cbuf, $d8$$constant);
1645   %}
1646 
1647   enc_class SizePrefix %{
1648     emit_opcode(cbuf,0x66);
1649   %}
1650 
1651   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1652     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1653   %}
1654 
1655   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1656     emit_opcode(cbuf,$opcode$$constant);
1657     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1658   %}
1659 
1660   enc_class mov_r32_imm0( rRegI dst ) %{
1661     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1662     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1663   %}
1664 
1665   enc_class cdq_enc %{
1666     // Full implementation of Java idiv and irem; checks for
1667     // special case as described in JVM spec., p.243 & p.271.
1668     //
1669     //         normal case                           special case
1670     //
    // input : rax: dividend                          min_int
    //         reg: divisor                          -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
    //
    //  Code sequence:
1678     //
1679     //  81 F8 00 00 00 80    cmp         rax,80000000h
1680     //  0F 85 0B 00 00 00    jne         normal_case
1681     //  33 D2                xor         rdx,edx
1682     //  83 F9 FF             cmp         rcx,0FFh
1683     //  0F 84 03 00 00 00    je          done
1684     //                  normal_case:
1685     //  99                   cdq
1686     //  F7 F9                idiv        rax,ecx
1687     //                  done:
1688     //
1689     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1690     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1691     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1692     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1693     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1694     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1695     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1696     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1697     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1698     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1699     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1700     // normal_case:
1701     emit_opcode(cbuf,0x99);                                         // cdq
1702     // idiv (note: must be emitted by the user of this rule)
1703     // normal:
1704   %}
1705 
1706   // Dense encoding for older common ops
1707   enc_class Opc_plus(immI opcode, rRegI reg) %{
1708     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1709   %}
1710 
1711 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
1713   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1714     // Check for 8-bit immediate, and set sign extend bit in opcode
1715     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1716       emit_opcode(cbuf, $primary | 0x02);
1717     }
1718     else {                          // If 32-bit immediate
1719       emit_opcode(cbuf, $primary);
1720     }
1721   %}
1722 
1723   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1724     // Emit primary opcode and set sign-extend bit
1725     // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
1729       emit_opcode(cbuf, $primary);
1730     }
1731     // Emit r/m byte with secondary opcode, after primary opcode.
1732     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1733   %}
1734 
1735   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1736     // Check for 8-bit immediate, and set sign extend bit in opcode
1737     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1738       $$$emit8$imm$$constant;
1739     }
1740     else {                          // If 32-bit immediate
1741       // Output immediate
1742       $$$emit32$imm$$constant;
1743     }
1744   %}
1745 
1746   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1747     // Emit primary opcode and set sign-extend bit
1748     // Check for 8-bit immediate, and set sign extend bit in opcode
1749     int con = (int)$imm$$constant; // Throw away top bits
1750     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1751     // Emit r/m byte with secondary opcode, after primary opcode.
1752     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1753     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1754     else                               emit_d32(cbuf,con);
1755   %}
1756 
1757   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1758     // Emit primary opcode and set sign-extend bit
1759     // Check for 8-bit immediate, and set sign extend bit in opcode
1760     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1761     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1762     // Emit r/m byte with tertiary opcode, after primary opcode.
1763     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1764     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1765     else                               emit_d32(cbuf,con);
1766   %}
1767 
1768   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1769     emit_cc(cbuf, $secondary, $dst$$reg );
1770   %}
1771 
1772   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1773     int destlo = $dst$$reg;
1774     int desthi = HIGH_FROM_LOW(destlo);
1775     // bswap lo
1776     emit_opcode(cbuf, 0x0F);
1777     emit_cc(cbuf, 0xC8, destlo);
1778     // bswap hi
1779     emit_opcode(cbuf, 0x0F);
1780     emit_cc(cbuf, 0xC8, desthi);
1781     // xchg lo and hi
1782     emit_opcode(cbuf, 0x87);
1783     emit_rm(cbuf, 0x3, destlo, desthi);
1784   %}
1785 
1786   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1787     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1788   %}
1789 
1790   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1791     $$$emit8$primary;
1792     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1793   %}
1794 
1795   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1796     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1797     emit_d8(cbuf, op >> 8 );
1798     emit_d8(cbuf, op & 255);
1799   %}
1800 
1801   // emulate a CMOV with a conditional branch around a MOV
1802   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1803     // Invert sense of branch from sense of CMOV
1804     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1805     emit_d8( cbuf, $brOffs$$constant );
1806   %}
1807 
1808   enc_class enc_PartialSubtypeCheck( ) %{
1809     Register Redi = as_Register(EDI_enc); // result register
1810     Register Reax = as_Register(EAX_enc); // super class
1811     Register Recx = as_Register(ECX_enc); // killed
1812     Register Resi = as_Register(ESI_enc); // sub class
1813     Label miss;
1814 
1815     MacroAssembler _masm(&cbuf);
1816     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1817                                      NULL, &miss,
1818                                      /*set_cond_codes:*/ true);
1819     if ($primary) {
1820       __ xorptr(Redi, Redi);
1821     }
1822     __ bind(miss);
1823   %}
1824 
1825   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1826     MacroAssembler masm(&cbuf);
1827     int start = masm.offset();
1828     if (UseSSE >= 2) {
1829       if (VerifyFPU) {
1830         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1831       }
1832     } else {
1833       // External c_calling_convention expects the FPU stack to be 'clean'.
1834       // Compiled code leaves it dirty.  Do cleanup now.
1835       masm.empty_FPU_stack();
1836     }
1837     if (sizeof_FFree_Float_Stack_All == -1) {
1838       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1839     } else {
1840       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1841     }
1842   %}
1843 
1844   enc_class Verify_FPU_For_Leaf %{
1845     if( VerifyFPU ) {
1846       MacroAssembler masm(&cbuf);
1847       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1848     }
1849   %}
1850 
1851   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1852     // This is the instruction starting address for relocation info.
1853     cbuf.set_insts_mark();
1854     $$$emit8$primary;
1855     // CALL directly to the runtime
1856     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1857                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1858 
1859     if (UseSSE >= 2) {
1860       MacroAssembler _masm(&cbuf);
1861       BasicType rt = tf()->return_type();
1862 
1863       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1864         // A C runtime call where the return value is unused.  In SSE2+
1865         // mode the result needs to be removed from the FPU stack.  It's
1866         // likely that this function call could be removed by the
1867         // optimizer if the C function is a pure function.
1868         __ ffree(0);
1869       } else if (rt == T_FLOAT) {
1870         __ lea(rsp, Address(rsp, -4));
1871         __ fstp_s(Address(rsp, 0));
1872         __ movflt(xmm0, Address(rsp, 0));
1873         __ lea(rsp, Address(rsp,  4));
1874       } else if (rt == T_DOUBLE) {
1875         __ lea(rsp, Address(rsp, -8));
1876         __ fstp_d(Address(rsp, 0));
1877         __ movdbl(xmm0, Address(rsp, 0));
1878         __ lea(rsp, Address(rsp,  8));
1879       }
1880     }
1881   %}
1882 
1883 
1884   enc_class pre_call_resets %{
1885     // If method sets FPU control word restore it here
1886     debug_only(int off0 = cbuf.insts_size());
1887     if (ra_->C->in_24_bit_fp_mode()) {
1888       MacroAssembler _masm(&cbuf);
1889       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1890     }
1891     if (ra_->C->max_vector_size() > 16) {
1892       // Clear upper bits of YMM registers when current compiled code uses
1893       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1894       MacroAssembler _masm(&cbuf);
1895       __ vzeroupper();
1896     }
1897     debug_only(int off1 = cbuf.insts_size());
1898     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1899   %}
1900 
1901   enc_class post_call_FPU %{
1902     // If method sets FPU control word do it here also
1903     if (Compile::current()->in_24_bit_fp_mode()) {
1904       MacroAssembler masm(&cbuf);
1905       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1906     }
1907   %}
1908 
1909   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1910     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1911     // who we intended to call.
1912     cbuf.set_insts_mark();
1913     $$$emit8$primary;
1914 
1915     if (!_method) {
1916       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1917                      runtime_call_Relocation::spec(),
1918                      RELOC_IMM32);
1919     } else {
1920       int method_index = resolved_method_index(cbuf);
1921       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1922                                                   : static_call_Relocation::spec(method_index);
1923       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1924                      rspec, RELOC_DISP32);
1925       // Emit stubs for static call.
1926       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1927       if (stub == NULL) {
1928         ciEnv::current()->record_failure("CodeCache is full");
1929         return;
1930       }
1931     }
1932   %}
1933 
1934   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1935     MacroAssembler _masm(&cbuf);
1936     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1937   %}
1938 
1939   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1940     int disp = in_bytes(Method::from_compiled_offset());
1941     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1942 
1943     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1944     cbuf.set_insts_mark();
1945     $$$emit8$primary;
1946     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1947     emit_d8(cbuf, disp);             // Displacement
1948 
1949   %}
1950 
1951 //   Following encoding is no longer used, but may be restored if calling
1952 //   convention changes significantly.
1953 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1954 //
1955 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1956 //     // int ic_reg     = Matcher::inline_cache_reg();
1957 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1958 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1959 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1960 //
1961 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1962 //     // // so we load it immediately before the call
1963 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1964 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1965 //
1966 //     // xor rbp,ebp
1967 //     emit_opcode(cbuf, 0x33);
1968 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1969 //
1970 //     // CALL to interpreter.
1971 //     cbuf.set_insts_mark();
1972 //     $$$emit8$primary;
1973 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1974 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1975 //   %}
1976 
1977   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1978     $$$emit8$primary;
1979     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1980     $$$emit8$shift$$constant;
1981   %}
1982 
1983   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1984     // Load immediate does not have a zero or sign extended version
1985     // for 8-bit immediates
1986     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1987     $$$emit32$src$$constant;
1988   %}
1989 
1990   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1991     // Load immediate does not have a zero or sign extended version
1992     // for 8-bit immediates
1993     emit_opcode(cbuf, $primary + $dst$$reg);
1994     $$$emit32$src$$constant;
1995   %}
1996 
1997   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1998     // Load immediate does not have a zero or sign extended version
1999     // for 8-bit immediates
2000     int dst_enc = $dst$$reg;
2001     int src_con = $src$$constant & 0x0FFFFFFFFL;
2002     if (src_con == 0) {
2003       // xor dst, dst
2004       emit_opcode(cbuf, 0x33);
2005       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2006     } else {
2007       emit_opcode(cbuf, $primary + dst_enc);
2008       emit_d32(cbuf, src_con);
2009     }
2010   %}
2011 
2012   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2013     // Load immediate does not have a zero or sign extended version
2014     // for 8-bit immediates
2015     int dst_enc = $dst$$reg + 2;
2016     int src_con = ((julong)($src$$constant)) >> 32;
2017     if (src_con == 0) {
2018       // xor dst, dst
2019       emit_opcode(cbuf, 0x33);
2020       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2021     } else {
2022       emit_opcode(cbuf, $primary + dst_enc);
2023       emit_d32(cbuf, src_con);
2024     }
2025   %}
2026 
2027 
2028   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2029   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2030     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2031   %}
2032 
2033   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2034     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2035   %}
2036 
2037   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2038     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2039   %}
2040 
2041   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2042     $$$emit8$primary;
2043     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2044   %}
2045 
2046   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2047     $$$emit8$secondary;
2048     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2049   %}
2050 
2051   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2052     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2053   %}
2054 
2055   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2056     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2057   %}
2058 
2059   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2060     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2061   %}
2062 
2063   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2064     // Output immediate
2065     $$$emit32$src$$constant;
2066   %}
2067 
2068   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2069     // Output Float immediate bits
2070     jfloat jf = $src$$constant;
2071     int    jf_as_bits = jint_cast( jf );
2072     emit_d32(cbuf, jf_as_bits);
2073   %}
2074 
2075   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2076     // Output Float immediate bits
2077     jfloat jf = $src$$constant;
2078     int    jf_as_bits = jint_cast( jf );
2079     emit_d32(cbuf, jf_as_bits);
2080   %}
2081 
2082   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2083     // Output immediate
2084     $$$emit16$src$$constant;
2085   %}
2086 
2087   enc_class Con_d32(immI src) %{
2088     emit_d32(cbuf,$src$$constant);
2089   %}
2090 
2091   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2092     // Output immediate memory reference
2093     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2094     emit_d32(cbuf, 0x00);
2095   %}
2096 
2097   enc_class lock_prefix( ) %{
2098     if( os::is_MP() )
2099       emit_opcode(cbuf,0xF0);         // [Lock]
2100   %}
2101 
2102   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
2107   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2108 
2109     // XCHG  rbx,ecx
2110     emit_opcode(cbuf,0x87);
2111     emit_opcode(cbuf,0xD9);
2112     // [Lock]
2113     if( os::is_MP() )
2114       emit_opcode(cbuf,0xF0);
2115     // CMPXCHG8 [Eptr]
2116     emit_opcode(cbuf,0x0F);
2117     emit_opcode(cbuf,0xC7);
2118     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2119     // XCHG  rbx,ecx
2120     emit_opcode(cbuf,0x87);
2121     emit_opcode(cbuf,0xD9);
2122   %}
2123 
2124   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2125     // [Lock]
2126     if( os::is_MP() )
2127       emit_opcode(cbuf,0xF0);
2128 
2129     // CMPXCHG [Eptr]
2130     emit_opcode(cbuf,0x0F);
2131     emit_opcode(cbuf,0xB1);
2132     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2133   %}
2134 
2135   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2136     int res_encoding = $res$$reg;
2137 
2138     // MOV  res,0
2139     emit_opcode( cbuf, 0xB8 + res_encoding);
2140     emit_d32( cbuf, 0 );
2141     // JNE,s  fail
2142     emit_opcode(cbuf,0x75);
2143     emit_d8(cbuf, 5 );
2144     // MOV  res,1
2145     emit_opcode( cbuf, 0xB8 + res_encoding);
2146     emit_d32( cbuf, 1 );
2147     // fail:
2148   %}
2149 
2150   enc_class set_instruction_start( ) %{
2151     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2152   %}
2153 
2154   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2155     int reg_encoding = $ereg$$reg;
2156     int base  = $mem$$base;
2157     int index = $mem$$index;
2158     int scale = $mem$$scale;
2159     int displace = $mem$$disp;
2160     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2161     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2162   %}
2163 
2164   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2165     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2166     int base  = $mem$$base;
2167     int index = $mem$$index;
2168     int scale = $mem$$scale;
2169     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2170     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2171     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2172   %}
2173 
2174   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2175     int r1, r2;
2176     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2177     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2178     emit_opcode(cbuf,0x0F);
2179     emit_opcode(cbuf,$tertiary);
2180     emit_rm(cbuf, 0x3, r1, r2);
2181     emit_d8(cbuf,$cnt$$constant);
2182     emit_d8(cbuf,$primary);
2183     emit_rm(cbuf, 0x3, $secondary, r1);
2184     emit_d8(cbuf,$cnt$$constant);
2185   %}
2186 
2187   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2188     emit_opcode( cbuf, 0x8B ); // Move
2189     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2190     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2191       emit_d8(cbuf,$primary);
2192       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2193       emit_d8(cbuf,$cnt$$constant-32);
2194     }
2195     emit_d8(cbuf,$primary);
2196     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2197     emit_d8(cbuf,31);
2198   %}
2199 
2200   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2201     int r1, r2;
2202     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2203     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2204 
2205     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2206     emit_rm(cbuf, 0x3, r1, r2);
2207     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2208       emit_opcode(cbuf,$primary);
2209       emit_rm(cbuf, 0x3, $secondary, r1);
2210       emit_d8(cbuf,$cnt$$constant-32);
2211     }
2212     emit_opcode(cbuf,0x33);  // XOR r2,r2
2213     emit_rm(cbuf, 0x3, r2, r2);
2214   %}
2215 
2216   // Clone of RegMem but accepts an extra parameter to access each
2217   // half of a double in memory; it never needs relocation info.
2218   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2219     emit_opcode(cbuf,$opcode$$constant);
2220     int reg_encoding = $rm_reg$$reg;
2221     int base     = $mem$$base;
2222     int index    = $mem$$index;
2223     int scale    = $mem$$scale;
2224     int displace = $mem$$disp + $disp_for_half$$constant;
2225     relocInfo::relocType disp_reloc = relocInfo::none;
2226     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2227   %}
2228 
2229   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2230   //
2231   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2232   // and it never needs relocation information.
2233   // Frequently used to move data between FPU's Stack Top and memory.
2234   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2235     int rm_byte_opcode = $rm_opcode$$constant;
2236     int base     = $mem$$base;
2237     int index    = $mem$$index;
2238     int scale    = $mem$$scale;
2239     int displace = $mem$$disp;
2240     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2241     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2242   %}
2243 
2244   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2245     int rm_byte_opcode = $rm_opcode$$constant;
2246     int base     = $mem$$base;
2247     int index    = $mem$$index;
2248     int scale    = $mem$$scale;
2249     int displace = $mem$$disp;
2250     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2251     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2252   %}
2253 
2254   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2255     int reg_encoding = $dst$$reg;
2256     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2257     int index        = 0x04;            // 0x04 indicates no index
2258     int scale        = 0x00;            // 0x00 indicates no scale
2259     int displace     = $src1$$constant; // 0x00 indicates no displacement
2260     relocInfo::relocType disp_reloc = relocInfo::none;
2261     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2262   %}
2263 
2264   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2265     // Compare dst,src
2266     emit_opcode(cbuf,0x3B);
2267     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2268     // jmp dst < src around move
2269     emit_opcode(cbuf,0x7C);
2270     emit_d8(cbuf,2);
2271     // move dst,src
2272     emit_opcode(cbuf,0x8B);
2273     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2274   %}
2275 
2276   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2277     // Compare dst,src
2278     emit_opcode(cbuf,0x3B);
2279     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2280     // jmp dst > src around move
2281     emit_opcode(cbuf,0x7F);
2282     emit_d8(cbuf,2);
2283     // move dst,src
2284     emit_opcode(cbuf,0x8B);
2285     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2286   %}
2287 
2288   enc_class enc_FPR_store(memory mem, regDPR src) %{
2289     // If src is FPR1, we can just FST to store it.
2290     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2291     int reg_encoding = 0x2; // Just store
2292     int base  = $mem$$base;
2293     int index = $mem$$index;
2294     int scale = $mem$$scale;
2295     int displace = $mem$$disp;
2296     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2297     if( $src$$reg != FPR1L_enc ) {
2298       reg_encoding = 0x3;  // Store & pop
2299       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2300       emit_d8( cbuf, 0xC0-1+$src$$reg );
2301     }
2302     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2303     emit_opcode(cbuf,$primary);
2304     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2305   %}
2306 
2307   enc_class neg_reg(rRegI dst) %{
2308     // NEG $dst
2309     emit_opcode(cbuf,0xF7);
2310     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2311   %}
2312 
2313   enc_class setLT_reg(eCXRegI dst) %{
2314     // SETLT $dst
2315     emit_opcode(cbuf,0x0F);
2316     emit_opcode(cbuf,0x9C);
2317     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2318   %}
2319 
2320   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2321     int tmpReg = $tmp$$reg;
2322 
2323     // SUB $p,$q
2324     emit_opcode(cbuf,0x2B);
2325     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2326     // SBB $tmp,$tmp
2327     emit_opcode(cbuf,0x1B);
2328     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2329     // AND $tmp,$y
2330     emit_opcode(cbuf,0x23);
2331     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2332     // ADD $p,$tmp
2333     emit_opcode(cbuf,0x03);
2334     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2335   %}
2336 
2337   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2338     // TEST shift,32
2339     emit_opcode(cbuf,0xF7);
2340     emit_rm(cbuf, 0x3, 0, ECX_enc);
2341     emit_d32(cbuf,0x20);
2342     // JEQ,s small
2343     emit_opcode(cbuf, 0x74);
2344     emit_d8(cbuf, 0x04);
2345     // MOV    $dst.hi,$dst.lo
2346     emit_opcode( cbuf, 0x8B );
2347     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2348     // CLR    $dst.lo
2349     emit_opcode(cbuf, 0x33);
2350     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2351 // small:
2352     // SHLD   $dst.hi,$dst.lo,$shift
2353     emit_opcode(cbuf,0x0F);
2354     emit_opcode(cbuf,0xA5);
2355     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2356     // SHL    $dst.lo,$shift"
2357     emit_opcode(cbuf,0xD3);
2358     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2359   %}
2360 
2361   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2362     // TEST shift,32
2363     emit_opcode(cbuf,0xF7);
2364     emit_rm(cbuf, 0x3, 0, ECX_enc);
2365     emit_d32(cbuf,0x20);
2366     // JEQ,s small
2367     emit_opcode(cbuf, 0x74);
2368     emit_d8(cbuf, 0x04);
2369     // MOV    $dst.lo,$dst.hi
2370     emit_opcode( cbuf, 0x8B );
2371     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2372     // CLR    $dst.hi
2373     emit_opcode(cbuf, 0x33);
2374     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2375 // small:
2376     // SHRD   $dst.lo,$dst.hi,$shift
2377     emit_opcode(cbuf,0x0F);
2378     emit_opcode(cbuf,0xAD);
2379     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2380     // SHR    $dst.hi,$shift"
2381     emit_opcode(cbuf,0xD3);
2382     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2383   %}
2384 
2385   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2386     // TEST shift,32
2387     emit_opcode(cbuf,0xF7);
2388     emit_rm(cbuf, 0x3, 0, ECX_enc);
2389     emit_d32(cbuf,0x20);
2390     // JEQ,s small
2391     emit_opcode(cbuf, 0x74);
2392     emit_d8(cbuf, 0x05);
2393     // MOV    $dst.lo,$dst.hi
2394     emit_opcode( cbuf, 0x8B );
2395     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2396     // SAR    $dst.hi,31
2397     emit_opcode(cbuf, 0xC1);
2398     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2399     emit_d8(cbuf, 0x1F );
2400 // small:
2401     // SHRD   $dst.lo,$dst.hi,$shift
2402     emit_opcode(cbuf,0x0F);
2403     emit_opcode(cbuf,0xAD);
2404     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2405     // SAR    $dst.hi,$shift"
2406     emit_opcode(cbuf,0xD3);
2407     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2408   %}
2409 
2410 
2411   // ----------------- Encodings for floating point unit -----------------
2412   // May leave result in FPU-TOS or FPU reg depending on opcodes
2413   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2414     $$$emit8$primary;
2415     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2416   %}
2417 
2418   // Pop argument in FPR0 with FSTP ST(0)
2419   enc_class PopFPU() %{
2420     emit_opcode( cbuf, 0xDD );
2421     emit_d8( cbuf, 0xD8 );
2422   %}
2423 
2424   // !!!!! equivalent to Pop_Reg_F
2425   enc_class Pop_Reg_DPR( regDPR dst ) %{
2426     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2427     emit_d8( cbuf, 0xD8+$dst$$reg );
2428   %}
2429 
2430   enc_class Push_Reg_DPR( regDPR dst ) %{
2431     emit_opcode( cbuf, 0xD9 );
2432     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2433   %}
2434 
2435   enc_class strictfp_bias1( regDPR dst ) %{
2436     emit_opcode( cbuf, 0xDB );           // FLD m80real
2437     emit_opcode( cbuf, 0x2D );
2438     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2439     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2440     emit_opcode( cbuf, 0xC8+$dst$$reg );
2441   %}
2442 
2443   enc_class strictfp_bias2( regDPR dst ) %{
2444     emit_opcode( cbuf, 0xDB );           // FLD m80real
2445     emit_opcode( cbuf, 0x2D );
2446     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2447     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2448     emit_opcode( cbuf, 0xC8+$dst$$reg );
2449   %}
2450 
2451   // Special case for moving an integer register to a stack slot.
2452   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2453     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2454   %}
2455 
2456   // Special case for moving a register to a stack slot.
2457   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2458     // Opcode already emitted
2459     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2460     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2461     emit_d32(cbuf, $dst$$disp);   // Displacement
2462   %}
2463 
2464   // Push the integer in stackSlot 'src' onto FP-stack
2465   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2466     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2467   %}
2468 
2469   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2470   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2471     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2472   %}
2473 
2474   // Same as Pop_Mem_F except for opcode
2475   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2476   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2477     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2478   %}
2479 
2480   enc_class Pop_Reg_FPR( regFPR dst ) %{
2481     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2482     emit_d8( cbuf, 0xD8+$dst$$reg );
2483   %}
2484 
2485   enc_class Push_Reg_FPR( regFPR dst ) %{
2486     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2487     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2488   %}
2489 
2490   // Push FPU's float to a stack-slot, and pop FPU-stack
2491   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2492     int pop = 0x02;
2493     if ($src$$reg != FPR1L_enc) {
2494       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2495       emit_d8( cbuf, 0xC0-1+$src$$reg );
2496       pop = 0x03;
2497     }
2498     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2499   %}
2500 
2501   // Push FPU's double to a stack-slot, and pop FPU-stack
2502   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2503     int pop = 0x02;
2504     if ($src$$reg != FPR1L_enc) {
2505       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2506       emit_d8( cbuf, 0xC0-1+$src$$reg );
2507       pop = 0x03;
2508     }
2509     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2510   %}
2511 
2512   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2513   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2514     int pop = 0xD0 - 1; // -1 since we skip FLD
2515     if ($src$$reg != FPR1L_enc) {
2516       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2517       emit_d8( cbuf, 0xC0-1+$src$$reg );
2518       pop = 0xD8;
2519     }
2520     emit_opcode( cbuf, 0xDD );
2521     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2522   %}
2523 
2524 
2525   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2526     // load dst in FPR0
2527     emit_opcode( cbuf, 0xD9 );
2528     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2529     if ($src$$reg != FPR1L_enc) {
2530       // fincstp
2531       emit_opcode (cbuf, 0xD9);
2532       emit_opcode (cbuf, 0xF7);
2533       // swap src with FPR1:
2534       // FXCH FPR1 with src
2535       emit_opcode(cbuf, 0xD9);
2536       emit_d8(cbuf, 0xC8-1+$src$$reg );
2537       // fdecstp
2538       emit_opcode (cbuf, 0xD9);
2539       emit_opcode (cbuf, 0xF6);
2540     }
2541   %}
2542 
2543   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2544     MacroAssembler _masm(&cbuf);
2545     __ subptr(rsp, 8);
2546     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2547     __ fld_d(Address(rsp, 0));
2548     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2549     __ fld_d(Address(rsp, 0));
2550   %}
2551 
2552   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2553     MacroAssembler _masm(&cbuf);
2554     __ subptr(rsp, 4);
2555     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2556     __ fld_s(Address(rsp, 0));
2557     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2558     __ fld_s(Address(rsp, 0));
2559   %}
2560 
2561   enc_class Push_ResultD(regD dst) %{
2562     MacroAssembler _masm(&cbuf);
2563     __ fstp_d(Address(rsp, 0));
2564     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2565     __ addptr(rsp, 8);
2566   %}
2567 
2568   enc_class Push_ResultF(regF dst, immI d8) %{
2569     MacroAssembler _masm(&cbuf);
2570     __ fstp_s(Address(rsp, 0));
2571     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2572     __ addptr(rsp, $d8$$constant);
2573   %}
2574 
2575   enc_class Push_SrcD(regD src) %{
2576     MacroAssembler _masm(&cbuf);
2577     __ subptr(rsp, 8);
2578     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2579     __ fld_d(Address(rsp, 0));
2580   %}
2581 
2582   enc_class push_stack_temp_qword() %{
2583     MacroAssembler _masm(&cbuf);
2584     __ subptr(rsp, 8);
2585   %}
2586 
2587   enc_class pop_stack_temp_qword() %{
2588     MacroAssembler _masm(&cbuf);
2589     __ addptr(rsp, 8);
2590   %}
2591 
2592   enc_class push_xmm_to_fpr1(regD src) %{
2593     MacroAssembler _masm(&cbuf);
2594     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2595     __ fld_d(Address(rsp, 0));
2596   %}
2597 
2598   enc_class Push_Result_Mod_DPR( regDPR src) %{
2599     if ($src$$reg != FPR1L_enc) {
2600       // fincstp
2601       emit_opcode (cbuf, 0xD9);
2602       emit_opcode (cbuf, 0xF7);
2603       // FXCH FPR1 with src
2604       emit_opcode(cbuf, 0xD9);
2605       emit_d8(cbuf, 0xC8-1+$src$$reg );
2606       // fdecstp
2607       emit_opcode (cbuf, 0xD9);
2608       emit_opcode (cbuf, 0xF6);
2609     }
2610     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2611     // // FSTP   FPR$dst$$reg
2612     // emit_opcode( cbuf, 0xDD );
2613     // emit_d8( cbuf, 0xD8+$dst$$reg );
2614   %}
2615 
2616   enc_class fnstsw_sahf_skip_parity() %{
2617     // fnstsw ax
2618     emit_opcode( cbuf, 0xDF );
2619     emit_opcode( cbuf, 0xE0 );
2620     // sahf
2621     emit_opcode( cbuf, 0x9E );
2622     // jnp  ::skip
2623     emit_opcode( cbuf, 0x7B );
2624     emit_opcode( cbuf, 0x05 );
2625   %}
2626 
2627   enc_class emitModDPR() %{
2628     // fprem must be iterative
2629     // :: loop
2630     // fprem
2631     emit_opcode( cbuf, 0xD9 );
2632     emit_opcode( cbuf, 0xF8 );
2633     // wait
2634     emit_opcode( cbuf, 0x9b );
2635     // fnstsw ax
2636     emit_opcode( cbuf, 0xDF );
2637     emit_opcode( cbuf, 0xE0 );
2638     // sahf
2639     emit_opcode( cbuf, 0x9E );
2640     // jp  ::loop
2641     emit_opcode( cbuf, 0x0F );
2642     emit_opcode( cbuf, 0x8A );
2643     emit_opcode( cbuf, 0xF4 );
2644     emit_opcode( cbuf, 0xFF );
2645     emit_opcode( cbuf, 0xFF );
2646     emit_opcode( cbuf, 0xFF );
2647   %}
2648 
2649   enc_class fpu_flags() %{
2650     // fnstsw_ax
2651     emit_opcode( cbuf, 0xDF);
2652     emit_opcode( cbuf, 0xE0);
2653     // test ax,0x0400
2654     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2655     emit_opcode( cbuf, 0xA9 );
2656     emit_d16   ( cbuf, 0x0400 );
2657     // // // This sequence works, but stalls for 12-16 cycles on PPro
2658     // // test rax,0x0400
2659     // emit_opcode( cbuf, 0xA9 );
2660     // emit_d32   ( cbuf, 0x00000400 );
2661     //
2662     // jz exit (no unordered comparison)
2663     emit_opcode( cbuf, 0x74 );
2664     emit_d8    ( cbuf, 0x02 );
2665     // mov ah,1 - treat as LT case (set carry flag)
2666     emit_opcode( cbuf, 0xB4 );
2667     emit_d8    ( cbuf, 0x01 );
2668     // sahf
2669     emit_opcode( cbuf, 0x9E);
2670   %}
2671 
2672   enc_class cmpF_P6_fixup() %{
2673     // Fixup the integer flags in case comparison involved a NaN
2674     //
2675     // JNP exit (no unordered comparison, P-flag is set by NaN)
2676     emit_opcode( cbuf, 0x7B );
2677     emit_d8    ( cbuf, 0x03 );
2678     // MOV AH,1 - treat as LT case (set carry flag)
2679     emit_opcode( cbuf, 0xB4 );
2680     emit_d8    ( cbuf, 0x01 );
2681     // SAHF
2682     emit_opcode( cbuf, 0x9E);
2683     // NOP     // target for branch to avoid branch to branch
2684     emit_opcode( cbuf, 0x90);
2685   %}
2686 
2687 //     fnstsw_ax();
2688 //     sahf();
2689 //     movl(dst, nan_result);
2690 //     jcc(Assembler::parity, exit);
2691 //     movl(dst, less_result);
2692 //     jcc(Assembler::below, exit);
2693 //     movl(dst, equal_result);
2694 //     jcc(Assembler::equal, exit);
2695 //     movl(dst, greater_result);
2696 
2697 // less_result     =  1;
2698 // greater_result  = -1;
2699 // equal_result    = 0;
2700 // nan_result      = -1;
2701 
2702   enc_class CmpF_Result(rRegI dst) %{
2703     // fnstsw_ax();
2704     emit_opcode( cbuf, 0xDF);
2705     emit_opcode( cbuf, 0xE0);
2706     // sahf
2707     emit_opcode( cbuf, 0x9E);
2708     // movl(dst, nan_result);
2709     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2710     emit_d32( cbuf, -1 );
2711     // jcc(Assembler::parity, exit);
2712     emit_opcode( cbuf, 0x7A );
2713     emit_d8    ( cbuf, 0x13 );
2714     // movl(dst, less_result);
2715     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2716     emit_d32( cbuf, -1 );
2717     // jcc(Assembler::below, exit);
2718     emit_opcode( cbuf, 0x72 );
2719     emit_d8    ( cbuf, 0x0C );
2720     // movl(dst, equal_result);
2721     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2722     emit_d32( cbuf, 0 );
2723     // jcc(Assembler::equal, exit);
2724     emit_opcode( cbuf, 0x74 );
2725     emit_d8    ( cbuf, 0x05 );
2726     // movl(dst, greater_result);
2727     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2728     emit_d32( cbuf, 1 );
2729   %}
2730 
2731 
2732   // Compare the longs and set flags
2733   // BROKEN!  Do Not use as-is
2734   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2735     // CMP    $src1.hi,$src2.hi
2736     emit_opcode( cbuf, 0x3B );
2737     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2738     // JNE,s  done
2739     emit_opcode(cbuf,0x75);
2740     emit_d8(cbuf, 2 );
2741     // CMP    $src1.lo,$src2.lo
2742     emit_opcode( cbuf, 0x3B );
2743     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2744 // done:
2745   %}
2746 
2747   enc_class convert_int_long( regL dst, rRegI src ) %{
2748     // mov $dst.lo,$src
2749     int dst_encoding = $dst$$reg;
2750     int src_encoding = $src$$reg;
2751     encode_Copy( cbuf, dst_encoding  , src_encoding );
2752     // mov $dst.hi,$src
2753     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2754     // sar $dst.hi,31
2755     emit_opcode( cbuf, 0xC1 );
2756     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2757     emit_d8(cbuf, 0x1F );
2758   %}
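  // A rough C sketch of what the three instructions above compute
  // (illustrative only, not part of the encoding):
  //   dst.lo = src;
  //   dst.hi = src >> 31;   // arithmetic shift replicates the sign bit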
2759 
2760   enc_class convert_long_double( eRegL src ) %{
2761     // push $src.hi
2762     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2763     // push $src.lo
2764     emit_opcode(cbuf, 0x50+$src$$reg  );
2765     // fild 64-bits at [SP]
2766     emit_opcode(cbuf,0xdf);
2767     emit_d8(cbuf, 0x6C);
2768     emit_d8(cbuf, 0x24);
2769     emit_d8(cbuf, 0x00);
2770     // pop stack
2771     emit_opcode(cbuf, 0x83); // add  SP, #8
2772     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2773     emit_d8(cbuf, 0x8);
2774   %}
2775 
2776   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2777     // IMUL   EDX:EAX,$src1
2778     emit_opcode( cbuf, 0xF7 );
2779     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2780     // SAR    EDX,$cnt-32
2781     int shift_count = ((int)$cnt$$constant) - 32;
2782     if (shift_count > 0) {
2783       emit_opcode(cbuf, 0xC1);
2784       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2785       emit_d8(cbuf, shift_count);
2786     }
2787   %}
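  // Why the shift count is $cnt-32: the one-operand IMUL leaves the upper
  // 32 bits of the 64-bit product in EDX, which already accounts for a
  // shift right by 32.  Rough sketch (names are illustrative assumptions):
  //   int64_t prod = (int64_t)src1 * src2_lo;   // EDX:EAX
  //   int32_t res  = (int32_t)(prod >> cnt);    // == EDX >> (cnt - 32)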
2788 
2789   // This version doesn't have the add SP, #8
2790   enc_class convert_long_double2( eRegL src ) %{
2791     // push $src.hi
2792     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2793     // push $src.lo
2794     emit_opcode(cbuf, 0x50+$src$$reg  );
2795     // fild 64-bits at [SP]
2796     emit_opcode(cbuf,0xdf);
2797     emit_d8(cbuf, 0x6C);
2798     emit_d8(cbuf, 0x24);
2799     emit_d8(cbuf, 0x00);
2800   %}
2801 
2802   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2803     // Basic idea: long = (long)int * (long)int
2804     // IMUL EDX:EAX, src
2805     emit_opcode( cbuf, 0xF7 );
2806     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2807   %}
2808 
2809   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2810     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2811     // MUL EDX:EAX, src
2812     emit_opcode( cbuf, 0xF7 );
2813     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2814   %}
2815 
2816   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2817     // Basic idea: lo(result) = lo(x_lo * y_lo)
2818     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2819     // MOV    $tmp,$src.lo
2820     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2821     // IMUL   $tmp,EDX
2822     emit_opcode( cbuf, 0x0F );
2823     emit_opcode( cbuf, 0xAF );
2824     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2825     // MOV    EDX,$src.hi
2826     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2827     // IMUL   EDX,EAX
2828     emit_opcode( cbuf, 0x0F );
2829     emit_opcode( cbuf, 0xAF );
2830     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2831     // ADD    $tmp,EDX
2832     emit_opcode( cbuf, 0x03 );
2833     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2834     // MUL   EDX:EAX,$src.lo
2835     emit_opcode( cbuf, 0xF7 );
2836     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2837     // ADD    EDX,$tmp
2838     emit_opcode( cbuf, 0x03 );
2839     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2840   %}
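  // A rough C sketch of the 64x64->64 decomposition implemented above
  // (variable names are illustrative assumptions):
  //   uint64_t p  = (uint64_t)x_lo * y_lo;                        // MUL EDX:EAX
  //   uint32_t hi = (uint32_t)(p >> 32) + x_hi*y_lo + x_lo*y_hi;
  //   result      = ((uint64_t)hi << 32) | (uint32_t)p;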
2841 
2842   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2843     // Basic idea: lo(result) = lo(src * y_lo)
2844     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2845     // IMUL   $tmp,EDX,$src
2846     emit_opcode( cbuf, 0x6B );
2847     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2848     emit_d8( cbuf, (int)$src$$constant );
2849     // MOV    EDX,$src
2850     emit_opcode(cbuf, 0xB8 + EDX_enc);
2851     emit_d32( cbuf, (int)$src$$constant );
2852     // MUL   EDX:EAX,EDX
2853     emit_opcode( cbuf, 0xF7 );
2854     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2855     // ADD    EDX,$tmp
2856     emit_opcode( cbuf, 0x03 );
2857     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2858   %}
2859 
2860   enc_class long_div( eRegL src1, eRegL src2 ) %{
2861     // PUSH src1.hi
2862     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2863     // PUSH src1.lo
2864     emit_opcode(cbuf,               0x50+$src1$$reg  );
2865     // PUSH src2.hi
2866     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2867     // PUSH src2.lo
2868     emit_opcode(cbuf,               0x50+$src2$$reg  );
2869     // CALL directly to the runtime
2870     cbuf.set_insts_mark();
2871     emit_opcode(cbuf,0xE8);       // Call into runtime
2872     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2873     // Restore stack
2874     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2875     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2876     emit_d8(cbuf, 4*4);
2877   %}
2878 
2879   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2880     // PUSH src1.hi
2881     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2882     // PUSH src1.lo
2883     emit_opcode(cbuf,               0x50+$src1$$reg  );
2884     // PUSH src2.hi
2885     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2886     // PUSH src2.lo
2887     emit_opcode(cbuf,               0x50+$src2$$reg  );
2888     // CALL directly to the runtime
2889     cbuf.set_insts_mark();
2890     emit_opcode(cbuf,0xE8);       // Call into runtime
2891     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2892     // Restore stack
2893     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2894     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2895     emit_d8(cbuf, 4*4);
2896   %}
2897 
2898   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2899     // MOV   $tmp,$src.lo
2900     emit_opcode(cbuf, 0x8B);
2901     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2902     // OR    $tmp,$src.hi
2903     emit_opcode(cbuf, 0x0B);
2904     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2905   %}
2906 
2907   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2908     // CMP    $src1.lo,$src2.lo
2909     emit_opcode( cbuf, 0x3B );
2910     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2911     // JNE,s  skip
2912     emit_cc(cbuf, 0x70, 0x5);
2913     emit_d8(cbuf,2);
2914     // CMP    $src1.hi,$src2.hi
2915     emit_opcode( cbuf, 0x3B );
2916     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2917   %}
2918 
2919   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2920     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2921     emit_opcode( cbuf, 0x3B );
2922     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2923     // MOV    $tmp,$src1.hi
2924     emit_opcode( cbuf, 0x8B );
2925     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2926     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2927     emit_opcode( cbuf, 0x1B );
2928     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2929   %}
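  // The CMP/MOV/SBB sequence above is, roughly, a full 64-bit subtract done
  // only for its flags: the low-word CMP produces a borrow that the high-word
  // SBB folds in, so the signed LT/GE conditions read correctly afterwards.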
2930 
2931   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2932     // XOR    $tmp,$tmp
2933     emit_opcode(cbuf,0x33);  // XOR
2934     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2935     // CMP    $tmp,$src.lo
2936     emit_opcode( cbuf, 0x3B );
2937     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2938     // SBB    $tmp,$src.hi
2939     emit_opcode( cbuf, 0x1B );
2940     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2941   %}
2942 
2943  // Sniff, sniff... smells like Gnu Superoptimizer
2944   enc_class neg_long( eRegL dst ) %{
2945     emit_opcode(cbuf,0xF7);    // NEG hi
2946     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2947     emit_opcode(cbuf,0xF7);    // NEG lo
2948     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2949     emit_opcode(cbuf,0x83);    // SBB hi,0
2950     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2951     emit_d8    (cbuf,0 );
2952   %}
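  // Two's-complement 64-bit negate, roughly: lo' = -lo and
  // hi' = -hi - (lo != 0).  The NEG of the low word sets the carry flag
  // exactly when lo was non-zero, and the trailing SBB folds that borrow
  // into the already-negated high word.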
2953 
2954   enc_class enc_pop_rdx() %{
2955     emit_opcode(cbuf,0x5A);
2956   %}
2957 
2958   enc_class enc_rethrow() %{
2959     cbuf.set_insts_mark();
2960     emit_opcode(cbuf, 0xE9);        // jmp    entry
2961     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2962                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2963   %}
2964 
2965 
2966   // Convert a double to an int.  Java semantics require complex
2967   // manipulations in the corner cases.  So we set the rounding mode to
2968   // 'round toward zero', store the darned double down as an int, and reset
2969   // the rounding mode to 'nearest'.  If the hardware stored the corner-case
2970   // marker value (0x80000000), a slow-path runtime call patches up the correct result.
2971   enc_class DPR2I_encoding( regDPR src ) %{
2972     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2973     // exceptions here, so that a NaN or other corner-case value would
2974     // throw an exception (but normal values get converted at full speed).
2975     // However, I2C adapters and other float-stack manglers leave pending
2976     // invalid-op exceptions hanging.  We would have to clear them before
2977     // enabling them and that is more expensive than just testing for the
2978     // invalid value Intel stores down in the corner cases.
2979     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2980     emit_opcode(cbuf,0x2D);
2981     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2982     // Allocate a word
2983     emit_opcode(cbuf,0x83);            // SUB ESP,4
2984     emit_opcode(cbuf,0xEC);
2985     emit_d8(cbuf,0x04);
2986     // Encoding assumes a double has been pushed into FPR0.
2987     // Store down the double as an int, popping the FPU stack
2988     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2989     emit_opcode(cbuf,0x1C);
2990     emit_d8(cbuf,0x24);
2991     // Restore the rounding mode; mask the exception
2992     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2993     emit_opcode(cbuf,0x2D);
2994     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2995         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2996         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2997 
2998     // Load the converted int; adjust CPU stack
2999     emit_opcode(cbuf,0x58);       // POP EAX
3000     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3001     emit_d32   (cbuf,0x80000000); //         0x80000000
3002     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3003     emit_d8    (cbuf,0x07);       // Size of slow_call
3004     // Push src onto stack slow-path
3005     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3006     emit_d8    (cbuf,0xC0-1+$src$$reg );
3007     // CALL directly to the runtime
3008     cbuf.set_insts_mark();
3009     emit_opcode(cbuf,0xE8);       // Call into runtime
3010     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3011     // Carry on here...
3012   %}
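  // For reference (assumed Java semantics, stated here for illustration):
  // d2i converts NaN to 0 and saturates out-of-range values to
  // Integer.MIN_VALUE/MAX_VALUE.  The x87 FISTP stores the "integer
  // indefinite" value 0x80000000 for all of these corner cases, which is why
  // the fast path above compares against that marker before the slow call.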
3013 
3014   enc_class DPR2L_encoding( regDPR src ) %{
3015     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3016     emit_opcode(cbuf,0x2D);
3017     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3018     // Allocate two words
3019     emit_opcode(cbuf,0x83);            // SUB ESP,8
3020     emit_opcode(cbuf,0xEC);
3021     emit_d8(cbuf,0x08);
3022     // Encoding assumes a double has been pushed into FPR0.
3023     // Store down the double as a long, popping the FPU stack
3024     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3025     emit_opcode(cbuf,0x3C);
3026     emit_d8(cbuf,0x24);
3027     // Restore the rounding mode; mask the exception
3028     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3029     emit_opcode(cbuf,0x2D);
3030     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3031         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3032         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3033 
3034     // Load the converted long; adjust CPU stack
3035     emit_opcode(cbuf,0x58);       // POP EAX
3036     emit_opcode(cbuf,0x5A);       // POP EDX
3037     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3038     emit_d8    (cbuf,0xFA);       // rdx
3039     emit_d32   (cbuf,0x80000000); //         0x80000000
3040     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3041     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3042     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3043     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3044     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3045     emit_d8    (cbuf,0x07);       // Size of slow_call
3046     // Push src onto stack slow-path
3047     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3048     emit_d8    (cbuf,0xC0-1+$src$$reg );
3049     // CALL directly to the runtime
3050     cbuf.set_insts_mark();
3051     emit_opcode(cbuf,0xE8);       // Call into runtime
3052     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3053     // Carry on here...
3054   %}
3055 
3056   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3057     // Operand was loaded from memory into fp ST (stack top)
3058     // FMUL   ST,$src  /* D8 C8+i */
3059     emit_opcode(cbuf, 0xD8);
3060     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3061   %}
3062 
3063   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3064     // FADD   ST,src2  /* D8 C0+i */
3065     emit_opcode(cbuf, 0xD8);
3066     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3067     // could use FADDP  src2,fpST  /* DE C0+i */
3068   %}
3069 
3070   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3071     // FADDP  src2,ST  /* DE C0+i */
3072     emit_opcode(cbuf, 0xDE);
3073     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3074   %}
3075 
3076   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3077     // Operand has been loaded into fp ST (stack top)
3078     // FSUB   ST,$src1
3079     emit_opcode(cbuf, 0xD8);
3080     emit_opcode(cbuf, 0xE0 + $src1$$reg);
3081 
3082     // FDIV   ST,$src2
3083     emit_opcode(cbuf, 0xD8);
3084     emit_opcode(cbuf, 0xF0 + $src2$$reg);
3085   %}
3086 
3087   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3088     // Operand was loaded from memory into fp ST (stack top)
3089     // FADD   ST,$src  /* D8 C0+i */
3090     emit_opcode(cbuf, 0xD8);
3091     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3092 
3093     // FMUL   ST,src2  /* D8 C8+i */
3094     emit_opcode(cbuf, 0xD8);
3095     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3096   %}
3097 
3098 
3099   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3100     // Operand was loaded from memory into fp ST (stack top)
3101     // FADD   ST,$src  /* D8 C0+i */
3102     emit_opcode(cbuf, 0xD8);
3103     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3104 
3105     // FMULP  src2,ST  /* DE C8+i */
3106     emit_opcode(cbuf, 0xDE);
3107     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3108   %}
3109 
3110   // Atomically load the volatile long
3111   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3112     emit_opcode(cbuf,0xDF);
3113     int rm_byte_opcode = 0x05;
3114     int base     = $mem$$base;
3115     int index    = $mem$$index;
3116     int scale    = $mem$$scale;
3117     int displace = $mem$$disp;
3118     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3119     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3120     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3121   %}
3122 
3123   // Volatile Store Long.  Must be atomic, so move it into
3124   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3125   // target address before the store (for null-ptr checks)
3126   // so the memory operand is used twice in the encoding.
3127   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3128     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3129     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3130     emit_opcode(cbuf,0xDF);
3131     int rm_byte_opcode = 0x07;
3132     int base     = $mem$$base;
3133     int index    = $mem$$index;
3134     int scale    = $mem$$scale;
3135     int displace = $mem$$disp;
3136     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3137     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3138   %}
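  // The emitted pair above is roughly: FILD qword ptr [ESP+src] to pull the
  // long onto the FPU stack, then FISTP qword ptr [mem] to store all 64 bits
  // with a single memory operation.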
3139 
3140   // Safepoint Poll.  This polls the safepoint page, and causes an
3141   // exception if it is not readable. Unfortunately, it kills the condition code
3142   // in the process.
3143   // We currently use TESTL [spp],EDI.
3144   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3145 
3146   enc_class Safepoint_Poll() %{
3147     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3148     emit_opcode(cbuf,0x85);
3149     emit_rm (cbuf, 0x0, 0x7, 0x5);
3150     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3151   %}
3152 %}
3153 
3154 
3155 //----------FRAME--------------------------------------------------------------
3156 // Definition of frame structure and management information.
3157 //
3158 //  S T A C K   L A Y O U T    Allocators stack-slot number
3159 //                             |   (to get allocators register number
3160 //  G  Owned by    |        |  v    add OptoReg::stack0())
3161 //  r   CALLER     |        |
3162 //  o     |        +--------+      pad to even-align allocators stack-slot
3163 //  w     V        |  pad0  |        numbers; owned by CALLER
3164 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3165 //  h     ^        |   in   |  5
3166 //        |        |  args  |  4   Holes in incoming args owned by SELF
3167 //  |     |        |        |  3
3168 //  |     |        +--------+
3169 //  V     |        | old out|      Empty on Intel, window on Sparc
3170 //        |    old |preserve|      Must be even aligned.
3171 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3172 //        |        |   in   |  3   area for Intel ret address
3173 //     Owned by    |preserve|      Empty on Sparc.
3174 //       SELF      +--------+
3175 //        |        |  pad2  |  2   pad to align old SP
3176 //        |        +--------+  1
3177 //        |        | locks  |  0
3178 //        |        +--------+----> OptoReg::stack0(), even aligned
3179 //        |        |  pad1  | 11   pad to align new SP
3180 //        |        +--------+
3181 //        |        |        | 10
3182 //        |        | spills |  9   spills
3183 //        V        |        |  8   (pad0 slot for callee)
3184 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3185 //        ^        |  out   |  7
3186 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3187 //     Owned by    +--------+
3188 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3189 //        |    new |preserve|      Must be even-aligned.
3190 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3191 //        |        |        |
3192 //
3193 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3194 //         known from SELF's arguments and the Java calling convention.
3195 //         Region 6-7 is determined per call site.
3196 // Note 2: If the calling convention leaves holes in the incoming argument
3197 //         area, those holes are owned by SELF.  Holes in the outgoing area
3198   //         are owned by the CALLEE.  Holes should not be necessary in the
3199 //         incoming area, as the Java calling convention is completely under
3200 //         the control of the AD file.  Doubles can be sorted and packed to
3201   //         avoid holes.  Holes in the outgoing arguments may be necessary for
3202 //         varargs C calling conventions.
3203 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3204 //         even aligned with pad0 as needed.
3205 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3206 //         region 6-11 is even aligned; it may be padded out more so that
3207 //         the region from SP to FP meets the minimum stack alignment.
3208 
3209 frame %{
3210   // What direction does stack grow in (assumed to be same for C & Java)
3211   stack_direction(TOWARDS_LOW);
3212 
3213   // These three registers define part of the calling convention
3214   // between compiled code and the interpreter.
3215   inline_cache_reg(EAX);                // Inline Cache Register
3216   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3217 
3218   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3219   cisc_spilling_operand_name(indOffset32);
3220 
3221   // Number of stack slots consumed by locking an object
3222   sync_stack_slots(1);
3223 
3224   // Compiled code's Frame Pointer
3225   frame_pointer(ESP);
3226   // Interpreter stores its frame pointer in a register which is
3227   // stored to the stack by I2CAdaptors.
3228   // I2CAdaptors convert from interpreted Java to compiled Java.
3229   interpreter_frame_pointer(EBP);
3230 
3231   // Stack alignment requirement
3232   // Alignment size in bytes (128-bit -> 16 bytes)
3233   stack_alignment(StackAlignmentInBytes);
3234 
3235   // Number of stack slots between incoming argument block and the start of
3236   // a new frame.  The PROLOG must add this many slots to the stack.  The
3237   // EPILOG must remove this many slots.  Intel needs one slot for
3238   // the return address and one for rbp (must save rbp).
3239   in_preserve_stack_slots(2+VerifyStackAtCalls);
3240 
3241   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3242   // for calls to C.  Supports the var-args backing area for register parms.
3243   varargs_C_out_slots_killed(0);
3244 
3245   // The after-PROLOG location of the return address.  Location of
3246   // return address specifies a type (REG or STACK) and a number
3247   // representing the register number (i.e. - use a register name) or
3248   // stack slot.
3249   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3250   // Otherwise, it is above the locks and verification slot and the alignment word.
3251   return_addr(STACK - 1 +
3252               round_to((Compile::current()->in_preserve_stack_slots() +
3253                         Compile::current()->fixed_slots()),
3254                        stack_alignment_in_slots()));
3255 
3256   // Body of function which returns an integer array locating
3257   // arguments either in registers or in stack slots.  Passed an array
3258   // of ideal registers called "sig" and a "length" count.  Stack-slot
3259   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3260   // arguments for a CALLEE.  Incoming stack arguments are
3261   // automatically biased by the preserve_stack_slots field above.
3262   calling_convention %{
3263     // No difference between ingoing/outgoing; just pass false
3264     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3265   %}
3266 
3267 
3268   // Body of function which returns an integer array locating
3269   // arguments either in registers or in stack slots.  Passed an array
3270   // of ideal registers called "sig" and a "length" count.  Stack-slot
3271   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3272   // arguments for a CALLEE.  Incoming stack arguments are
3273   // automatically biased by the preserve_stack_slots field above.
3274   c_calling_convention %{
3275     // This is obviously always outgoing
3276     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3277   %}
3278 
3279   // Location of C & interpreter return values
3280   c_return_value %{
3281     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3282     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3283     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3284 
3285     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3286     // that C functions return float and double results in XMM0.
3287     if( ideal_reg == Op_RegD && UseSSE>=2 )
3288       return OptoRegPair(XMM0b_num,XMM0_num);
3289     if( ideal_reg == Op_RegF && UseSSE>=2 )
3290       return OptoRegPair(OptoReg::Bad,XMM0_num);
3291 
3292     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3293   %}
3294 
3295   // Location of return values
3296   return_value %{
3297     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3298     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3299     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3300     if( ideal_reg == Op_RegD && UseSSE>=2 )
3301       return OptoRegPair(XMM0b_num,XMM0_num);
3302     if( ideal_reg == Op_RegF && UseSSE>=1 )
3303       return OptoRegPair(OptoReg::Bad,XMM0_num);
3304     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3305   %}
3306 
3307 %}
3308 
3309 //----------ATTRIBUTES---------------------------------------------------------
3310 //----------Operand Attributes-------------------------------------------------
3311 op_attrib op_cost(0);        // Required cost attribute
3312 
3313 //----------Instruction Attributes---------------------------------------------
3314 ins_attrib ins_cost(100);       // Required cost attribute
3315 ins_attrib ins_size(8);         // Required size attribute (in bits)
3316 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3317                                 // non-matching short branch variant of some
3318                                 // long branch?
3319 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3320                                 // specifies the alignment that some part of the instruction (not
3321                                 // necessarily the start) requires.  If > 1, a compute_padding()
3322                                 // function must be provided for the instruction
3323 
3324 //----------OPERANDS-----------------------------------------------------------
3325 // Operand definitions must precede instruction definitions for correct parsing
3326 // in the ADLC because operands constitute user defined types which are used in
3327 // instruction definitions.
3328 
3329 //----------Simple Operands----------------------------------------------------
3330 // Immediate Operands
3331 // Integer Immediate
3332 operand immI() %{
3333   match(ConI);
3334 
3335   op_cost(10);
3336   format %{ %}
3337   interface(CONST_INTER);
3338 %}
3339 
3340 // Constant for test vs zero
3341 operand immI0() %{
3342   predicate(n->get_int() == 0);
3343   match(ConI);
3344 
3345   op_cost(0);
3346   format %{ %}
3347   interface(CONST_INTER);
3348 %}
3349 
3350 // Constant for increment
3351 operand immI1() %{
3352   predicate(n->get_int() == 1);
3353   match(ConI);
3354 
3355   op_cost(0);
3356   format %{ %}
3357   interface(CONST_INTER);
3358 %}
3359 
3360 // Constant for decrement
3361 operand immI_M1() %{
3362   predicate(n->get_int() == -1);
3363   match(ConI);
3364 
3365   op_cost(0);
3366   format %{ %}
3367   interface(CONST_INTER);
3368 %}
3369 
3370 // Valid scale values for addressing modes
3371 operand immI2() %{
3372   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3373   match(ConI);
3374 
3375   format %{ %}
3376   interface(CONST_INTER);
3377 %}
3378 
3379 operand immI8() %{
3380   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3381   match(ConI);
3382 
3383   op_cost(5);
3384   format %{ %}
3385   interface(CONST_INTER);
3386 %}
3387 
3388 operand immI16() %{
3389   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3390   match(ConI);
3391 
3392   op_cost(10);
3393   format %{ %}
3394   interface(CONST_INTER);
3395 %}
3396 
3397 // Int Immediate non-negative
3398 operand immU31()
3399 %{
3400   predicate(n->get_int() >= 0);
3401   match(ConI);
3402 
3403   op_cost(0);
3404   format %{ %}
3405   interface(CONST_INTER);
3406 %}
3407 
3408 // Constant for long shifts
3409 operand immI_32() %{
3410   predicate( n->get_int() == 32 );
3411   match(ConI);
3412 
3413   op_cost(0);
3414   format %{ %}
3415   interface(CONST_INTER);
3416 %}
3417 
3418 operand immI_1_31() %{
3419   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3420   match(ConI);
3421 
3422   op_cost(0);
3423   format %{ %}
3424   interface(CONST_INTER);
3425 %}
3426 
3427 operand immI_32_63() %{
3428   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3429   match(ConI);
3430   op_cost(0);
3431 
3432   format %{ %}
3433   interface(CONST_INTER);
3434 %}
3435 
3436 operand immI_1() %{
3437   predicate( n->get_int() == 1 );
3438   match(ConI);
3439 
3440   op_cost(0);
3441   format %{ %}
3442   interface(CONST_INTER);
3443 %}
3444 
3445 operand immI_2() %{
3446   predicate( n->get_int() == 2 );
3447   match(ConI);
3448 
3449   op_cost(0);
3450   format %{ %}
3451   interface(CONST_INTER);
3452 %}
3453 
3454 operand immI_3() %{
3455   predicate( n->get_int() == 3 );
3456   match(ConI);
3457 
3458   op_cost(0);
3459   format %{ %}
3460   interface(CONST_INTER);
3461 %}
3462 
3463 // Pointer Immediate
3464 operand immP() %{
3465   match(ConP);
3466 
3467   op_cost(10);
3468   format %{ %}
3469   interface(CONST_INTER);
3470 %}
3471 
3472 // NULL Pointer Immediate
3473 operand immP0() %{
3474   predicate( n->get_ptr() == 0 );
3475   match(ConP);
3476   op_cost(0);
3477 
3478   format %{ %}
3479   interface(CONST_INTER);
3480 %}
3481 
3482 // Long Immediate
3483 operand immL() %{
3484   match(ConL);
3485 
3486   op_cost(20);
3487   format %{ %}
3488   interface(CONST_INTER);
3489 %}
3490 
3491 // Long Immediate zero
3492 operand immL0() %{
3493   predicate( n->get_long() == 0L );
3494   match(ConL);
3495   op_cost(0);
3496 
3497   format %{ %}
3498   interface(CONST_INTER);
3499 %}
3500 
3501 // Long Immediate minus one
3502 operand immL_M1() %{
3503   predicate( n->get_long() == -1L );
3504   match(ConL);
3505   op_cost(0);
3506 
3507   format %{ %}
3508   interface(CONST_INTER);
3509 %}
3510 
3511 // Long immediate from 0 to 127.
3512 // Used for a shorter form of long mul by 10.
3513 operand immL_127() %{
3514   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3515   match(ConL);
3516   op_cost(0);
3517 
3518   format %{ %}
3519   interface(CONST_INTER);
3520 %}
3521 
3522 // Long Immediate: low 32-bit mask
3523 operand immL_32bits() %{
3524   predicate(n->get_long() == 0xFFFFFFFFL);
3525   match(ConL);
3526   op_cost(0);
3527 
3528   format %{ %}
3529   interface(CONST_INTER);
3530 %}
3531 
3532 // Long Immediate: fits in signed 32 bits
3533 operand immL32() %{
3534   predicate(n->get_long() == (int)(n->get_long()));
3535   match(ConL);
3536   op_cost(20);
3537 
3538   format %{ %}
3539   interface(CONST_INTER);
3540 %}
3541 
3542 //Double Immediate zero
3543 operand immDPR0() %{
3544   // Do additional (and counter-intuitive) test against NaN to work around VC++
3545   // bug that generates code such that NaNs compare equal to 0.0
3546   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3547   match(ConD);
3548 
3549   op_cost(5);
3550   format %{ %}
3551   interface(CONST_INTER);
3552 %}
3553 
3554 // Double Immediate one
3555 operand immDPR1() %{
3556   predicate( UseSSE<=1 && n->getd() == 1.0 );
3557   match(ConD);
3558 
3559   op_cost(5);
3560   format %{ %}
3561   interface(CONST_INTER);
3562 %}
3563 
3564 // Double Immediate
3565 operand immDPR() %{
3566   predicate(UseSSE<=1);
3567   match(ConD);
3568 
3569   op_cost(5);
3570   format %{ %}
3571   interface(CONST_INTER);
3572 %}
3573 
3574 operand immD() %{
3575   predicate(UseSSE>=2);
3576   match(ConD);
3577 
3578   op_cost(5);
3579   format %{ %}
3580   interface(CONST_INTER);
3581 %}
3582 
3583 // Double Immediate zero
3584 operand immD0() %{
3585   // Do additional (and counter-intuitive) test against NaN to work around VC++
3586   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3587   // compare equal to -0.0.
3588   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3589   match(ConD);
3590 
3591   format %{ %}
3592   interface(CONST_INTER);
3593 %}
3594 
3595 // Float Immediate zero
3596 operand immFPR0() %{
3597   predicate(UseSSE == 0 && n->getf() == 0.0F);
3598   match(ConF);
3599 
3600   op_cost(5);
3601   format %{ %}
3602   interface(CONST_INTER);
3603 %}
3604 
3605 // Float Immediate one
3606 operand immFPR1() %{
3607   predicate(UseSSE == 0 && n->getf() == 1.0F);
3608   match(ConF);
3609 
3610   op_cost(5);
3611   format %{ %}
3612   interface(CONST_INTER);
3613 %}
3614 
3615 // Float Immediate
3616 operand immFPR() %{
3617   predicate( UseSSE == 0 );
3618   match(ConF);
3619 
3620   op_cost(5);
3621   format %{ %}
3622   interface(CONST_INTER);
3623 %}
3624 
3625 // Float Immediate
3626 operand immF() %{
3627   predicate(UseSSE >= 1);
3628   match(ConF);
3629 
3630   op_cost(5);
3631   format %{ %}
3632   interface(CONST_INTER);
3633 %}
3634 
3635 // Float Immediate zero.  Zero and not -0.0
3636 operand immF0() %{
3637   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3638   match(ConF);
3639 
3640   op_cost(5);
3641   format %{ %}
3642   interface(CONST_INTER);
3643 %}
3644 
3645 // Immediates for special shifts (sign extend)
3646 
3647 // Shift counts for sign extension
3648 operand immI_16() %{
3649   predicate( n->get_int() == 16 );
3650   match(ConI);
3651 
3652   format %{ %}
3653   interface(CONST_INTER);
3654 %}
3655 
3656 operand immI_24() %{
3657   predicate( n->get_int() == 24 );
3658   match(ConI);
3659 
3660   format %{ %}
3661   interface(CONST_INTER);
3662 %}
3663 
3664 // Constant for byte-wide masking
3665 operand immI_255() %{
3666   predicate( n->get_int() == 255 );
3667   match(ConI);
3668 
3669   format %{ %}
3670   interface(CONST_INTER);
3671 %}
3672 
3673 // Constant for short-wide masking
3674 operand immI_65535() %{
3675   predicate(n->get_int() == 65535);
3676   match(ConI);
3677 
3678   format %{ %}
3679   interface(CONST_INTER);
3680 %}
3681 
3682 // Register Operands
3683 // Integer Register
3684 operand rRegI() %{
3685   constraint(ALLOC_IN_RC(int_reg));
3686   match(RegI);
3687   match(xRegI);
3688   match(eAXRegI);
3689   match(eBXRegI);
3690   match(eCXRegI);
3691   match(eDXRegI);
3692   match(eDIRegI);
3693   match(eSIRegI);
3694 
3695   format %{ %}
3696   interface(REG_INTER);
3697 %}
3698 
3699 // Subset of Integer Register
3700 operand xRegI(rRegI reg) %{
3701   constraint(ALLOC_IN_RC(int_x_reg));
3702   match(reg);
3703   match(eAXRegI);
3704   match(eBXRegI);
3705   match(eCXRegI);
3706   match(eDXRegI);
3707 
3708   format %{ %}
3709   interface(REG_INTER);
3710 %}
3711 
3712 // Special Registers
3713 operand eAXRegI(xRegI reg) %{
3714   constraint(ALLOC_IN_RC(eax_reg));
3715   match(reg);
3716   match(rRegI);
3717 
3718   format %{ "EAX" %}
3719   interface(REG_INTER);
3720 %}
3721 
3722 // Special Registers
3723 operand eBXRegI(xRegI reg) %{
3724   constraint(ALLOC_IN_RC(ebx_reg));
3725   match(reg);
3726   match(rRegI);
3727 
3728   format %{ "EBX" %}
3729   interface(REG_INTER);
3730 %}
3731 
3732 operand eCXRegI(xRegI reg) %{
3733   constraint(ALLOC_IN_RC(ecx_reg));
3734   match(reg);
3735   match(rRegI);
3736 
3737   format %{ "ECX" %}
3738   interface(REG_INTER);
3739 %}
3740 
3741 operand eDXRegI(xRegI reg) %{
3742   constraint(ALLOC_IN_RC(edx_reg));
3743   match(reg);
3744   match(rRegI);
3745 
3746   format %{ "EDX" %}
3747   interface(REG_INTER);
3748 %}
3749 
3750 operand eDIRegI(xRegI reg) %{
3751   constraint(ALLOC_IN_RC(edi_reg));
3752   match(reg);
3753   match(rRegI);
3754 
3755   format %{ "EDI" %}
3756   interface(REG_INTER);
3757 %}
3758 
3759 operand naxRegI() %{
3760   constraint(ALLOC_IN_RC(nax_reg));
3761   match(RegI);
3762   match(eCXRegI);
3763   match(eDXRegI);
3764   match(eSIRegI);
3765   match(eDIRegI);
3766 
3767   format %{ %}
3768   interface(REG_INTER);
3769 %}
3770 
3771 operand nadxRegI() %{
3772   constraint(ALLOC_IN_RC(nadx_reg));
3773   match(RegI);
3774   match(eBXRegI);
3775   match(eCXRegI);
3776   match(eSIRegI);
3777   match(eDIRegI);
3778 
3779   format %{ %}
3780   interface(REG_INTER);
3781 %}
3782 
3783 operand ncxRegI() %{
3784   constraint(ALLOC_IN_RC(ncx_reg));
3785   match(RegI);
3786   match(eAXRegI);
3787   match(eDXRegI);
3788   match(eSIRegI);
3789   match(eDIRegI);
3790 
3791   format %{ %}
3792   interface(REG_INTER);
3793 %}
3794 
3795 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3796 // //
3797 operand eSIRegI(xRegI reg) %{
3798    constraint(ALLOC_IN_RC(esi_reg));
3799    match(reg);
3800    match(rRegI);
3801 
3802    format %{ "ESI" %}
3803    interface(REG_INTER);
3804 %}
3805 
3806 // Pointer Register
3807 operand anyRegP() %{
3808   constraint(ALLOC_IN_RC(any_reg));
3809   match(RegP);
3810   match(eAXRegP);
3811   match(eBXRegP);
3812   match(eCXRegP);
3813   match(eDIRegP);
3814   match(eRegP);
3815 
3816   format %{ %}
3817   interface(REG_INTER);
3818 %}
3819 
3820 operand eRegP() %{
3821   constraint(ALLOC_IN_RC(int_reg));
3822   match(RegP);
3823   match(eAXRegP);
3824   match(eBXRegP);
3825   match(eCXRegP);
3826   match(eDIRegP);
3827 
3828   format %{ %}
3829   interface(REG_INTER);
3830 %}
3831 
3832 // On Windows 95, EBP is not safe to use for implicit null tests.
3833 operand eRegP_no_EBP() %{
3834   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3835   match(RegP);
3836   match(eAXRegP);
3837   match(eBXRegP);
3838   match(eCXRegP);
3839   match(eDIRegP);
3840 
3841   op_cost(100);
3842   format %{ %}
3843   interface(REG_INTER);
3844 %}
3845 
3846 operand naxRegP() %{
3847   constraint(ALLOC_IN_RC(nax_reg));
3848   match(RegP);
3849   match(eBXRegP);
3850   match(eDXRegP);
3851   match(eCXRegP);
3852   match(eSIRegP);
3853   match(eDIRegP);
3854 
3855   format %{ %}
3856   interface(REG_INTER);
3857 %}
3858 
3859 operand nabxRegP() %{
3860   constraint(ALLOC_IN_RC(nabx_reg));
3861   match(RegP);
3862   match(eCXRegP);
3863   match(eDXRegP);
3864   match(eSIRegP);
3865   match(eDIRegP);
3866 
3867   format %{ %}
3868   interface(REG_INTER);
3869 %}
3870 
3871 operand pRegP() %{
3872   constraint(ALLOC_IN_RC(p_reg));
3873   match(RegP);
3874   match(eBXRegP);
3875   match(eDXRegP);
3876   match(eSIRegP);
3877   match(eDIRegP);
3878 
3879   format %{ %}
3880   interface(REG_INTER);
3881 %}
3882 
3883 // Special Registers
3884 // Return a pointer value
3885 operand eAXRegP(eRegP reg) %{
3886   constraint(ALLOC_IN_RC(eax_reg));
3887   match(reg);
3888   format %{ "EAX" %}
3889   interface(REG_INTER);
3890 %}
3891 
3892 // Used in AtomicAdd
3893 operand eBXRegP(eRegP reg) %{
3894   constraint(ALLOC_IN_RC(ebx_reg));
3895   match(reg);
3896   format %{ "EBX" %}
3897   interface(REG_INTER);
3898 %}
3899 
3900 // Tail-call (interprocedural jump) to interpreter
3901 operand eCXRegP(eRegP reg) %{
3902   constraint(ALLOC_IN_RC(ecx_reg));
3903   match(reg);
3904   format %{ "ECX" %}
3905   interface(REG_INTER);
3906 %}
3907 
3908 operand eSIRegP(eRegP reg) %{
3909   constraint(ALLOC_IN_RC(esi_reg));
3910   match(reg);
3911   format %{ "ESI" %}
3912   interface(REG_INTER);
3913 %}
3914 
3915 // Used in rep stosw
3916 operand eDIRegP(eRegP reg) %{
3917   constraint(ALLOC_IN_RC(edi_reg));
3918   match(reg);
3919   format %{ "EDI" %}
3920   interface(REG_INTER);
3921 %}
3922 
3923 operand eRegL() %{
3924   constraint(ALLOC_IN_RC(long_reg));
3925   match(RegL);
3926   match(eADXRegL);
3927 
3928   format %{ %}
3929   interface(REG_INTER);
3930 %}
3931 
3932 operand eADXRegL( eRegL reg ) %{
3933   constraint(ALLOC_IN_RC(eadx_reg));
3934   match(reg);
3935 
3936   format %{ "EDX:EAX" %}
3937   interface(REG_INTER);
3938 %}
3939 
3940 operand eBCXRegL( eRegL reg ) %{
3941   constraint(ALLOC_IN_RC(ebcx_reg));
3942   match(reg);
3943 
3944   format %{ "EBX:ECX" %}
3945   interface(REG_INTER);
3946 %}
3947 
3948 // Special case for integer high multiply
3949 operand eADXRegL_low_only() %{
3950   constraint(ALLOC_IN_RC(eadx_reg));
3951   match(RegL);
3952 
3953   format %{ "EAX" %}
3954   interface(REG_INTER);
3955 %}
3956 
3957 // Flags register, used as output of compare instructions
3958 operand eFlagsReg() %{
3959   constraint(ALLOC_IN_RC(int_flags));
3960   match(RegFlags);
3961 
3962   format %{ "EFLAGS" %}
3963   interface(REG_INTER);
3964 %}
3965 
3966 // Flags register, used as output of FLOATING POINT compare instructions
3967 operand eFlagsRegU() %{
3968   constraint(ALLOC_IN_RC(int_flags));
3969   match(RegFlags);
3970 
3971   format %{ "EFLAGS_U" %}
3972   interface(REG_INTER);
3973 %}
3974 
3975 operand eFlagsRegUCF() %{
3976   constraint(ALLOC_IN_RC(int_flags));
3977   match(RegFlags);
3978   predicate(false);
3979 
3980   format %{ "EFLAGS_U_CF" %}
3981   interface(REG_INTER);
3982 %}
3983 
3984 // Condition Code Register used by long compare
3985 operand flagsReg_long_LTGE() %{
3986   constraint(ALLOC_IN_RC(int_flags));
3987   match(RegFlags);
3988   format %{ "FLAGS_LTGE" %}
3989   interface(REG_INTER);
3990 %}
3991 operand flagsReg_long_EQNE() %{
3992   constraint(ALLOC_IN_RC(int_flags));
3993   match(RegFlags);
3994   format %{ "FLAGS_EQNE" %}
3995   interface(REG_INTER);
3996 %}
3997 operand flagsReg_long_LEGT() %{
3998   constraint(ALLOC_IN_RC(int_flags));
3999   match(RegFlags);
4000   format %{ "FLAGS_LEGT" %}
4001   interface(REG_INTER);
4002 %}
4003 
4004 // Float register operands
4005 operand regDPR() %{
4006   predicate( UseSSE < 2 );
4007   constraint(ALLOC_IN_RC(fp_dbl_reg));
4008   match(RegD);
4009   match(regDPR1);
4010   match(regDPR2);
4011   format %{ %}
4012   interface(REG_INTER);
4013 %}
4014 
4015 operand regDPR1(regDPR reg) %{
4016   predicate( UseSSE < 2 );
4017   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4018   match(reg);
4019   format %{ "FPR1" %}
4020   interface(REG_INTER);
4021 %}
4022 
4023 operand regDPR2(regDPR reg) %{
4024   predicate( UseSSE < 2 );
4025   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4026   match(reg);
4027   format %{ "FPR2" %}
4028   interface(REG_INTER);
4029 %}
4030 
4031 operand regnotDPR1(regDPR reg) %{
4032   predicate( UseSSE < 2 );
4033   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4034   match(reg);
4035   format %{ %}
4036   interface(REG_INTER);
4037 %}
4038 
4039 // Float register operands
4040 operand regFPR() %{
4041   predicate( UseSSE < 2 );
4042   constraint(ALLOC_IN_RC(fp_flt_reg));
4043   match(RegF);
4044   match(regFPR1);
4045   format %{ %}
4046   interface(REG_INTER);
4047 %}
4048 
4049 // Float register operands
4050 operand regFPR1(regFPR reg) %{
4051   predicate( UseSSE < 2 );
4052   constraint(ALLOC_IN_RC(fp_flt_reg0));
4053   match(reg);
4054   format %{ "FPR1" %}
4055   interface(REG_INTER);
4056 %}
4057 
4058 // XMM Float register operands
4059 operand regF() %{
4060   predicate( UseSSE>=1 );
4061   constraint(ALLOC_IN_RC(float_reg_legacy));
4062   match(RegF);
4063   format %{ %}
4064   interface(REG_INTER);
4065 %}
4066 
4067 // XMM Double register operands
4068 operand regD() %{
4069   predicate( UseSSE>=2 );
4070   constraint(ALLOC_IN_RC(double_reg_legacy));
4071   match(RegD);
4072   format %{ %}
4073   interface(REG_INTER);
4074 %}
4075 
4076 // Vectors: note that we use legacy registers to avoid extra (unneeded in the
4077 // 32-bit VM) runtime code generation via reg_class_dynamic.
4078 operand vecS() %{
4079   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4080   match(VecS);
4081 
4082   format %{ %}
4083   interface(REG_INTER);
4084 %}
4085 
4086 operand vecD() %{
4087   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4088   match(VecD);
4089 
4090   format %{ %}
4091   interface(REG_INTER);
4092 %}
4093 
4094 operand vecX() %{
4095   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4096   match(VecX);
4097 
4098   format %{ %}
4099   interface(REG_INTER);
4100 %}
4101 
4102 operand vecY() %{
4103   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4104   match(VecY);
4105 
4106   format %{ %}
4107   interface(REG_INTER);
4108 %}
4109 
4110 //----------Memory Operands----------------------------------------------------
4111 // Direct Memory Operand
4112 operand direct(immP addr) %{
4113   match(addr);
4114 
4115   format %{ "[$addr]" %}
4116   interface(MEMORY_INTER) %{
4117     base(0xFFFFFFFF);
4118     index(0x4);
4119     scale(0x0);
4120     disp($addr);
4121   %}
4122 %}
4123 
4124 // Indirect Memory Operand
4125 operand indirect(eRegP reg) %{
4126   constraint(ALLOC_IN_RC(int_reg));
4127   match(reg);
4128 
4129   format %{ "[$reg]" %}
4130   interface(MEMORY_INTER) %{
4131     base($reg);
4132     index(0x4);
4133     scale(0x0);
4134     disp(0x0);
4135   %}
4136 %}
4137 
4138 // Indirect Memory Plus Short Offset Operand
4139 operand indOffset8(eRegP reg, immI8 off) %{
4140   match(AddP reg off);
4141 
4142   format %{ "[$reg + $off]" %}
4143   interface(MEMORY_INTER) %{
4144     base($reg);
4145     index(0x4);
4146     scale(0x0);
4147     disp($off);
4148   %}
4149 %}
4150 
4151 // Indirect Memory Plus Long Offset Operand
4152 operand indOffset32(eRegP reg, immI off) %{
4153   match(AddP reg off);
4154 
4155   format %{ "[$reg + $off]" %}
4156   interface(MEMORY_INTER) %{
4157     base($reg);
4158     index(0x4);
4159     scale(0x0);
4160     disp($off);
4161   %}
4162 %}
4163 
4164 // Indirect Memory Plus Long Offset Operand
4165 operand indOffset32X(rRegI reg, immP off) %{
4166   match(AddP off reg);
4167 
4168   format %{ "[$reg + $off]" %}
4169   interface(MEMORY_INTER) %{
4170     base($reg);
4171     index(0x4);
4172     scale(0x0);
4173     disp($off);
4174   %}
4175 %}
4176 
4177 // Indirect Memory Plus Index Register Plus Offset Operand
4178 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4179   match(AddP (AddP reg ireg) off);
4180 
4181   op_cost(10);
4182   format %{"[$reg + $off + $ireg]" %}
4183   interface(MEMORY_INTER) %{
4184     base($reg);
4185     index($ireg);
4186     scale(0x0);
4187     disp($off);
4188   %}
4189 %}
4190 
4191 // Indirect Memory Plus Index Register Plus Offset Operand
4192 operand indIndex(eRegP reg, rRegI ireg) %{
4193   match(AddP reg ireg);
4194 
4195   op_cost(10);
4196   format %{"[$reg + $ireg]" %}
4197   interface(MEMORY_INTER) %{
4198     base($reg);
4199     index($ireg);
4200     scale(0x0);
4201     disp(0x0);
4202   %}
4203 %}
4204 
4205 // // -------------------------------------------------------------------------
4206 // // 486 architecture doesn't support "scale * index + offset" without a base
4207 // // -------------------------------------------------------------------------
4208 // // Scaled Memory Operands
4209 // // Indirect Memory Times Scale Plus Offset Operand
4210 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4211 //   match(AddP off (LShiftI ireg scale));
4212 //
4213 //   op_cost(10);
4214 //   format %{"[$off + $ireg << $scale]" %}
4215 //   interface(MEMORY_INTER) %{
4216 //     base(0x4);
4217 //     index($ireg);
4218 //     scale($scale);
4219 //     disp($off);
4220 //   %}
4221 // %}
4222 
4223 // Indirect Memory Times Scale Plus Index Register
4224 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4225   match(AddP reg (LShiftI ireg scale));
4226 
4227   op_cost(10);
4228   format %{"[$reg + $ireg << $scale]" %}
4229   interface(MEMORY_INTER) %{
4230     base($reg);
4231     index($ireg);
4232     scale($scale);
4233     disp(0x0);
4234   %}
4235 %}
4236 
4237 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4238 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4239   match(AddP (AddP reg (LShiftI ireg scale)) off);
4240 
4241   op_cost(10);
4242   format %{"[$reg + $off + $ireg << $scale]" %}
4243   interface(MEMORY_INTER) %{
4244     base($reg);
4245     index($ireg);
4246     scale($scale);
4247     disp($off);
4248   %}
4249 %}
4250 
4251 //----------Load Long Memory Operands------------------------------------------
4252 // The load-long idiom will use its address expression again after loading
4253 // the first word of the long.  If the load-long destination overlaps with
4254 // registers used in the addressing expression, the 2nd half will be loaded
4255 // from a clobbered address.  Fix this by requiring that load-long use
4256 // address registers that do not overlap with the load-long target.
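// For example (hypothetical): if the pointer being dereferenced were
// allocated to the same register as the low half of the result, the first
// 32-bit load would clobber the pointer and the follow-up load at +4 would
// read from a garbage address.  Pinning the base to ESI below avoids this.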
4257 
4258 // load-long support
4259 operand load_long_RegP() %{
4260   constraint(ALLOC_IN_RC(esi_reg));
4261   match(RegP);
4262   match(eSIRegP);
4263   op_cost(100);
4264   format %{  %}
4265   interface(REG_INTER);
4266 %}
4267 
4268 // Indirect Memory Operand Long
4269 operand load_long_indirect(load_long_RegP reg) %{
4270   constraint(ALLOC_IN_RC(esi_reg));
4271   match(reg);
4272 
4273   format %{ "[$reg]" %}
4274   interface(MEMORY_INTER) %{
4275     base($reg);
4276     index(0x4);
4277     scale(0x0);
4278     disp(0x0);
4279   %}
4280 %}
4281 
4282 // Indirect Memory Plus Long Offset Operand
4283 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4284   match(AddP reg off);
4285 
4286   format %{ "[$reg + $off]" %}
4287   interface(MEMORY_INTER) %{
4288     base($reg);
4289     index(0x4);
4290     scale(0x0);
4291     disp($off);
4292   %}
4293 %}
4294 
4295 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4296 
4297 
4298 //----------Special Memory Operands--------------------------------------------
4299 // Stack Slot Operand - This operand is used for loading and storing temporary
4300 //                      values on the stack where a match requires a value to
4301 //                      flow through memory.
4302 operand stackSlotP(sRegP reg) %{
4303   constraint(ALLOC_IN_RC(stack_slots));
4304   // No match rule because this operand is only generated in matching
4305   format %{ "[$reg]" %}
4306   interface(MEMORY_INTER) %{
4307     base(0x4);   // ESP
4308     index(0x4);  // No Index
4309     scale(0x0);  // No Scale
4310     disp($reg);  // Stack Offset
4311   %}
4312 %}
4313 
4314 operand stackSlotI(sRegI reg) %{
4315   constraint(ALLOC_IN_RC(stack_slots));
4316   // No match rule because this operand is only generated in matching
4317   format %{ "[$reg]" %}
4318   interface(MEMORY_INTER) %{
4319     base(0x4);   // ESP
4320     index(0x4);  // No Index
4321     scale(0x0);  // No Scale
4322     disp($reg);  // Stack Offset
4323   %}
4324 %}
4325 
4326 operand stackSlotF(sRegF reg) %{
4327   constraint(ALLOC_IN_RC(stack_slots));
4328   // No match rule because this operand is only generated in matching
4329   format %{ "[$reg]" %}
4330   interface(MEMORY_INTER) %{
4331     base(0x4);   // ESP
4332     index(0x4);  // No Index
4333     scale(0x0);  // No Scale
4334     disp($reg);  // Stack Offset
4335   %}
4336 %}
4337 
4338 operand stackSlotD(sRegD reg) %{
4339   constraint(ALLOC_IN_RC(stack_slots));
4340   // No match rule because this operand is only generated in matching
4341   format %{ "[$reg]" %}
4342   interface(MEMORY_INTER) %{
4343     base(0x4);   // ESP
4344     index(0x4);  // No Index
4345     scale(0x0);  // No Scale
4346     disp($reg);  // Stack Offset
4347   %}
4348 %}
4349 
4350 operand stackSlotL(sRegL reg) %{
4351   constraint(ALLOC_IN_RC(stack_slots));
4352   // No match rule because this operand is only generated in matching
4353   format %{ "[$reg]" %}
4354   interface(MEMORY_INTER) %{
4355     base(0x4);   // ESP
4356     index(0x4);  // No Index
4357     scale(0x0);  // No Scale
4358     disp($reg);  // Stack Offset
4359   %}
4360 %}
4361 
4362 //----------Memory Operands - Win95 Implicit Null Variants----------------
4363 // Indirect Memory Operand
4364 operand indirect_win95_safe(eRegP_no_EBP reg)
4365 %{
4366   constraint(ALLOC_IN_RC(int_reg));
4367   match(reg);
4368 
4369   op_cost(100);
4370   format %{ "[$reg]" %}
4371   interface(MEMORY_INTER) %{
4372     base($reg);
4373     index(0x4);
4374     scale(0x0);
4375     disp(0x0);
4376   %}
4377 %}
4378 
4379 // Indirect Memory Plus Short Offset Operand
4380 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4381 %{
4382   match(AddP reg off);
4383 
4384   op_cost(100);
4385   format %{ "[$reg + $off]" %}
4386   interface(MEMORY_INTER) %{
4387     base($reg);
4388     index(0x4);
4389     scale(0x0);
4390     disp($off);
4391   %}
4392 %}
4393 
4394 // Indirect Memory Plus Long Offset Operand
4395 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4396 %{
4397   match(AddP reg off);
4398 
4399   op_cost(100);
4400   format %{ "[$reg + $off]" %}
4401   interface(MEMORY_INTER) %{
4402     base($reg);
4403     index(0x4);
4404     scale(0x0);
4405     disp($off);
4406   %}
4407 %}
4408 
4409 // Indirect Memory Plus Index Register Plus Offset Operand
4410 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4411 %{
4412   match(AddP (AddP reg ireg) off);
4413 
4414   op_cost(100);
4415   format %{"[$reg + $off + $ireg]" %}
4416   interface(MEMORY_INTER) %{
4417     base($reg);
4418     index($ireg);
4419     scale(0x0);
4420     disp($off);
4421   %}
4422 %}
4423 
4424 // Indirect Memory Times Scale Plus Index Register
4425 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4426 %{
4427   match(AddP reg (LShiftI ireg scale));
4428 
4429   op_cost(100);
4430   format %{"[$reg + $ireg << $scale]" %}
4431   interface(MEMORY_INTER) %{
4432     base($reg);
4433     index($ireg);
4434     scale($scale);
4435     disp(0x0);
4436   %}
4437 %}
4438 
4439 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4440 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4441 %{
4442   match(AddP (AddP reg (LShiftI ireg scale)) off);
4443 
4444   op_cost(100);
4445   format %{"[$reg + $off + $ireg << $scale]" %}
4446   interface(MEMORY_INTER) %{
4447     base($reg);
4448     index($ireg);
4449     scale($scale);
4450     disp($off);
4451   %}
4452 %}
4453 
4454 //----------Conditional Branch Operands----------------------------------------
4455 // Comparison Op  - This is the operation of the comparison, and is limited to
4456 //                  the following set of codes:
4457 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4458 //
4459 // Other attributes of the comparison, such as unsignedness, are specified
4460 // by the comparison instruction that sets a condition code flags register.
4461 // That result is represented by a flags operand whose subtype is appropriate
4462 // to the unsignedness (etc.) of the comparison.
4463 //
4464 // Later, the instruction which matches both the Comparison Op (a Bool) and
4465 // the flags (produced by the Cmp) specifies the coding of the comparison op
4466 // by matching a specific subtype of Bool operand below, such as cmpOpU.
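// For example (illustrative): a signed (CmpI a b) produces an eFlagsReg
// result, and a Bool with an 'lt' test then matches cmpOp below, whose
// less() entry (0xC) supplies the condition-code bits used in the branch
// encoding.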
4467 
4468 // Comparison Code
4469 operand cmpOp() %{
4470   match(Bool);
4471 
4472   format %{ "" %}
4473   interface(COND_INTER) %{
4474     equal(0x4, "e");
4475     not_equal(0x5, "ne");
4476     less(0xC, "l");
4477     greater_equal(0xD, "ge");
4478     less_equal(0xE, "le");
4479     greater(0xF, "g");
4480     overflow(0x0, "o");
4481     no_overflow(0x1, "no");
4482   %}
4483 %}
4484 
4485 // Comparison Code, unsigned compare.  Used by FP also, with
4486 // C2 (unordered) turned into GT or LT already.  The other bits
4487 // C0 and C3 are turned into Carry & Zero flags.
4488 operand cmpOpU() %{
4489   match(Bool);
4490 
4491   format %{ "" %}
4492   interface(COND_INTER) %{
4493     equal(0x4, "e");
4494     not_equal(0x5, "ne");
4495     less(0x2, "b");
4496     greater_equal(0x3, "nb");
4497     less_equal(0x6, "be");
4498     greater(0x7, "nbe");
4499     overflow(0x0, "o");
4500     no_overflow(0x1, "no");
4501   %}
4502 %}
4503 
4504 // Floating comparisons that don't require any fixup for the unordered case
4505 operand cmpOpUCF() %{
4506   match(Bool);
4507   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4508             n->as_Bool()->_test._test == BoolTest::ge ||
4509             n->as_Bool()->_test._test == BoolTest::le ||
4510             n->as_Bool()->_test._test == BoolTest::gt);
4511   format %{ "" %}
4512   interface(COND_INTER) %{
4513     equal(0x4, "e");
4514     not_equal(0x5, "ne");
4515     less(0x2, "b");
4516     greater_equal(0x3, "nb");
4517     less_equal(0x6, "be");
4518     greater(0x7, "nbe");
4519     overflow(0x0, "o");
4520     no_overflow(0x1, "no");
4521   %}
4522 %}
4523 
4524 
4525 // Floating comparisons that can be fixed up with extra conditional jumps
4526 operand cmpOpUCF2() %{
4527   match(Bool);
4528   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4529             n->as_Bool()->_test._test == BoolTest::eq);
4530   format %{ "" %}
4531   interface(COND_INTER) %{
4532     equal(0x4, "e");
4533     not_equal(0x5, "ne");
4534     less(0x2, "b");
4535     greater_equal(0x3, "nb");
4536     less_equal(0x6, "be");
4537     greater(0x7, "nbe");
4538     overflow(0x0, "o");
4539     no_overflow(0x1, "no");
4540   %}
4541 %}
4542 
4543 // Comparison Code for FP conditional move
4544 operand cmpOp_fcmov() %{
4545   match(Bool);
4546 
4547   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4548             n->as_Bool()->_test._test != BoolTest::no_overflow);
4549   format %{ "" %}
4550   interface(COND_INTER) %{
4551     equal        (0x0C8);
4552     not_equal    (0x1C8);
4553     less         (0x0C0);
4554     greater_equal(0x1C0);
4555     less_equal   (0x0D0);
4556     greater      (0x1D0);
4557     overflow(0x0, "o"); // not really supported by the instruction
4558     no_overflow(0x1, "no"); // not really supported by the instruction
4559   %}
4560 %}
4561 
4562 // Comparison Code used in long compares
4563 operand cmpOp_commute() %{
4564   match(Bool);
4565 
4566   format %{ "" %}
4567   interface(COND_INTER) %{
4568     equal(0x4, "e");
4569     not_equal(0x5, "ne");
4570     less(0xF, "g");
4571     greater_equal(0xE, "le");
4572     less_equal(0xD, "ge");
4573     greater(0xC, "l");
4574     overflow(0x0, "o");
4575     no_overflow(0x1, "no");
4576   %}
4577 %}
4578 
4579 //----------OPERAND CLASSES----------------------------------------------------
4580 // Operand Classes are groups of operands that are used to simplify
4581 // instruction definitions by not requiring the AD writer to specify separate
4582 // instructions for every form of operand when the instruction accepts
4583 // multiple operand types with the same basic encoding and format.  The classic
4584 // case of this is memory operands.
4585 
4586 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4587                indIndex, indIndexScale, indIndexScaleOffset);
4588 
4589 // Long memory operations are encoded in 2 instructions and a +4 offset.
4590 // This means some kind of offset is always required and you cannot use
4591 // an oop as the offset (as is done when working on static globals).
4592 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4593                     indIndex, indIndexScale, indIndexScaleOffset);
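
// Illustration only: because "memory" is an opclass, a single rule such as the
// loadI instruction defined later (match(Set dst (LoadI mem)) with a
// "memory mem" operand) covers every addressing form listed above; the encoder
// builds the address from the operand's base/index/scale/disp interface.  For
// long_memory the two 32-bit halves are addressed the way the loadL
// instruction below does it:
//
//   Address lo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp,     relocInfo::none);
//   Address hi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
//
// which is consistent with the note above that an oop cannot be used as the
// offset.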
4594 
4595 
4596 //----------PIPELINE-----------------------------------------------------------
4597 // Rules which define the behavior of the target architecture's pipeline.
4598 pipeline %{
4599 
4600 //----------ATTRIBUTES---------------------------------------------------------
4601 attributes %{
4602   variable_size_instructions;        // Variable-sized instructions
4603   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4604   instruction_unit_size = 1;         // An instruction is 1 byte long
4605   instruction_fetch_unit_size = 16;  // The processor fetches one line
4606   instruction_fetch_units = 1;       // of 16 bytes
4607 
4608   // List of nop instructions
4609   nops( MachNop );
4610 %}
4611 
4612 //----------RESOURCES----------------------------------------------------------
4613 // Resources are the functional units available to the machine
4614 
4615 // Generic P2/P3 pipeline
4616 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4617 // 3 instructions decoded per cycle.
4618 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4619 // 2 ALU ops, only ALU0 handles mul/div instructions.
4620 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4621            MS0, MS1, MEM = MS0 | MS1,
4622            BR, FPU,
4623            ALU0, ALU1, ALU = ALU0 | ALU1 );
4624 
4625 //----------PIPELINE DESCRIPTION-----------------------------------------------
4626 // Pipeline Description specifies the stages in the machine's pipeline
4627 
4628 // Generic P2/P3 pipeline
4629 pipe_desc(S0, S1, S2, S3, S4, S5);
4630 
4631 //----------PIPELINE CLASSES---------------------------------------------------
4632 // Pipeline Classes describe the stages in which input and output are
4633 // referenced by the hardware pipeline.
4634 
4635 // Naming convention: ialu or fpu
4636 // Then: _reg
4637 // Then: _reg if there is a 2nd register
4638 // Then: _long if it's a pair of instructions implementing a long operation
4639 // Then: _fat if it requires the big decoder
4640 //   Or: _mem if it requires the big decoder and a memory unit.
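//
// Reading a class name by that convention (examples taken from the classes
// defined below):
//   ialu_reg_mem      - one integer ALU op, register destination, memory
//                       source; needs the big decoder (D0) and a MEM unit
//   ialu_reg_reg_long - two instructions implementing a long reg-reg ALU op
//   fpu_reg_mem       - FPU op with a register destination and a memory source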
4641 
4642 // Integer ALU reg operation
4643 pipe_class ialu_reg(rRegI dst) %{
4644     single_instruction;
4645     dst    : S4(write);
4646     dst    : S3(read);
4647     DECODE : S0;        // any decoder
4648     ALU    : S3;        // any alu
4649 %}
4650 
4651 // Long ALU reg operation
4652 pipe_class ialu_reg_long(eRegL dst) %{
4653     instruction_count(2);
4654     dst    : S4(write);
4655     dst    : S3(read);
4656     DECODE : S0(2);     // any 2 decoders
4657     ALU    : S3(2);     // both alus
4658 %}
4659 
4660 // Integer ALU reg operation using big decoder
4661 pipe_class ialu_reg_fat(rRegI dst) %{
4662     single_instruction;
4663     dst    : S4(write);
4664     dst    : S3(read);
4665     D0     : S0;        // big decoder only
4666     ALU    : S3;        // any alu
4667 %}
4668 
4669 // Long ALU reg operation using big decoder
4670 pipe_class ialu_reg_long_fat(eRegL dst) %{
4671     instruction_count(2);
4672     dst    : S4(write);
4673     dst    : S3(read);
4674     D0     : S0(2);     // big decoder only; twice
4675     ALU    : S3(2);     // any 2 alus
4676 %}
4677 
4678 // Integer ALU reg-reg operation
4679 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4680     single_instruction;
4681     dst    : S4(write);
4682     src    : S3(read);
4683     DECODE : S0;        // any decoder
4684     ALU    : S3;        // any alu
4685 %}
4686 
4687 // Long ALU reg-reg operation
4688 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4689     instruction_count(2);
4690     dst    : S4(write);
4691     src    : S3(read);
4692     DECODE : S0(2);     // any 2 decoders
4693     ALU    : S3(2);     // both alus
4694 %}
4695 
4696 // Integer ALU reg-reg operation using big decoder
4697 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4698     single_instruction;
4699     dst    : S4(write);
4700     src    : S3(read);
4701     D0     : S0;        // big decoder only
4702     ALU    : S3;        // any alu
4703 %}
4704 
4705 // Long ALU reg-reg operation using big decoder
4706 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4707     instruction_count(2);
4708     dst    : S4(write);
4709     src    : S3(read);
4710     D0     : S0(2);     // big decoder only; twice
4711     ALU    : S3(2);     // both alus
4712 %}
4713 
4714 // Integer ALU reg-mem operation
4715 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4716     single_instruction;
4717     dst    : S5(write);
4718     mem    : S3(read);
4719     D0     : S0;        // big decoder only
4720     ALU    : S4;        // any alu
4721     MEM    : S3;        // any mem
4722 %}
4723 
4724 // Long ALU reg-mem operation
4725 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4726     instruction_count(2);
4727     dst    : S5(write);
4728     mem    : S3(read);
4729     D0     : S0(2);     // big decoder only; twice
4730     ALU    : S4(2);     // any 2 alus
4731     MEM    : S3(2);     // both mems
4732 %}
4733 
4734 // Integer mem operation (prefetch)
4735 pipe_class ialu_mem(memory mem)
4736 %{
4737     single_instruction;
4738     mem    : S3(read);
4739     D0     : S0;        // big decoder only
4740     MEM    : S3;        // any mem
4741 %}
4742 
4743 // Integer Store to Memory
4744 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4745     single_instruction;
4746     mem    : S3(read);
4747     src    : S5(read);
4748     D0     : S0;        // big decoder only
4749     ALU    : S4;        // any alu
4750     MEM    : S3;
4751 %}
4752 
4753 // Long Store to Memory
4754 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4755     instruction_count(2);
4756     mem    : S3(read);
4757     src    : S5(read);
4758     D0     : S0(2);     // big decoder only; twice
4759     ALU    : S4(2);     // any 2 alus
4760     MEM    : S3(2);     // Both mems
4761 %}
4762 
4763 // Integer Store immediate to Memory
4764 pipe_class ialu_mem_imm(memory mem) %{
4765     single_instruction;
4766     mem    : S3(read);
4767     D0     : S0;        // big decoder only
4768     ALU    : S4;        // any alu
4769     MEM    : S3;
4770 %}
4771 
4772 // Integer ALU0 reg-reg operation
4773 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4774     single_instruction;
4775     dst    : S4(write);
4776     src    : S3(read);
4777     D0     : S0;        // Big decoder only
4778     ALU0   : S3;        // only alu0
4779 %}
4780 
4781 // Integer ALU0 reg-mem operation
4782 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4783     single_instruction;
4784     dst    : S5(write);
4785     mem    : S3(read);
4786     D0     : S0;        // big decoder only
4787     ALU0   : S4;        // ALU0 only
4788     MEM    : S3;        // any mem
4789 %}
4790 
4791 // Integer ALU reg-reg operation
4792 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4793     single_instruction;
4794     cr     : S4(write);
4795     src1   : S3(read);
4796     src2   : S3(read);
4797     DECODE : S0;        // any decoder
4798     ALU    : S3;        // any alu
4799 %}
4800 
4801 // Integer ALU reg-imm operation
4802 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4803     single_instruction;
4804     cr     : S4(write);
4805     src1   : S3(read);
4806     DECODE : S0;        // any decoder
4807     ALU    : S3;        // any alu
4808 %}
4809 
4810 // Integer ALU reg-mem operation
4811 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4812     single_instruction;
4813     cr     : S4(write);
4814     src1   : S3(read);
4815     src2   : S3(read);
4816     D0     : S0;        // big decoder only
4817     ALU    : S4;        // any alu
4818     MEM    : S3;
4819 %}
4820 
4821 // Conditional move reg-reg
4822 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4823     instruction_count(4);
4824     y      : S4(read);
4825     q      : S3(read);
4826     p      : S3(read);
4827     DECODE : S0(4);     // any decoder
4828 %}
4829 
4830 // Conditional move reg-reg
4831 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4832     single_instruction;
4833     dst    : S4(write);
4834     src    : S3(read);
4835     cr     : S3(read);
4836     DECODE : S0;        // any decoder
4837 %}
4838 
4839 // Conditional move reg-mem
4840 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4841     single_instruction;
4842     dst    : S4(write);
4843     src    : S3(read);
4844     cr     : S3(read);
4845     DECODE : S0;        // any decoder
4846     MEM    : S3;
4847 %}
4848 
4849 // Conditional move reg-reg long
4850 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4851     single_instruction;
4852     dst    : S4(write);
4853     src    : S3(read);
4854     cr     : S3(read);
4855     DECODE : S0(2);     // any 2 decoders
4856 %}
4857 
4858 // Conditional move double reg-reg
4859 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4860     single_instruction;
4861     dst    : S4(write);
4862     src    : S3(read);
4863     cr     : S3(read);
4864     DECODE : S0;        // any decoder
4865 %}
4866 
4867 // Float reg-reg operation
4868 pipe_class fpu_reg(regDPR dst) %{
4869     instruction_count(2);
4870     dst    : S3(read);
4871     DECODE : S0(2);     // any 2 decoders
4872     FPU    : S3;
4873 %}
4874 
4875 // Float reg-reg operation
4876 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4877     instruction_count(2);
4878     dst    : S4(write);
4879     src    : S3(read);
4880     DECODE : S0(2);     // any 2 decoders
4881     FPU    : S3;
4882 %}
4883 
4884 // Float reg-reg operation
4885 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4886     instruction_count(3);
4887     dst    : S4(write);
4888     src1   : S3(read);
4889     src2   : S3(read);
4890     DECODE : S0(3);     // any 3 decoders
4891     FPU    : S3(2);
4892 %}
4893 
4894 // Float reg-reg operation
4895 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4896     instruction_count(4);
4897     dst    : S4(write);
4898     src1   : S3(read);
4899     src2   : S3(read);
4900     src3   : S3(read);
4901     DECODE : S0(4);     // any 4 decoders
4902     FPU    : S3(2);
4903 %}
4904 
4905 // Float reg-reg operation
4906 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4907     instruction_count(4);
4908     dst    : S4(write);
4909     src1   : S3(read);
4910     src2   : S3(read);
4911     src3   : S3(read);
4912     DECODE : S1(3);     // any 3 decoders
4913     D0     : S0;        // Big decoder only
4914     FPU    : S3(2);
4915     MEM    : S3;
4916 %}
4917 
4918 // Float reg-mem operation
4919 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4920     instruction_count(2);
4921     dst    : S5(write);
4922     mem    : S3(read);
4923     D0     : S0;        // big decoder only
4924     DECODE : S1;        // any decoder for FPU POP
4925     FPU    : S4;
4926     MEM    : S3;        // any mem
4927 %}
4928 
4929 // Float reg-mem operation
4930 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4931     instruction_count(3);
4932     dst    : S5(write);
4933     src1   : S3(read);
4934     mem    : S3(read);
4935     D0     : S0;        // big decoder only
4936     DECODE : S1(2);     // any decoder for FPU POP
4937     FPU    : S4;
4938     MEM    : S3;        // any mem
4939 %}
4940 
4941 // Float mem-reg operation
4942 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4943     instruction_count(2);
4944     src    : S5(read);
4945     mem    : S3(read);
4946     DECODE : S0;        // any decoder for FPU PUSH
4947     D0     : S1;        // big decoder only
4948     FPU    : S4;
4949     MEM    : S3;        // any mem
4950 %}
4951 
4952 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4953     instruction_count(3);
4954     src1   : S3(read);
4955     src2   : S3(read);
4956     mem    : S3(read);
4957     DECODE : S0(2);     // any decoder for FPU PUSH
4958     D0     : S1;        // big decoder only
4959     FPU    : S4;
4960     MEM    : S3;        // any mem
4961 %}
4962 
4963 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4964     instruction_count(3);
4965     src1   : S3(read);
4966     src2   : S3(read);
4967     mem    : S4(read);
4968     DECODE : S0;        // any decoder for FPU PUSH
4969     D0     : S0(2);     // big decoder only
4970     FPU    : S4;
4971     MEM    : S3(2);     // any mem
4972 %}
4973 
4974 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4975     instruction_count(2);
4976     src1   : S3(read);
4977     dst    : S4(read);
4978     D0     : S0(2);     // big decoder only
4979     MEM    : S3(2);     // any mem
4980 %}
4981 
4982 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4983     instruction_count(3);
4984     src1   : S3(read);
4985     src2   : S3(read);
4986     dst    : S4(read);
4987     D0     : S0(3);     // big decoder only
4988     FPU    : S4;
4989     MEM    : S3(3);     // any mem
4990 %}
4991 
4992 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4993     instruction_count(3);
4994     src1   : S4(read);
4995     mem    : S4(read);
4996     DECODE : S0;        // any decoder for FPU PUSH
4997     D0     : S0(2);     // big decoder only
4998     FPU    : S4;
4999     MEM    : S3(2);     // any mem
5000 %}
5001 
5002 // Float load constant
5003 pipe_class fpu_reg_con(regDPR dst) %{
5004     instruction_count(2);
5005     dst    : S5(write);
5006     D0     : S0;        // big decoder only for the load
5007     DECODE : S1;        // any decoder for FPU POP
5008     FPU    : S4;
5009     MEM    : S3;        // any mem
5010 %}
5011 
5012 // Float load constant
5013 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5014     instruction_count(3);
5015     dst    : S5(write);
5016     src    : S3(read);
5017     D0     : S0;        // big decoder only for the load
5018     DECODE : S1(2);     // any decoder for FPU POP
5019     FPU    : S4;
5020     MEM    : S3;        // any mem
5021 %}
5022 
5023 // Unconditional branch
5024 pipe_class pipe_jmp( label labl ) %{
5025     single_instruction;
5026     BR   : S3;
5027 %}
5028 
5029 // Conditional branch
5030 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5031     single_instruction;
5032     cr    : S1(read);
5033     BR    : S3;
5034 %}
5035 
5036 // Allocation idiom
5037 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5038     instruction_count(1); force_serialization;
5039     fixed_latency(6);
5040     heap_ptr : S3(read);
5041     DECODE   : S0(3);
5042     D0       : S2;
5043     MEM      : S3;
5044     ALU      : S3(2);
5045     dst      : S5(write);
5046     BR       : S5;
5047 %}
5048 
5049 // Generic big/slow expanded idiom
5050 pipe_class pipe_slow(  ) %{
5051     instruction_count(10); multiple_bundles; force_serialization;
5052     fixed_latency(100);
5053     D0  : S0(2);
5054     MEM : S3(2);
5055 %}
5056 
5057 // The real do-nothing guy
5058 pipe_class empty( ) %{
5059     instruction_count(0);
5060 %}
5061 
5062 // Define the class for the Nop node
5063 define %{
5064    MachNop = empty;
5065 %}
5066 
5067 %}
5068 
5069 //----------INSTRUCTIONS-------------------------------------------------------
5070 //
5071 // match      -- States which machine-independent subtree may be replaced
5072 //               by this instruction.
5073 // ins_cost   -- The estimated cost of this instruction is used by instruction
5074 //               selection to identify a minimum cost tree of machine
5075 //               instructions that matches a tree of machine-independent
5076 //               instructions.
5077 // format     -- A string providing the disassembly for this instruction.
5078 //               The value of an instruction's operand may be inserted
5079 //               by referring to it with a '$' prefix.
5080 // opcode     -- Three instruction opcodes may be provided.  These are referred
5081 //               to within an encode class as $primary, $secondary, and $tertiary
5082 //               respectively.  The primary opcode is commonly used to
5083 //               indicate the type of machine instruction, while secondary
5084 //               and tertiary are often used for prefix options or addressing
5085 //               modes.
5086 // ins_encode -- A list of encode classes with parameters. The encode class
5087 //               name must have been defined in an 'enc_class' specification
5088 //               in the encode section of the architecture description.
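//
// A minimal annotated example (commented out; the equivalent real definition,
// loadI, appears in the Load Instructions section below):
//
//   instruct loadI_example(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));        // ideal subtree this rule replaces
//     ins_cost(125);                     // relative cost used by the matcher
//     format %{ "MOV    $dst,$mem" %}    // disassembly string, $-substituted
//     ins_encode %{                      // emit code via the macro assembler
//       __ movl($dst$$Register, $mem$$Address);
//     %}
//     ins_pipe(ialu_reg_mem);            // pipeline class defined above
//   %}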
5089 
5090 //----------BSWAP-Instruction--------------------------------------------------
5091 instruct bytes_reverse_int(rRegI dst) %{
5092   match(Set dst (ReverseBytesI dst));
5093 
5094   format %{ "BSWAP  $dst" %}
5095   opcode(0x0F, 0xC8);
5096   ins_encode( OpcP, OpcSReg(dst) );
5097   ins_pipe( ialu_reg );
5098 %}
5099 
5100 instruct bytes_reverse_long(eRegL dst) %{
5101   match(Set dst (ReverseBytesL dst));
5102 
5103   format %{ "BSWAP  $dst.lo\n\t"
5104             "BSWAP  $dst.hi\n\t"
5105             "XCHG   $dst.lo $dst.hi" %}
5106 
5107   ins_cost(125);
5108   ins_encode( bswap_long_bytes(dst) );
5109   ins_pipe( ialu_reg_reg);
5110 %}
5111 
5112 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5113   match(Set dst (ReverseBytesUS dst));
5114   effect(KILL cr);
5115 
5116   format %{ "BSWAP  $dst\n\t"
5117             "SHR    $dst,16\n\t" %}
5118   ins_encode %{
5119     __ bswapl($dst$$Register);
5120     __ shrl($dst$$Register, 16);
5121   %}
5122   ins_pipe( ialu_reg );
5123 %}
5124 
5125 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5126   match(Set dst (ReverseBytesS dst));
5127   effect(KILL cr);
5128 
5129   format %{ "BSWAP  $dst\n\t"
5130             "SAR    $dst,16\n\t" %}
5131   ins_encode %{
5132     __ bswapl($dst$$Register);
5133     __ sarl($dst$$Register, 16);
5134   %}
5135   ins_pipe( ialu_reg );
5136 %}
5137 
5138 
5139 //---------- Zeros Count Instructions ------------------------------------------
5140 
5141 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5142   predicate(UseCountLeadingZerosInstruction);
5143   match(Set dst (CountLeadingZerosI src));
5144   effect(KILL cr);
5145 
5146   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5147   ins_encode %{
5148     __ lzcntl($dst$$Register, $src$$Register);
5149   %}
5150   ins_pipe(ialu_reg);
5151 %}
5152 
5153 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5154   predicate(!UseCountLeadingZerosInstruction);
5155   match(Set dst (CountLeadingZerosI src));
5156   effect(KILL cr);
5157 
5158   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5159             "JNZ    skip\n\t"
5160             "MOV    $dst, -1\n"
5161       "skip:\n\t"
5162             "NEG    $dst\n\t"
5163             "ADD    $dst, 31" %}
5164   ins_encode %{
5165     Register Rdst = $dst$$Register;
5166     Register Rsrc = $src$$Register;
5167     Label skip;
5168     __ bsrl(Rdst, Rsrc);
5169     __ jccb(Assembler::notZero, skip);
5170     __ movl(Rdst, -1);
5171     __ bind(skip);
5172     __ negl(Rdst);
5173     __ addl(Rdst, BitsPerInt - 1);
5174   %}
5175   ins_pipe(ialu_reg);
5176 %}
5177 
5178 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5179   predicate(UseCountLeadingZerosInstruction);
5180   match(Set dst (CountLeadingZerosL src));
5181   effect(TEMP dst, KILL cr);
5182 
5183   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5184             "JNC    done\n\t"
5185             "LZCNT  $dst, $src.lo\n\t"
5186             "ADD    $dst, 32\n"
5187       "done:" %}
5188   ins_encode %{
5189     Register Rdst = $dst$$Register;
5190     Register Rsrc = $src$$Register;
5191     Label done;
5192     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5193     __ jccb(Assembler::carryClear, done);
5194     __ lzcntl(Rdst, Rsrc);
5195     __ addl(Rdst, BitsPerInt);
5196     __ bind(done);
5197   %}
5198   ins_pipe(ialu_reg);
5199 %}
5200 
5201 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5202   predicate(!UseCountLeadingZerosInstruction);
5203   match(Set dst (CountLeadingZerosL src));
5204   effect(TEMP dst, KILL cr);
5205 
5206   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5207             "JZ     msw_is_zero\n\t"
5208             "ADD    $dst, 32\n\t"
5209             "JMP    not_zero\n"
5210       "msw_is_zero:\n\t"
5211             "BSR    $dst, $src.lo\n\t"
5212             "JNZ    not_zero\n\t"
5213             "MOV    $dst, -1\n"
5214       "not_zero:\n\t"
5215             "NEG    $dst\n\t"
5216             "ADD    $dst, 63\n" %}
5217  ins_encode %{
5218     Register Rdst = $dst$$Register;
5219     Register Rsrc = $src$$Register;
5220     Label msw_is_zero;
5221     Label not_zero;
5222     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5223     __ jccb(Assembler::zero, msw_is_zero);
5224     __ addl(Rdst, BitsPerInt);
5225     __ jmpb(not_zero);
5226     __ bind(msw_is_zero);
5227     __ bsrl(Rdst, Rsrc);
5228     __ jccb(Assembler::notZero, not_zero);
5229     __ movl(Rdst, -1);
5230     __ bind(not_zero);
5231     __ negl(Rdst);
5232     __ addl(Rdst, BitsPerLong - 1);
5233   %}
5234   ins_pipe(ialu_reg);
5235 %}
5236 
5237 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5238   predicate(UseCountTrailingZerosInstruction);
5239   match(Set dst (CountTrailingZerosI src));
5240   effect(KILL cr);
5241 
5242   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5243   ins_encode %{
5244     __ tzcntl($dst$$Register, $src$$Register);
5245   %}
5246   ins_pipe(ialu_reg);
5247 %}
5248 
5249 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5250   predicate(!UseCountTrailingZerosInstruction);
5251   match(Set dst (CountTrailingZerosI src));
5252   effect(KILL cr);
5253 
5254   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5255             "JNZ    done\n\t"
5256             "MOV    $dst, 32\n"
5257       "done:" %}
5258   ins_encode %{
5259     Register Rdst = $dst$$Register;
5260     Label done;
5261     __ bsfl(Rdst, $src$$Register);
5262     __ jccb(Assembler::notZero, done);
5263     __ movl(Rdst, BitsPerInt);
5264     __ bind(done);
5265   %}
5266   ins_pipe(ialu_reg);
5267 %}
5268 
5269 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5270   predicate(UseCountTrailingZerosInstruction);
5271   match(Set dst (CountTrailingZerosL src));
5272   effect(TEMP dst, KILL cr);
5273 
5274   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5275             "JNC    done\n\t"
5276             "TZCNT  $dst, $src.hi\n\t"
5277             "ADD    $dst, 32\n"
5278             "done:" %}
5279   ins_encode %{
5280     Register Rdst = $dst$$Register;
5281     Register Rsrc = $src$$Register;
5282     Label done;
5283     __ tzcntl(Rdst, Rsrc);
5284     __ jccb(Assembler::carryClear, done);
5285     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5286     __ addl(Rdst, BitsPerInt);
5287     __ bind(done);
5288   %}
5289   ins_pipe(ialu_reg);
5290 %}
5291 
5292 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5293   predicate(!UseCountTrailingZerosInstruction);
5294   match(Set dst (CountTrailingZerosL src));
5295   effect(TEMP dst, KILL cr);
5296 
5297   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5298             "JNZ    done\n\t"
5299             "BSF    $dst, $src.hi\n\t"
5300             "JNZ    msw_not_zero\n\t"
5301             "MOV    $dst, 32\n"
5302       "msw_not_zero:\n\t"
5303             "ADD    $dst, 32\n"
5304       "done:" %}
5305   ins_encode %{
5306     Register Rdst = $dst$$Register;
5307     Register Rsrc = $src$$Register;
5308     Label msw_not_zero;
5309     Label done;
5310     __ bsfl(Rdst, Rsrc);
5311     __ jccb(Assembler::notZero, done);
5312     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5313     __ jccb(Assembler::notZero, msw_not_zero);
5314     __ movl(Rdst, BitsPerInt);
5315     __ bind(msw_not_zero);
5316     __ addl(Rdst, BitsPerInt);
5317     __ bind(done);
5318   %}
5319   ins_pipe(ialu_reg);
5320 %}
5321 
5322 
5323 //---------- Population Count Instructions -------------------------------------
5324 
5325 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5326   predicate(UsePopCountInstruction);
5327   match(Set dst (PopCountI src));
5328   effect(KILL cr);
5329 
5330   format %{ "POPCNT $dst, $src" %}
5331   ins_encode %{
5332     __ popcntl($dst$$Register, $src$$Register);
5333   %}
5334   ins_pipe(ialu_reg);
5335 %}
5336 
5337 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5338   predicate(UsePopCountInstruction);
5339   match(Set dst (PopCountI (LoadI mem)));
5340   effect(KILL cr);
5341 
5342   format %{ "POPCNT $dst, $mem" %}
5343   ins_encode %{
5344     __ popcntl($dst$$Register, $mem$$Address);
5345   %}
5346   ins_pipe(ialu_reg);
5347 %}
5348 
5349 // Note: Long.bitCount(long) returns an int.
5350 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5351   predicate(UsePopCountInstruction);
5352   match(Set dst (PopCountL src));
5353   effect(KILL cr, TEMP tmp, TEMP dst);
5354 
5355   format %{ "POPCNT $dst, $src.lo\n\t"
5356             "POPCNT $tmp, $src.hi\n\t"
5357             "ADD    $dst, $tmp" %}
5358   ins_encode %{
5359     __ popcntl($dst$$Register, $src$$Register);
5360     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5361     __ addl($dst$$Register, $tmp$$Register);
5362   %}
5363   ins_pipe(ialu_reg);
5364 %}
5365 
5366 // Note: Long.bitCount(long) returns an int.
5367 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5368   predicate(UsePopCountInstruction);
5369   match(Set dst (PopCountL (LoadL mem)));
5370   effect(KILL cr, TEMP tmp, TEMP dst);
5371 
5372   format %{ "POPCNT $dst, $mem\n\t"
5373             "POPCNT $tmp, $mem+4\n\t"
5374             "ADD    $dst, $tmp" %}
5375   ins_encode %{
5378     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5379     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5380     __ addl($dst$$Register, $tmp$$Register);
5381   %}
5382   ins_pipe(ialu_reg);
5383 %}
5384 
5385 
5386 //----------Load/Store/Move Instructions---------------------------------------
5387 //----------Load Instructions--------------------------------------------------
5388 // Load Byte (8bit signed)
5389 instruct loadB(xRegI dst, memory mem) %{
5390   match(Set dst (LoadB mem));
5391 
5392   ins_cost(125);
5393   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5394 
5395   ins_encode %{
5396     __ movsbl($dst$$Register, $mem$$Address);
5397   %}
5398 
5399   ins_pipe(ialu_reg_mem);
5400 %}
5401 
5402 // Load Byte (8bit signed) into Long Register
5403 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5404   match(Set dst (ConvI2L (LoadB mem)));
5405   effect(KILL cr);
5406 
5407   ins_cost(375);
5408   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5409             "MOV    $dst.hi,$dst.lo\n\t"
5410             "SAR    $dst.hi,7" %}
5411 
5412   ins_encode %{
5413     __ movsbl($dst$$Register, $mem$$Address);
5414     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5415     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
5416   %}
5417 
5418   ins_pipe(ialu_reg_mem);
5419 %}
5420 
5421 // Load Unsigned Byte (8bit UNsigned)
5422 instruct loadUB(xRegI dst, memory mem) %{
5423   match(Set dst (LoadUB mem));
5424 
5425   ins_cost(125);
5426   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5427 
5428   ins_encode %{
5429     __ movzbl($dst$$Register, $mem$$Address);
5430   %}
5431 
5432   ins_pipe(ialu_reg_mem);
5433 %}
5434 
5435 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5436 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5437   match(Set dst (ConvI2L (LoadUB mem)));
5438   effect(KILL cr);
5439 
5440   ins_cost(250);
5441   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5442             "XOR    $dst.hi,$dst.hi" %}
5443 
5444   ins_encode %{
5445     Register Rdst = $dst$$Register;
5446     __ movzbl(Rdst, $mem$$Address);
5447     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5448   %}
5449 
5450   ins_pipe(ialu_reg_mem);
5451 %}
5452 
5453 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5454 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5455   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5456   effect(KILL cr);
5457 
5458   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5459             "XOR    $dst.hi,$dst.hi\n\t"
5460             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5461   ins_encode %{
5462     Register Rdst = $dst$$Register;
5463     __ movzbl(Rdst, $mem$$Address);
5464     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5465     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5466   %}
5467   ins_pipe(ialu_reg_mem);
5468 %}
5469 
5470 // Load Short (16bit signed)
5471 instruct loadS(rRegI dst, memory mem) %{
5472   match(Set dst (LoadS mem));
5473 
5474   ins_cost(125);
5475   format %{ "MOVSX  $dst,$mem\t# short" %}
5476 
5477   ins_encode %{
5478     __ movswl($dst$$Register, $mem$$Address);
5479   %}
5480 
5481   ins_pipe(ialu_reg_mem);
5482 %}
5483 
5484 // Load Short (16 bit signed) to Byte (8 bit signed)
5485 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5486   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5487 
5488   ins_cost(125);
5489   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5490   ins_encode %{
5491     __ movsbl($dst$$Register, $mem$$Address);
5492   %}
5493   ins_pipe(ialu_reg_mem);
5494 %}
5495 
5496 // Load Short (16bit signed) into Long Register
5497 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5498   match(Set dst (ConvI2L (LoadS mem)));
5499   effect(KILL cr);
5500 
5501   ins_cost(375);
5502   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5503             "MOV    $dst.hi,$dst.lo\n\t"
5504             "SAR    $dst.hi,15" %}
5505 
5506   ins_encode %{
5507     __ movswl($dst$$Register, $mem$$Address);
5508     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5509     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5510   %}
5511 
5512   ins_pipe(ialu_reg_mem);
5513 %}
5514 
5515 // Load Unsigned Short/Char (16bit unsigned)
5516 instruct loadUS(rRegI dst, memory mem) %{
5517   match(Set dst (LoadUS mem));
5518 
5519   ins_cost(125);
5520   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5521 
5522   ins_encode %{
5523     __ movzwl($dst$$Register, $mem$$Address);
5524   %}
5525 
5526   ins_pipe(ialu_reg_mem);
5527 %}
5528 
5529 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5530 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5531   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5532 
5533   ins_cost(125);
5534   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5535   ins_encode %{
5536     __ movsbl($dst$$Register, $mem$$Address);
5537   %}
5538   ins_pipe(ialu_reg_mem);
5539 %}
5540 
5541 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5542 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5543   match(Set dst (ConvI2L (LoadUS mem)));
5544   effect(KILL cr);
5545 
5546   ins_cost(250);
5547   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5548             "XOR    $dst.hi,$dst.hi" %}
5549 
5550   ins_encode %{
5551     __ movzwl($dst$$Register, $mem$$Address);
5552     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5553   %}
5554 
5555   ins_pipe(ialu_reg_mem);
5556 %}
5557 
5558 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5559 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5560   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5561   effect(KILL cr);
5562 
5563   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5564             "XOR    $dst.hi,$dst.hi" %}
5565   ins_encode %{
5566     Register Rdst = $dst$$Register;
5567     __ movzbl(Rdst, $mem$$Address);
5568     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5569   %}
5570   ins_pipe(ialu_reg_mem);
5571 %}
5572 
5573 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5574 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5575   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5576   effect(KILL cr);
5577 
5578   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5579             "XOR    $dst.hi,$dst.hi\n\t"
5580             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5581   ins_encode %{
5582     Register Rdst = $dst$$Register;
5583     __ movzwl(Rdst, $mem$$Address);
5584     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5585     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5586   %}
5587   ins_pipe(ialu_reg_mem);
5588 %}
5589 
5590 // Load Integer
5591 instruct loadI(rRegI dst, memory mem) %{
5592   match(Set dst (LoadI mem));
5593 
5594   ins_cost(125);
5595   format %{ "MOV    $dst,$mem\t# int" %}
5596 
5597   ins_encode %{
5598     __ movl($dst$$Register, $mem$$Address);
5599   %}
5600 
5601   ins_pipe(ialu_reg_mem);
5602 %}
5603 
5604 // Load Integer (32 bit signed) to Byte (8 bit signed)
5605 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5606   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5607 
5608   ins_cost(125);
5609   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5610   ins_encode %{
5611     __ movsbl($dst$$Register, $mem$$Address);
5612   %}
5613   ins_pipe(ialu_reg_mem);
5614 %}
5615 
5616 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5617 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5618   match(Set dst (AndI (LoadI mem) mask));
5619 
5620   ins_cost(125);
5621   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5622   ins_encode %{
5623     __ movzbl($dst$$Register, $mem$$Address);
5624   %}
5625   ins_pipe(ialu_reg_mem);
5626 %}
5627 
5628 // Load Integer (32 bit signed) to Short (16 bit signed)
5629 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5630   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5631 
5632   ins_cost(125);
5633   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5634   ins_encode %{
5635     __ movswl($dst$$Register, $mem$$Address);
5636   %}
5637   ins_pipe(ialu_reg_mem);
5638 %}
5639 
5640 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5641 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5642   match(Set dst (AndI (LoadI mem) mask));
5643 
5644   ins_cost(125);
5645   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5646   ins_encode %{
5647     __ movzwl($dst$$Register, $mem$$Address);
5648   %}
5649   ins_pipe(ialu_reg_mem);
5650 %}
5651 
5652 // Load Integer into Long Register
5653 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5654   match(Set dst (ConvI2L (LoadI mem)));
5655   effect(KILL cr);
5656 
5657   ins_cost(375);
5658   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5659             "MOV    $dst.hi,$dst.lo\n\t"
5660             "SAR    $dst.hi,31" %}
5661 
5662   ins_encode %{
5663     __ movl($dst$$Register, $mem$$Address);
5664     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5665     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5666   %}
5667 
5668   ins_pipe(ialu_reg_mem);
5669 %}
5670 
5671 // Load Integer with mask 0xFF into Long Register
5672 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5673   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5674   effect(KILL cr);
5675 
5676   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5677             "XOR    $dst.hi,$dst.hi" %}
5678   ins_encode %{
5679     Register Rdst = $dst$$Register;
5680     __ movzbl(Rdst, $mem$$Address);
5681     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5682   %}
5683   ins_pipe(ialu_reg_mem);
5684 %}
5685 
5686 // Load Integer with mask 0xFFFF into Long Register
5687 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5688   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5689   effect(KILL cr);
5690 
5691   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5692             "XOR    $dst.hi,$dst.hi" %}
5693   ins_encode %{
5694     Register Rdst = $dst$$Register;
5695     __ movzwl(Rdst, $mem$$Address);
5696     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5697   %}
5698   ins_pipe(ialu_reg_mem);
5699 %}
5700 
5701 // Load Integer with 31-bit mask into Long Register
5702 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5703   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5704   effect(KILL cr);
5705 
5706   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5707             "XOR    $dst.hi,$dst.hi\n\t"
5708             "AND    $dst.lo,$mask" %}
5709   ins_encode %{
5710     Register Rdst = $dst$$Register;
5711     __ movl(Rdst, $mem$$Address);
5712     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5713     __ andl(Rdst, $mask$$constant);
5714   %}
5715   ins_pipe(ialu_reg_mem);
5716 %}
5717 
5718 // Load Unsigned Integer into Long Register
5719 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5720   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5721   effect(KILL cr);
5722 
5723   ins_cost(250);
5724   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5725             "XOR    $dst.hi,$dst.hi" %}
5726 
5727   ins_encode %{
5728     __ movl($dst$$Register, $mem$$Address);
5729     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5730   %}
5731 
5732   ins_pipe(ialu_reg_mem);
5733 %}
5734 
5735 // Load Long.  Cannot clobber address while loading, so restrict address
5736 // register to ESI
5737 instruct loadL(eRegL dst, load_long_memory mem) %{
5738   predicate(!((LoadLNode*)n)->require_atomic_access());
5739   match(Set dst (LoadL mem));
5740 
5741   ins_cost(250);
5742   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5743             "MOV    $dst.hi,$mem+4" %}
5744 
5745   ins_encode %{
5746     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5747     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5748     __ movl($dst$$Register, Amemlo);
5749     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5750   %}
5751 
5752   ins_pipe(ialu_reg_long_mem);
5753 %}
5754 
5755 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5756 // then store it down to the stack and reload on the int
5757 // side.
5758 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5759   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5760   match(Set dst (LoadL mem));
5761 
5762   ins_cost(200);
5763   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5764             "FISTp  $dst" %}
5765   ins_encode(enc_loadL_volatile(mem,dst));
5766   ins_pipe( fpu_reg_mem );
5767 %}
5768 
5769 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5770   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5771   match(Set dst (LoadL mem));
5772   effect(TEMP tmp);
5773   ins_cost(180);
5774   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5775             "MOVSD  $dst,$tmp" %}
5776   ins_encode %{
5777     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5778     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5779   %}
5780   ins_pipe( pipe_slow );
5781 %}
5782 
5783 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5784   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5785   match(Set dst (LoadL mem));
5786   effect(TEMP tmp);
5787   ins_cost(160);
5788   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5789             "MOVD   $dst.lo,$tmp\n\t"
5790             "PSRLQ  $tmp,32\n\t"
5791             "MOVD   $dst.hi,$tmp" %}
5792   ins_encode %{
5793     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5794     __ movdl($dst$$Register, $tmp$$XMMRegister);
5795     __ psrlq($tmp$$XMMRegister, 32);
5796     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5797   %}
5798   ins_pipe( pipe_slow );
5799 %}
5800 
5801 // Load Range
5802 instruct loadRange(rRegI dst, memory mem) %{
5803   match(Set dst (LoadRange mem));
5804 
5805   ins_cost(125);
5806   format %{ "MOV    $dst,$mem" %}
5807   opcode(0x8B);
5808   ins_encode( OpcP, RegMem(dst,mem));
5809   ins_pipe( ialu_reg_mem );
5810 %}
5811 
5812 
5813 // Load Pointer
5814 instruct loadP(eRegP dst, memory mem) %{
5815   match(Set dst (LoadP mem));
5816 
5817   ins_cost(125);
5818   format %{ "MOV    $dst,$mem" %}
5819   opcode(0x8B);
5820   ins_encode( OpcP, RegMem(dst,mem));
5821   ins_pipe( ialu_reg_mem );
5822 %}
5823 
5824 // Load Klass Pointer
5825 instruct loadKlass(eRegP dst, memory mem) %{
5826   match(Set dst (LoadKlass mem));
5827 
5828   ins_cost(125);
5829   format %{ "MOV    $dst,$mem" %}
5830   opcode(0x8B);
5831   ins_encode( OpcP, RegMem(dst,mem));
5832   ins_pipe( ialu_reg_mem );
5833 %}
5834 
5835 // Load Double
5836 instruct loadDPR(regDPR dst, memory mem) %{
5837   predicate(UseSSE<=1);
5838   match(Set dst (LoadD mem));
5839 
5840   ins_cost(150);
5841   format %{ "FLD_D  ST,$mem\n\t"
5842             "FSTP   $dst" %}
5843   opcode(0xDD);               /* DD /0 */
5844   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5845               Pop_Reg_DPR(dst) );
5846   ins_pipe( fpu_reg_mem );
5847 %}
5848 
5849 // Load Double to XMM
5850 instruct loadD(regD dst, memory mem) %{
5851   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5852   match(Set dst (LoadD mem));
5853   ins_cost(145);
5854   format %{ "MOVSD  $dst,$mem" %}
5855   ins_encode %{
5856     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5857   %}
5858   ins_pipe( pipe_slow );
5859 %}
5860 
5861 instruct loadD_partial(regD dst, memory mem) %{
5862   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5863   match(Set dst (LoadD mem));
5864   ins_cost(145);
5865   format %{ "MOVLPD $dst,$mem" %}
5866   ins_encode %{
5867     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5868   %}
5869   ins_pipe( pipe_slow );
5870 %}
5871 
5872 // Load to XMM register (single-precision floating point)
5873 // MOVSS instruction
5874 instruct loadF(regF dst, memory mem) %{
5875   predicate(UseSSE>=1);
5876   match(Set dst (LoadF mem));
5877   ins_cost(145);
5878   format %{ "MOVSS  $dst,$mem" %}
5879   ins_encode %{
5880     __ movflt ($dst$$XMMRegister, $mem$$Address);
5881   %}
5882   ins_pipe( pipe_slow );
5883 %}
5884 
5885 // Load Float
5886 instruct loadFPR(regFPR dst, memory mem) %{
5887   predicate(UseSSE==0);
5888   match(Set dst (LoadF mem));
5889 
5890   ins_cost(150);
5891   format %{ "FLD_S  ST,$mem\n\t"
5892             "FSTP   $dst" %}
5893   opcode(0xD9);               /* D9 /0 */
5894   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5895               Pop_Reg_FPR(dst) );
5896   ins_pipe( fpu_reg_mem );
5897 %}
5898 
5899 // Load Effective Address
5900 instruct leaP8(eRegP dst, indOffset8 mem) %{
5901   match(Set dst mem);
5902 
5903   ins_cost(110);
5904   format %{ "LEA    $dst,$mem" %}
5905   opcode(0x8D);
5906   ins_encode( OpcP, RegMem(dst,mem));
5907   ins_pipe( ialu_reg_reg_fat );
5908 %}
5909 
5910 instruct leaP32(eRegP dst, indOffset32 mem) %{
5911   match(Set dst mem);
5912 
5913   ins_cost(110);
5914   format %{ "LEA    $dst,$mem" %}
5915   opcode(0x8D);
5916   ins_encode( OpcP, RegMem(dst,mem));
5917   ins_pipe( ialu_reg_reg_fat );
5918 %}
5919 
5920 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5921   match(Set dst mem);
5922 
5923   ins_cost(110);
5924   format %{ "LEA    $dst,$mem" %}
5925   opcode(0x8D);
5926   ins_encode( OpcP, RegMem(dst,mem));
5927   ins_pipe( ialu_reg_reg_fat );
5928 %}
5929 
5930 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5931   match(Set dst mem);
5932 
5933   ins_cost(110);
5934   format %{ "LEA    $dst,$mem" %}
5935   opcode(0x8D);
5936   ins_encode( OpcP, RegMem(dst,mem));
5937   ins_pipe( ialu_reg_reg_fat );
5938 %}
5939 
5940 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5941   match(Set dst mem);
5942 
5943   ins_cost(110);
5944   format %{ "LEA    $dst,$mem" %}
5945   opcode(0x8D);
5946   ins_encode( OpcP, RegMem(dst,mem));
5947   ins_pipe( ialu_reg_reg_fat );
5948 %}
5949 
5950 // Load Constant
5951 instruct loadConI(rRegI dst, immI src) %{
5952   match(Set dst src);
5953 
5954   format %{ "MOV    $dst,$src" %}
5955   ins_encode( LdImmI(dst, src) );
5956   ins_pipe( ialu_reg_fat );
5957 %}
5958 
5959 // Load Constant zero
5960 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5961   match(Set dst src);
5962   effect(KILL cr);
5963 
5964   ins_cost(50);
5965   format %{ "XOR    $dst,$dst" %}
5966   opcode(0x33);  /* + rd */
5967   ins_encode( OpcP, RegReg( dst, dst ) );
5968   ins_pipe( ialu_reg );
5969 %}
5970 
5971 instruct loadConP(eRegP dst, immP src) %{
5972   match(Set dst src);
5973 
5974   format %{ "MOV    $dst,$src" %}
5975   opcode(0xB8);  /* + rd */
5976   ins_encode( LdImmP(dst, src) );
5977   ins_pipe( ialu_reg_fat );
5978 %}
5979 
5980 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5981   match(Set dst src);
5982   effect(KILL cr);
5983   ins_cost(200);
5984   format %{ "MOV    $dst.lo,$src.lo\n\t"
5985             "MOV    $dst.hi,$src.hi" %}
5986   opcode(0xB8);
5987   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5988   ins_pipe( ialu_reg_long_fat );
5989 %}
5990 
5991 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5992   match(Set dst src);
5993   effect(KILL cr);
5994   ins_cost(150);
5995   format %{ "XOR    $dst.lo,$dst.lo\n\t"
5996             "XOR    $dst.hi,$dst.hi" %}
5997   opcode(0x33,0x33);
5998   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5999   ins_pipe( ialu_reg_long );
6000 %}
6001 
6002 // The instruction usage is guarded by predicate in operand immFPR().
6003 instruct loadConFPR(regFPR dst, immFPR con) %{
6004   match(Set dst con);
6005   ins_cost(125);
6006   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6007             "FSTP   $dst" %}
6008   ins_encode %{
6009     __ fld_s($constantaddress($con));
6010     __ fstp_d($dst$$reg);
6011   %}
6012   ins_pipe(fpu_reg_con);
6013 %}
6014 
6015 // The instruction usage is guarded by predicate in operand immFPR0().
6016 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6017   match(Set dst con);
6018   ins_cost(125);
6019   format %{ "FLDZ   ST\n\t"
6020             "FSTP   $dst" %}
6021   ins_encode %{
6022     __ fldz();
6023     __ fstp_d($dst$$reg);
6024   %}
6025   ins_pipe(fpu_reg_con);
6026 %}
6027 
6028 // The instruction usage is guarded by predicate in operand immFPR1().
6029 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6030   match(Set dst con);
6031   ins_cost(125);
6032   format %{ "FLD1   ST\n\t"
6033             "FSTP   $dst" %}
6034   ins_encode %{
6035     __ fld1();
6036     __ fstp_d($dst$$reg);
6037   %}
6038   ins_pipe(fpu_reg_con);
6039 %}
6040 
6041 // The instruction usage is guarded by predicate in operand immF().
6042 instruct loadConF(regF dst, immF con) %{
6043   match(Set dst con);
6044   ins_cost(125);
6045   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6046   ins_encode %{
6047     __ movflt($dst$$XMMRegister, $constantaddress($con));
6048   %}
6049   ins_pipe(pipe_slow);
6050 %}
6051 
6052 // The instruction usage is guarded by predicate in operand immF0().
6053 instruct loadConF0(regF dst, immF0 src) %{
6054   match(Set dst src);
6055   ins_cost(100);
6056   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6057   ins_encode %{
6058     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6059   %}
6060   ins_pipe(pipe_slow);
6061 %}
6062 
6063 // The instruction usage is guarded by predicate in operand immDPR().
6064 instruct loadConDPR(regDPR dst, immDPR con) %{
6065   match(Set dst con);
6066   ins_cost(125);
6067 
6068   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6069             "FSTP   $dst" %}
6070   ins_encode %{
6071     __ fld_d($constantaddress($con));
6072     __ fstp_d($dst$$reg);
6073   %}
6074   ins_pipe(fpu_reg_con);
6075 %}
6076 
6077 // The instruction usage is guarded by predicate in operand immDPR0().
6078 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6079   match(Set dst con);
6080   ins_cost(125);
6081 
6082   format %{ "FLDZ   ST\n\t"
6083             "FSTP   $dst" %}
6084   ins_encode %{
6085     __ fldz();
6086     __ fstp_d($dst$$reg);
6087   %}
6088   ins_pipe(fpu_reg_con);
6089 %}
6090 
6091 // The instruction usage is guarded by predicate in operand immDPR1().
6092 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6093   match(Set dst con);
6094   ins_cost(125);
6095 
6096   format %{ "FLD1   ST\n\t"
6097             "FSTP   $dst" %}
6098   ins_encode %{
6099     __ fld1();
6100     __ fstp_d($dst$$reg);
6101   %}
6102   ins_pipe(fpu_reg_con);
6103 %}
6104 
6105 // The instruction usage is guarded by predicate in operand immD().
6106 instruct loadConD(regD dst, immD con) %{
6107   match(Set dst con);
6108   ins_cost(125);
6109   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6110   ins_encode %{
6111     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6112   %}
6113   ins_pipe(pipe_slow);
6114 %}
6115 
6116 // The instruction usage is guarded by predicate in operand immD0().
6117 instruct loadConD0(regD dst, immD0 src) %{
6118   match(Set dst src);
6119   ins_cost(100);
6120   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6121   ins_encode %{
6122     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6123   %}
6124   ins_pipe( pipe_slow );
6125 %}
6126 
6127 // Load Stack Slot
6128 instruct loadSSI(rRegI dst, stackSlotI src) %{
6129   match(Set dst src);
6130   ins_cost(125);
6131 
6132   format %{ "MOV    $dst,$src" %}
6133   opcode(0x8B);
6134   ins_encode( OpcP, RegMem(dst,src));
6135   ins_pipe( ialu_reg_mem );
6136 %}
6137 
6138 instruct loadSSL(eRegL dst, stackSlotL src) %{
6139   match(Set dst src);
6140 
6141   ins_cost(200);
6142   format %{ "MOV    $dst,$src.lo\n\t"
6143             "MOV    $dst+4,$src.hi" %}
6144   opcode(0x8B, 0x8B);
6145   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6146   ins_pipe( ialu_mem_long_reg );
6147 %}
6148 
6149 // Load Stack Slot
6150 instruct loadSSP(eRegP dst, stackSlotP src) %{
6151   match(Set dst src);
6152   ins_cost(125);
6153 
6154   format %{ "MOV    $dst,$src" %}
6155   opcode(0x8B);
6156   ins_encode( OpcP, RegMem(dst,src));
6157   ins_pipe( ialu_reg_mem );
6158 %}
6159 
6160 // Load Stack Slot
6161 instruct loadSSF(regFPR dst, stackSlotF src) %{
6162   match(Set dst src);
6163   ins_cost(125);
6164 
6165   format %{ "FLD_S  $src\n\t"
6166             "FSTP   $dst" %}
6167   opcode(0xD9);               /* D9 /0, FLD m32real */
6168   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6169               Pop_Reg_FPR(dst) );
6170   ins_pipe( fpu_reg_mem );
6171 %}
6172 
6173 // Load Stack Slot
6174 instruct loadSSD(regDPR dst, stackSlotD src) %{
6175   match(Set dst src);
6176   ins_cost(125);
6177 
6178   format %{ "FLD_D  $src\n\t"
6179             "FSTP   $dst" %}
6180   opcode(0xDD);               /* DD /0, FLD m64real */
6181   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6182               Pop_Reg_DPR(dst) );
6183   ins_pipe( fpu_reg_mem );
6184 %}
6185 
6186 // Prefetch instructions for allocation.
6187 // Must be safe to execute with invalid address (cannot fault).
6188 
6189 instruct prefetchAlloc0( memory mem ) %{
6190   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6191   match(PrefetchAllocation mem);
6192   ins_cost(0);
6193   size(0);
6194   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6195   ins_encode();
6196   ins_pipe(empty);
6197 %}
6198 
6199 instruct prefetchAlloc( memory mem ) %{
6200   predicate(AllocatePrefetchInstr==3);
6201   match( PrefetchAllocation mem );
6202   ins_cost(100);
6203 
6204   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6205   ins_encode %{
6206     __ prefetchw($mem$$Address);
6207   %}
6208   ins_pipe(ialu_mem);
6209 %}
6210 
6211 instruct prefetchAllocNTA( memory mem ) %{
6212   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6213   match(PrefetchAllocation mem);
6214   ins_cost(100);
6215 
6216   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6217   ins_encode %{
6218     __ prefetchnta($mem$$Address);
6219   %}
6220   ins_pipe(ialu_mem);
6221 %}
6222 
6223 instruct prefetchAllocT0( memory mem ) %{
6224   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6225   match(PrefetchAllocation mem);
6226   ins_cost(100);
6227 
6228   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6229   ins_encode %{
6230     __ prefetcht0($mem$$Address);
6231   %}
6232   ins_pipe(ialu_mem);
6233 %}
6234 
6235 instruct prefetchAllocT2( memory mem ) %{
6236   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6237   match(PrefetchAllocation mem);
6238   ins_cost(100);
6239 
6240   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6241   ins_encode %{
6242     __ prefetcht2($mem$$Address);
6243   %}
6244   ins_pipe(ialu_mem);
6245 %}
6246 
6247 //----------Store Instructions-------------------------------------------------
6248 
6249 // Store Byte
6250 instruct storeB(memory mem, xRegI src) %{
6251   match(Set mem (StoreB mem src));
6252 
6253   ins_cost(125);
6254   format %{ "MOV8   $mem,$src" %}
6255   opcode(0x88);
6256   ins_encode( OpcP, RegMem( src, mem ) );
6257   ins_pipe( ialu_mem_reg );
6258 %}
6259 
6260 // Store Char/Short
6261 instruct storeC(memory mem, rRegI src) %{
6262   match(Set mem (StoreC mem src));
6263 
6264   ins_cost(125);
6265   format %{ "MOV16  $mem,$src" %}
6266   opcode(0x89, 0x66);
6267   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6268   ins_pipe( ialu_mem_reg );
6269 %}
6270 
6271 // Store Integer
6272 instruct storeI(memory mem, rRegI src) %{
6273   match(Set mem (StoreI mem src));
6274 
6275   ins_cost(125);
6276   format %{ "MOV    $mem,$src" %}
6277   opcode(0x89);
6278   ins_encode( OpcP, RegMem( src, mem ) );
6279   ins_pipe( ialu_mem_reg );
6280 %}
6281 
6282 // Store Long
6283 instruct storeL(long_memory mem, eRegL src) %{
6284   predicate(!((StoreLNode*)n)->require_atomic_access());
6285   match(Set mem (StoreL mem src));
6286 
6287   ins_cost(200);
6288   format %{ "MOV    $mem,$src.lo\n\t"
6289             "MOV    $mem+4,$src.hi" %}
6290   opcode(0x89, 0x89);
6291   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6292   ins_pipe( ialu_mem_long_reg );
6293 %}
6294 
6295 // Store Long to Integer
6296 instruct storeL2I(memory mem, eRegL src) %{
6297   match(Set mem (StoreI mem (ConvL2I src)));
6298 
6299   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6300   ins_encode %{
6301     __ movl($mem$$Address, $src$$Register);
6302   %}
6303   ins_pipe(ialu_mem_reg);
6304 %}
6305 
6306 // Volatile Store Long.  Must be atomic, so move it into
6307 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6308 // target address before the store (for null-ptr checks)
6309 // so the memory operand is used twice in the encoding.
6310 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6311   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6312   match(Set mem (StoreL mem src));
6313   effect( KILL cr );
6314   ins_cost(400);
6315   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6316             "FILD   $src\n\t"
6317             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6318   opcode(0x3B);
6319   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6320   ins_pipe( fpu_reg_mem );
6321 %}
6322 
6323 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6324   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6325   match(Set mem (StoreL mem src));
6326   effect( TEMP tmp, KILL cr );
6327   ins_cost(380);
6328   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6329             "MOVSD  $tmp,$src\n\t"
6330             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6331   ins_encode %{
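    // Touch the destination first so an implicit null check can fire before
    // anything is written, then copy the 64-bit stack slot through an XMM
    // register so the store happens as a single atomic 8-byte access.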
6332     __ cmpl(rax, $mem$$Address);
6333     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6334     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6335   %}
6336   ins_pipe( pipe_slow );
6337 %}
6338 
6339 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6340   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6341   match(Set mem (StoreL mem src));
6342   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6343   ins_cost(360);
6344   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6345             "MOVD   $tmp,$src.lo\n\t"
6346             "MOVD   $tmp2,$src.hi\n\t"
6347             "PUNPCKLDQ $tmp,$tmp2\n\t"
6348             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6349   ins_encode %{
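    // Probe the address for the implicit null check, move the low and high
    // halves of the register pair into XMM temporaries, interleave them with
    // PUNPCKLDQ into one 64-bit lane, and store it with a single atomic
    // 8-byte MOVSD.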
6350     __ cmpl(rax, $mem$$Address);
6351     __ movdl($tmp$$XMMRegister, $src$$Register);
6352     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6353     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6354     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6355   %}
6356   ins_pipe( pipe_slow );
6357 %}
6358 
6359 // Store Pointer; for storing unknown oops and raw pointers
6360 instruct storeP(memory mem, anyRegP src) %{
6361   match(Set mem (StoreP mem src));
6362 
6363   ins_cost(125);
6364   format %{ "MOV    $mem,$src" %}
6365   opcode(0x89);
6366   ins_encode( OpcP, RegMem( src, mem ) );
6367   ins_pipe( ialu_mem_reg );
6368 %}
6369 
6370 // Store Integer Immediate
6371 instruct storeImmI(memory mem, immI src) %{
6372   match(Set mem (StoreI mem src));
6373 
6374   ins_cost(150);
6375   format %{ "MOV    $mem,$src" %}
6376   opcode(0xC7);               /* C7 /0 */
6377   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6378   ins_pipe( ialu_mem_imm );
6379 %}
6380 
6381 // Store Short/Char Immediate
6382 instruct storeImmI16(memory mem, immI16 src) %{
6383   predicate(UseStoreImmI16);
6384   match(Set mem (StoreC mem src));
6385 
6386   ins_cost(150);
6387   format %{ "MOV16  $mem,$src" %}
6388   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6389   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6390   ins_pipe( ialu_mem_imm );
6391 %}
6392 
6393 // Store Pointer Immediate; null pointers or constant oops that do not
6394 // need card-mark barriers.
6395 instruct storeImmP(memory mem, immP src) %{
6396   match(Set mem (StoreP mem src));
6397 
6398   ins_cost(150);
6399   format %{ "MOV    $mem,$src" %}
6400   opcode(0xC7);               /* C7 /0 */
6401   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6402   ins_pipe( ialu_mem_imm );
6403 %}
6404 
6405 // Store Byte Immediate
6406 instruct storeImmB(memory mem, immI8 src) %{
6407   match(Set mem (StoreB mem src));
6408 
6409   ins_cost(150);
6410   format %{ "MOV8   $mem,$src" %}
6411   opcode(0xC6);               /* C6 /0 */
6412   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6413   ins_pipe( ialu_mem_imm );
6414 %}
6415 
6416 // Store CMS card-mark Immediate
6417 instruct storeImmCM(memory mem, immI8 src) %{
6418   match(Set mem (StoreCM mem src));
6419 
6420   ins_cost(150);
6421   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6422   opcode(0xC6);               /* C6 /0 */
6423   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6424   ins_pipe( ialu_mem_imm );
6425 %}
6426 
6427 // Store Double
6428 instruct storeDPR( memory mem, regDPR1 src) %{
6429   predicate(UseSSE<=1);
6430   match(Set mem (StoreD mem src));
6431 
6432   ins_cost(100);
6433   format %{ "FST_D  $mem,$src" %}
6434   opcode(0xDD);       /* DD /2 */
6435   ins_encode( enc_FPR_store(mem,src) );
6436   ins_pipe( fpu_mem_reg );
6437 %}
6438 
6439 // Store double does rounding on x86
6440 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6441   predicate(UseSSE<=1);
6442   match(Set mem (StoreD mem (RoundDouble src)));
6443 
6444   ins_cost(100);
6445   format %{ "FST_D  $mem,$src\t# round" %}
6446   opcode(0xDD);       /* DD /2 */
6447   ins_encode( enc_FPR_store(mem,src) );
6448   ins_pipe( fpu_mem_reg );
6449 %}
6450 
6451 // Store XMM register to memory (double-precision floating point)
6452 // MOVSD instruction
6453 instruct storeD(memory mem, regD src) %{
6454   predicate(UseSSE>=2);
6455   match(Set mem (StoreD mem src));
6456   ins_cost(95);
6457   format %{ "MOVSD  $mem,$src" %}
6458   ins_encode %{
6459     __ movdbl($mem$$Address, $src$$XMMRegister);
6460   %}
6461   ins_pipe( pipe_slow );
6462 %}
6463 
6464 // Store XMM register to memory (single-precision floating point)
6465 // MOVSS instruction
6466 instruct storeF(memory mem, regF src) %{
6467   predicate(UseSSE>=1);
6468   match(Set mem (StoreF mem src));
6469   ins_cost(95);
6470   format %{ "MOVSS  $mem,$src" %}
6471   ins_encode %{
6472     __ movflt($mem$$Address, $src$$XMMRegister);
6473   %}
6474   ins_pipe( pipe_slow );
6475 %}
6476 
6477 // Store Float
6478 instruct storeFPR( memory mem, regFPR1 src) %{
6479   predicate(UseSSE==0);
6480   match(Set mem (StoreF mem src));
6481 
6482   ins_cost(100);
6483   format %{ "FST_S  $mem,$src" %}
6484   opcode(0xD9);       /* D9 /2 */
6485   ins_encode( enc_FPR_store(mem,src) );
6486   ins_pipe( fpu_mem_reg );
6487 %}
6488 
6489 // Store Float does rounding on x86
6490 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6491   predicate(UseSSE==0);
6492   match(Set mem (StoreF mem (RoundFloat src)));
6493 
6494   ins_cost(100);
6495   format %{ "FST_S  $mem,$src\t# round" %}
6496   opcode(0xD9);       /* D9 /2 */
6497   ins_encode( enc_FPR_store(mem,src) );
6498   ins_pipe( fpu_mem_reg );
6499 %}
6500 
6501 // Store Float from a Double; the conversion does rounding on x86
6502 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6503   predicate(UseSSE<=1);
6504   match(Set mem (StoreF mem (ConvD2F src)));
6505 
6506   ins_cost(100);
6507   format %{ "FST_S  $mem,$src\t# D-round" %}
6508   opcode(0xD9);       /* D9 /2 */
6509   ins_encode( enc_FPR_store(mem,src) );
6510   ins_pipe( fpu_mem_reg );
6511 %}
6512 
6513 // Store immediate Float value (it is faster than store from FPU register)
6514 // The instruction usage is guarded by predicate in operand immFPR().
6515 instruct storeFPR_imm( memory mem, immFPR src) %{
6516   match(Set mem (StoreF mem src));
6517 
6518   ins_cost(50);
6519   format %{ "MOV    $mem,$src\t# store float" %}
6520   opcode(0xC7);               /* C7 /0 */
6521   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6522   ins_pipe( ialu_mem_imm );
6523 %}
6524 
6525 // Store immediate Float value (it is faster than store from XMM register)
6526 // The instruction usage is guarded by predicate in operand immF().
6527 instruct storeF_imm( memory mem, immF src) %{
6528   match(Set mem (StoreF mem src));
6529 
6530   ins_cost(50);
6531   format %{ "MOV    $mem,$src\t# store float" %}
6532   opcode(0xC7);               /* C7 /0 */
6533   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6534   ins_pipe( ialu_mem_imm );
6535 %}
6536 
6537 // Store Integer to stack slot
6538 instruct storeSSI(stackSlotI dst, rRegI src) %{
6539   match(Set dst src);
6540 
6541   ins_cost(100);
6542   format %{ "MOV    $dst,$src" %}
6543   opcode(0x89);
6544   ins_encode( OpcPRegSS( dst, src ) );
6545   ins_pipe( ialu_mem_reg );
6546 %}
6547 
6548 // Store Pointer to stack slot
6549 instruct storeSSP(stackSlotP dst, eRegP src) %{
6550   match(Set dst src);
6551 
6552   ins_cost(100);
6553   format %{ "MOV    $dst,$src" %}
6554   opcode(0x89);
6555   ins_encode( OpcPRegSS( dst, src ) );
6556   ins_pipe( ialu_mem_reg );
6557 %}
6558 
6559 // Store Long to stack slot
6560 instruct storeSSL(stackSlotL dst, eRegL src) %{
6561   match(Set dst src);
6562 
6563   ins_cost(200);
6564   format %{ "MOV    $dst,$src.lo\n\t"
6565             "MOV    $dst+4,$src.hi" %}
6566   opcode(0x89, 0x89);
6567   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6568   ins_pipe( ialu_mem_long_reg );
6569 %}
6570 
6571 //----------MemBar Instructions-----------------------------------------------
6572 // Memory barrier flavors
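// On x86 the hardware memory model already orders loads with loads, stores
// with stores, and loads ahead of later stores, so the acquire, release and
// storestore flavors below get empty encodings; only the StoreLoad barrier
// (MemBarVolatile) requires an actual fencing instruction.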
6573 
6574 instruct membar_acquire() %{
6575   match(MemBarAcquire);
6576   match(LoadFence);
6577   ins_cost(400);
6578 
6579   size(0);
6580   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6581   ins_encode();
6582   ins_pipe(empty);
6583 %}
6584 
6585 instruct membar_acquire_lock() %{
6586   match(MemBarAcquireLock);
6587   ins_cost(0);
6588 
6589   size(0);
6590   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6591   ins_encode( );
6592   ins_pipe(empty);
6593 %}
6594 
6595 instruct membar_release() %{
6596   match(MemBarRelease);
6597   match(StoreFence);
6598   ins_cost(400);
6599 
6600   size(0);
6601   format %{ "MEMBAR-release ! (empty encoding)" %}
6602   ins_encode( );
6603   ins_pipe(empty);
6604 %}
6605 
6606 instruct membar_release_lock() %{
6607   match(MemBarReleaseLock);
6608   ins_cost(0);
6609 
6610   size(0);
6611   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6612   ins_encode( );
6613   ins_pipe(empty);
6614 %}
6615 
6616 instruct membar_volatile(eFlagsReg cr) %{
6617   match(MemBarVolatile);
6618   effect(KILL cr);
6619   ins_cost(400);
6620 
6621   format %{
6622     $$template
6623     if (os::is_MP()) {
6624       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6625     } else {
6626       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6627     }
6628   %}
6629   ins_encode %{
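    // As the format above shows, on MP systems this emits a locked ADD of
    // zero to the top of the stack, which acts as a full StoreLoad fence;
    // on a uniprocessor no fence is needed.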
6630     __ membar(Assembler::StoreLoad);
6631   %}
6632   ins_pipe(pipe_slow);
6633 %}
6634 
6635 instruct unnecessary_membar_volatile() %{
6636   match(MemBarVolatile);
6637   predicate(Matcher::post_store_load_barrier(n));
6638   ins_cost(0);
6639 
6640   size(0);
6641   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6642   ins_encode( );
6643   ins_pipe(empty);
6644 %}
6645 
6646 instruct membar_storestore() %{
6647   match(MemBarStoreStore);
6648   ins_cost(0);
6649 
6650   size(0);
6651   format %{ "MEMBAR-storestore (empty encoding)" %}
6652   ins_encode( );
6653   ins_pipe(empty);
6654 %}
6655 
6656 //----------Move Instructions--------------------------------------------------
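// CastX2P costs nothing here: the operands pin both dst and src to EAX, so
// the value is already in the required register and no code is emitted.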
6657 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6658   match(Set dst (CastX2P src));
6659   format %{ "# X2P  $dst, $src" %}
6660   ins_encode( /*empty encoding*/ );
6661   ins_cost(0);
6662   ins_pipe(empty);
6663 %}
6664 
6665 instruct castP2X(rRegI dst, eRegP src ) %{
6666   match(Set dst (CastP2X src));
6667   ins_cost(50);
6668   format %{ "MOV    $dst, $src\t# CastP2X" %}
6669   ins_encode( enc_Copy( dst, src) );
6670   ins_pipe( ialu_reg_reg );
6671 %}
6672 
6673 //----------Conditional Move---------------------------------------------------
6674 // Conditional move, emulated with a short branch when CMOV is not supported
6675 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6676   predicate(!VM_Version::supports_cmov() );
6677   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6678   ins_cost(200);
6679   format %{ "J$cop,us skip\t# signed cmove\n\t"
6680             "MOV    $dst,$src\n"
6681       "skip:" %}
6682   ins_encode %{
6683     Label Lskip;
6684     // Invert sense of branch from sense of CMOV
6685     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6686     __ movl($dst$$Register, $src$$Register);
6687     __ bind(Lskip);
6688   %}
6689   ins_pipe( pipe_cmov_reg );
6690 %}
6691 
6692 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6693   predicate(!VM_Version::supports_cmov() );
6694   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6695   ins_cost(200);
6696   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6697             "MOV    $dst,$src\n"
6698       "skip:" %}
6699   ins_encode %{
6700     Label Lskip;
6701     // Invert sense of branch from sense of CMOV
6702     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6703     __ movl($dst$$Register, $src$$Register);
6704     __ bind(Lskip);
6705   %}
6706   ins_pipe( pipe_cmov_reg );
6707 %}
6708 
6709 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6710   predicate(VM_Version::supports_cmov() );
6711   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6712   ins_cost(200);
6713   format %{ "CMOV$cop $dst,$src" %}
6714   opcode(0x0F,0x40);
6715   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6716   ins_pipe( pipe_cmov_reg );
6717 %}
6718 
6719 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6720   predicate(VM_Version::supports_cmov() );
6721   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6722   ins_cost(200);
6723   format %{ "CMOV$cop $dst,$src" %}
6724   opcode(0x0F,0x40);
6725   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6726   ins_pipe( pipe_cmov_reg );
6727 %}
6728 
6729 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6730   predicate(VM_Version::supports_cmov() );
6731   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6732   ins_cost(200);
6733   expand %{
6734     cmovI_regU(cop, cr, dst, src);
6735   %}
6736 %}
6737 
6738 // Conditional move
6739 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6740   predicate(VM_Version::supports_cmov() );
6741   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6742   ins_cost(250);
6743   format %{ "CMOV$cop $dst,$src" %}
6744   opcode(0x0F,0x40);
6745   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6746   ins_pipe( pipe_cmov_mem );
6747 %}
6748 
6749 // Conditional move
6750 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6751   predicate(VM_Version::supports_cmov() );
6752   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6753   ins_cost(250);
6754   format %{ "CMOV$cop $dst,$src" %}
6755   opcode(0x0F,0x40);
6756   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6757   ins_pipe( pipe_cmov_mem );
6758 %}
6759 
6760 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6761   predicate(VM_Version::supports_cmov() );
6762   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6763   ins_cost(250);
6764   expand %{
6765     cmovI_memU(cop, cr, dst, src);
6766   %}
6767 %}
6768 
6769 // Conditional move
6770 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6771   predicate(VM_Version::supports_cmov() );
6772   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6773   ins_cost(200);
6774   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6775   opcode(0x0F,0x40);
6776   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6777   ins_pipe( pipe_cmov_reg );
6778 %}
6779 
6780 // Conditional move (non-P6 version)
6781 // Note:  a CMoveP is generated for  stubs and native wrappers
6782 //        regardless of whether we are on a P6, so we
6783 //        emulate a cmov here
6784 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6785   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6786   ins_cost(300);
6787   format %{ "Jn$cop   skip\n\t"
6788           "MOV    $dst,$src\t# pointer\n"
6789       "skip:" %}
6790   opcode(0x8b);
6791   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6792   ins_pipe( pipe_cmov_reg );
6793 %}
6794 
6795 // Conditional move
6796 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6797   predicate(VM_Version::supports_cmov() );
6798   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6799   ins_cost(200);
6800   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6801   opcode(0x0F,0x40);
6802   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6803   ins_pipe( pipe_cmov_reg );
6804 %}
6805 
6806 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6807   predicate(VM_Version::supports_cmov() );
6808   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6809   ins_cost(200);
6810   expand %{
6811     cmovP_regU(cop, cr, dst, src);
6812   %}
6813 %}
6814 
6815 // DISABLED: Requires the ADLC to emit a bottom_type call that
6816 // correctly meets the two pointer arguments; one is an incoming
6817 // register but the other is a memory operand.  ALSO appears to
6818 // be buggy with implicit null checks.
6819 //
6820 //// Conditional move
6821 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6822 //  predicate(VM_Version::supports_cmov() );
6823 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6824 //  ins_cost(250);
6825 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6826 //  opcode(0x0F,0x40);
6827 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6828 //  ins_pipe( pipe_cmov_mem );
6829 //%}
6830 //
6831 //// Conditional move
6832 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6833 //  predicate(VM_Version::supports_cmov() );
6834 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6835 //  ins_cost(250);
6836 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6837 //  opcode(0x0F,0x40);
6838 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6839 //  ins_pipe( pipe_cmov_mem );
6840 //%}
6841 
6842 // Conditional move
6843 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6844   predicate(UseSSE<=1);
6845   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6846   ins_cost(200);
6847   format %{ "FCMOV$cop $dst,$src\t# double" %}
6848   opcode(0xDA);
6849   ins_encode( enc_cmov_dpr(cop,src) );
6850   ins_pipe( pipe_cmovDPR_reg );
6851 %}
6852 
6853 // Conditional move
6854 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6855   predicate(UseSSE==0);
6856   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6857   ins_cost(200);
6858   format %{ "FCMOV$cop $dst,$src\t# float" %}
6859   opcode(0xDA);
6860   ins_encode( enc_cmov_dpr(cop,src) );
6861   ins_pipe( pipe_cmovDPR_reg );
6862 %}
6863 
6864 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6865 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6866   predicate(UseSSE<=1);
6867   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6868   ins_cost(200);
6869   format %{ "Jn$cop   skip\n\t"
6870             "MOV    $dst,$src\t# double\n"
6871       "skip:" %}
6872   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6873   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6874   ins_pipe( pipe_cmovDPR_reg );
6875 %}
6876 
6877 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6878 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6879   predicate(UseSSE==0);
6880   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6881   ins_cost(200);
6882   format %{ "Jn$cop    skip\n\t"
6883             "MOV    $dst,$src\t# float\n"
6884       "skip:" %}
6885   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6886   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6887   ins_pipe( pipe_cmovDPR_reg );
6888 %}
6889 
6890 // No conditional move for XMM registers with SSE/SSE2, so emulate with a branch
6891 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6892   predicate (UseSSE>=1);
6893   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6894   ins_cost(200);
6895   format %{ "Jn$cop   skip\n\t"
6896             "MOVSS  $dst,$src\t# float\n"
6897       "skip:" %}
6898   ins_encode %{
6899     Label skip;
6900     // Invert sense of branch from sense of CMOV
6901     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6902     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6903     __ bind(skip);
6904   %}
6905   ins_pipe( pipe_slow );
6906 %}
6907 
6908 // No conditional move for XMM registers with SSE/SSE2, so emulate with a branch
6909 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6910   predicate (UseSSE>=2);
6911   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6912   ins_cost(200);
6913   format %{ "Jn$cop   skip\n\t"
6914             "MOVSD  $dst,$src\t# double\n"
6915       "skip:" %}
6916   ins_encode %{
6917     Label skip;
6918     // Invert sense of branch from sense of CMOV
6919     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6920     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6921     __ bind(skip);
6922   %}
6923   ins_pipe( pipe_slow );
6924 %}
6925 
6926 // unsigned version
6927 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6928   predicate (UseSSE>=1);
6929   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6930   ins_cost(200);
6931   format %{ "Jn$cop   skip\n\t"
6932             "MOVSS  $dst,$src\t# float\n"
6933       "skip:" %}
6934   ins_encode %{
6935     Label skip;
6936     // Invert sense of branch from sense of CMOV
6937     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6938     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6939     __ bind(skip);
6940   %}
6941   ins_pipe( pipe_slow );
6942 %}
6943 
6944 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6945   predicate (UseSSE>=1);
6946   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6947   ins_cost(200);
6948   expand %{
6949     fcmovF_regU(cop, cr, dst, src);
6950   %}
6951 %}
6952 
6953 // unsigned version
6954 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6955   predicate (UseSSE>=2);
6956   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6957   ins_cost(200);
6958   format %{ "Jn$cop   skip\n\t"
6959             "MOVSD  $dst,$src\t# double\n"
6960       "skip:" %}
6961   ins_encode %{
6962     Label skip;
6963     // Invert sense of branch from sense of CMOV
6964     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6965     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6966     __ bind(skip);
6967   %}
6968   ins_pipe( pipe_slow );
6969 %}
6970 
6971 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6972   predicate (UseSSE>=2);
6973   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6974   ins_cost(200);
6975   expand %{
6976     fcmovD_regU(cop, cr, dst, src);
6977   %}
6978 %}
6979 
6980 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6981   predicate(VM_Version::supports_cmov() );
6982   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6983   ins_cost(200);
6984   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6985             "CMOV$cop $dst.hi,$src.hi" %}
6986   opcode(0x0F,0x40);
6987   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6988   ins_pipe( pipe_cmov_reg_long );
6989 %}
6990 
6991 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6992   predicate(VM_Version::supports_cmov() );
6993   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6994   ins_cost(200);
6995   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6996             "CMOV$cop $dst.hi,$src.hi" %}
6997   opcode(0x0F,0x40);
6998   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6999   ins_pipe( pipe_cmov_reg_long );
7000 %}
7001 
7002 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7003   predicate(VM_Version::supports_cmov() );
7004   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7005   ins_cost(200);
7006   expand %{
7007     cmovL_regU(cop, cr, dst, src);
7008   %}
7009 %}
7010 
7011 //----------Arithmetic Instructions--------------------------------------------
7012 //----------Addition Instructions----------------------------------------------
7013 
7014 // Integer Addition Instructions
7015 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7016   match(Set dst (AddI dst src));
7017   effect(KILL cr);
7018 
7019   size(2);
7020   format %{ "ADD    $dst,$src" %}
7021   opcode(0x03);
7022   ins_encode( OpcP, RegReg( dst, src) );
7023   ins_pipe( ialu_reg_reg );
7024 %}
7025 
7026 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7027   match(Set dst (AddI dst src));
7028   effect(KILL cr);
7029 
7030   format %{ "ADD    $dst,$src" %}
7031   opcode(0x81, 0x00); /* /0 id */
7032   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7033   ins_pipe( ialu_reg );
7034 %}
7035 
7036 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7037   predicate(UseIncDec);
7038   match(Set dst (AddI dst src));
7039   effect(KILL cr);
7040 
7041   size(1);
7042   format %{ "INC    $dst" %}
7043   opcode(0x40); /* 0x40 + rd: one-byte INC r32 */
7044   ins_encode( Opc_plus( primary, dst ) );
7045   ins_pipe( ialu_reg );
7046 %}
7047 
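// LEA computes the sum without touching the condition codes, so unlike the
// ADD forms above these rules need no KILL of the flags register.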
7048 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7049   match(Set dst (AddI src0 src1));
7050   ins_cost(110);
7051 
7052   format %{ "LEA    $dst,[$src0 + $src1]" %}
7053   opcode(0x8D); /* 0x8D /r */
7054   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7055   ins_pipe( ialu_reg_reg );
7056 %}
7057 
7058 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7059   match(Set dst (AddP src0 src1));
7060   ins_cost(110);
7061 
7062   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7063   opcode(0x8D); /* 0x8D /r */
7064   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7065   ins_pipe( ialu_reg_reg );
7066 %}
7067 
7068 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7069   predicate(UseIncDec);
7070   match(Set dst (AddI dst src));
7071   effect(KILL cr);
7072 
7073   size(1);
7074   format %{ "DEC    $dst" %}
7075   opcode(0x48); /* 0x48 + rd: one-byte DEC r32 */
7076   ins_encode( Opc_plus( primary, dst ) );
7077   ins_pipe( ialu_reg );
7078 %}
7079 
7080 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7081   match(Set dst (AddP dst src));
7082   effect(KILL cr);
7083 
7084   size(2);
7085   format %{ "ADD    $dst,$src" %}
7086   opcode(0x03);
7087   ins_encode( OpcP, RegReg( dst, src) );
7088   ins_pipe( ialu_reg_reg );
7089 %}
7090 
7091 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7092   match(Set dst (AddP dst src));
7093   effect(KILL cr);
7094 
7095   format %{ "ADD    $dst,$src" %}
7096   opcode(0x81,0x00); /* Opcode 81 /0 id */
7097   // ins_encode( RegImm( dst, src) );
7098   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7099   ins_pipe( ialu_reg );
7100 %}
7101 
7102 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7103   match(Set dst (AddI dst (LoadI src)));
7104   effect(KILL cr);
7105 
7106   ins_cost(125);
7107   format %{ "ADD    $dst,$src" %}
7108   opcode(0x03);
7109   ins_encode( OpcP, RegMem( dst, src) );
7110   ins_pipe( ialu_reg_mem );
7111 %}
7112 
7113 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7114   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7115   effect(KILL cr);
7116 
7117   ins_cost(150);
7118   format %{ "ADD    $dst,$src" %}
7119   opcode(0x01);  /* Opcode 01 /r */
7120   ins_encode( OpcP, RegMem( src, dst ) );
7121   ins_pipe( ialu_mem_reg );
7122 %}
7123 
7124 // Add Memory with Immediate
7125 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7126   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7127   effect(KILL cr);
7128 
7129   ins_cost(125);
7130   format %{ "ADD    $dst,$src" %}
7131   opcode(0x81);               /* Opcode 81 /0 id */
7132   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7133   ins_pipe( ialu_mem_imm );
7134 %}
7135 
7136 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7137   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7138   effect(KILL cr);
7139 
7140   ins_cost(125);
7141   format %{ "INC    $dst" %}
7142   opcode(0xFF);               /* Opcode FF /0 */
7143   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7144   ins_pipe( ialu_mem_imm );
7145 %}
7146 
7147 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7148   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7149   effect(KILL cr);
7150 
7151   ins_cost(125);
7152   format %{ "DEC    $dst" %}
7153   opcode(0xFF);               /* Opcode FF /1 */
7154   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7155   ins_pipe( ialu_mem_imm );
7156 %}
7157 
7158 
7159 instruct checkCastPP( eRegP dst ) %{
7160   match(Set dst (CheckCastPP dst));
7161 
7162   size(0);
7163   format %{ "#checkcastPP of $dst" %}
7164   ins_encode( /*empty encoding*/ );
7165   ins_pipe( empty );
7166 %}
7167 
7168 instruct castPP( eRegP dst ) %{
7169   match(Set dst (CastPP dst));
7170   format %{ "#castPP of $dst" %}
7171   ins_encode( /*empty encoding*/ );
7172   ins_pipe( empty );
7173 %}
7174 
7175 instruct castII( rRegI dst ) %{
7176   match(Set dst (CastII dst));
7177   format %{ "#castII of $dst" %}
7178   ins_encode( /*empty encoding*/ );
7179   ins_cost(0);
7180   ins_pipe( empty );
7181 %}
7182 
7183 
7184 // Load-locked - same as a regular pointer load when used with compare-swap
7185 instruct loadPLocked(eRegP dst, memory mem) %{
7186   match(Set dst (LoadPLocked mem));
7187 
7188   ins_cost(125);
7189   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7190   opcode(0x8B);
7191   ins_encode( OpcP, RegMem(dst,mem));
7192   ins_pipe( ialu_reg_mem );
7193 %}
7194 
7195 // Conditional-store of the updated heap-top.
7196 // Used during allocation of the shared heap.
7197 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7198 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7199   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7200   // EAX is killed if there is contention, but then it's also unused.
7201   // In the common case of no contention, EAX holds the new oop address.
7202   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7203   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7204   ins_pipe( pipe_cmpxchg );
7205 %}
7206 
7207 // Conditional-store of an int value.
7208 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7209 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7210   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7211   effect(KILL oldval);
7212   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7213   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7214   ins_pipe( pipe_cmpxchg );
7215 %}
7216 
7217 // Conditional-store of a long value.
7218 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7219 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7220   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7221   effect(KILL oldval);
7222   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7223             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7224             "XCHG   EBX,ECX"
7225   %}
7226   ins_encode %{
7227     // Note: we need to swap rbx, and rcx before and after the
7228     //       cmpxchg8 instruction because the instruction uses
7229     //       rcx as the high order word of the new value to store but
7230     //       our register encoding uses rbx.
7231     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7232     if( os::is_MP() )
7233       __ lock();
7234     __ cmpxchg8($mem$$Address);
7235     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7236   %}
7237   ins_pipe( pipe_cmpxchg );
7238 %}
7239 
7240 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7241 
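// CMPXCHG8B compares EDX:EAX with the memory operand and, on a match, stores
// ECX:EBX, which is why the long oldval/newval operands below are pinned to
// those register pairs.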
7242 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7243   predicate(VM_Version::supports_cx8());
7244   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7245   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7246   effect(KILL cr, KILL oldval);
7247   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7248             "MOV    $res,0\n\t"
7249             "JNE,s  fail\n\t"
7250             "MOV    $res,1\n"
7251           "fail:" %}
7252   ins_encode( enc_cmpxchg8(mem_ptr),
7253               enc_flags_ne_to_boolean(res) );
7254   ins_pipe( pipe_cmpxchg );
7255 %}
7256 
7257 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7258   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7259   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7260   effect(KILL cr, KILL oldval);
7261   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7262             "MOV    $res,0\n\t"
7263             "JNE,s  fail\n\t"
7264             "MOV    $res,1\n"
7265           "fail:" %}
7266   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7267   ins_pipe( pipe_cmpxchg );
7268 %}
7269 
7270 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7271   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7272   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7273   effect(KILL cr, KILL oldval);
7274   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7275             "MOV    $res,0\n\t"
7276             "JNE,s  fail\n\t"
7277             "MOV    $res,1\n"
7278           "fail:" %}
7279   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7280   ins_pipe( pipe_cmpxchg );
7281 %}
7282 
7283 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7284   predicate(VM_Version::supports_cx8());
7285   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7286   effect(KILL cr);
7287   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7288   ins_encode( enc_cmpxchg8(mem_ptr) );
7289   ins_pipe( pipe_cmpxchg );
7290 %}
7291 
7292 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7293   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7294   effect(KILL cr);
7295   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7296   ins_encode( enc_cmpxchg(mem_ptr) );
7297   ins_pipe( pipe_cmpxchg );
7298 %}
7299 
7300 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7301   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7302   effect(KILL cr);
7303   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7304   ins_encode( enc_cmpxchg(mem_ptr) );
7305   ins_pipe( pipe_cmpxchg );
7306 %}
7307 
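// When the result of GetAndAddI is not consumed, a plain locked ADD suffices;
// the XADD form below is only needed when the old value must be returned.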
7308 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7309   predicate(n->as_LoadStore()->result_not_used());
7310   match(Set dummy (GetAndAddI mem add));
7311   effect(KILL cr);
7312   format %{ "ADDL  [$mem],$add" %}
7313   ins_encode %{
7314     if (os::is_MP()) { __ lock(); }
7315     __ addl($mem$$Address, $add$$constant);
7316   %}
7317   ins_pipe( pipe_cmpxchg );
7318 %}
7319 
7320 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7321   match(Set newval (GetAndAddI mem newval));
7322   effect(KILL cr);
7323   format %{ "XADDL  [$mem],$newval" %}
7324   ins_encode %{
7325     if (os::is_MP()) { __ lock(); }
7326     __ xaddl($mem$$Address, $newval$$Register);
7327   %}
7328   ins_pipe( pipe_cmpxchg );
7329 %}
7330 
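// XCHG with a memory operand is implicitly locked, so no explicit LOCK prefix
// is emitted for the exchange rules below.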
7331 instruct xchgI( memory mem, rRegI newval) %{
7332   match(Set newval (GetAndSetI mem newval));
7333   format %{ "XCHGL  $newval,[$mem]" %}
7334   ins_encode %{
7335     __ xchgl($newval$$Register, $mem$$Address);
7336   %}
7337   ins_pipe( pipe_cmpxchg );
7338 %}
7339 
7340 instruct xchgP( memory mem, pRegP newval) %{
7341   match(Set newval (GetAndSetP mem newval));
7342   format %{ "XCHGL  $newval,[$mem]" %}
7343   ins_encode %{
7344     __ xchgl($newval$$Register, $mem$$Address);
7345   %}
7346   ins_pipe( pipe_cmpxchg );
7347 %}
7348 
7349 //----------Subtraction Instructions-------------------------------------------
7350 
7351 // Integer Subtraction Instructions
7352 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7353   match(Set dst (SubI dst src));
7354   effect(KILL cr);
7355 
7356   size(2);
7357   format %{ "SUB    $dst,$src" %}
7358   opcode(0x2B);
7359   ins_encode( OpcP, RegReg( dst, src) );
7360   ins_pipe( ialu_reg_reg );
7361 %}
7362 
7363 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7364   match(Set dst (SubI dst src));
7365   effect(KILL cr);
7366 
7367   format %{ "SUB    $dst,$src" %}
7368   opcode(0x81,0x05);  /* Opcode 81 /5 */
7369   // ins_encode( RegImm( dst, src) );
7370   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7371   ins_pipe( ialu_reg );
7372 %}
7373 
7374 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7375   match(Set dst (SubI dst (LoadI src)));
7376   effect(KILL cr);
7377 
7378   ins_cost(125);
7379   format %{ "SUB    $dst,$src" %}
7380   opcode(0x2B);
7381   ins_encode( OpcP, RegMem( dst, src) );
7382   ins_pipe( ialu_reg_mem );
7383 %}
7384 
7385 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7386   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7387   effect(KILL cr);
7388 
7389   ins_cost(150);
7390   format %{ "SUB    $dst,$src" %}
7391   opcode(0x29);  /* Opcode 29 /r */
7392   ins_encode( OpcP, RegMem( src, dst ) );
7393   ins_pipe( ialu_mem_reg );
7394 %}
7395 
7396 // Subtract from a pointer
7397 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7398   match(Set dst (AddP dst (SubI zero src)));
7399   effect(KILL cr);
7400 
7401   size(2);
7402   format %{ "SUB    $dst,$src" %}
7403   opcode(0x2B);
7404   ins_encode( OpcP, RegReg( dst, src) );
7405   ins_pipe( ialu_reg_reg );
7406 %}
7407 
7408 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7409   match(Set dst (SubI zero dst));
7410   effect(KILL cr);
7411 
7412   size(2);
7413   format %{ "NEG    $dst" %}
7414   opcode(0xF7,0x03);  // Opcode F7 /3
7415   ins_encode( OpcP, RegOpc( dst ) );
7416   ins_pipe( ialu_reg );
7417 %}
7418 
7419 //----------Multiplication/Division Instructions-------------------------------
7420 // Integer Multiplication Instructions
7421 // Multiply Register
7422 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7423   match(Set dst (MulI dst src));
7424   effect(KILL cr);
7425 
7426   size(3);
7427   ins_cost(300);
7428   format %{ "IMUL   $dst,$src" %}
7429   opcode(0xAF, 0x0F);
7430   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7431   ins_pipe( ialu_reg_reg_alu0 );
7432 %}
7433 
7434 // Multiply 32-bit Immediate
7435 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7436   match(Set dst (MulI src imm));
7437   effect(KILL cr);
7438 
7439   ins_cost(300);
7440   format %{ "IMUL   $dst,$src,$imm" %}
7441   opcode(0x69);  /* 69 /r id */
7442   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7443   ins_pipe( ialu_reg_reg_alu0 );
7444 %}
7445 
7446 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7447   match(Set dst src);
7448   effect(KILL cr);
7449 
7450   // Note that this is artificially increased to make it more expensive than loadConL
7451   ins_cost(250);
7452   format %{ "MOV    EAX,$src\t// low word only" %}
7453   opcode(0xB8);
7454   ins_encode( LdImmL_Lo(dst, src) );
7455   ins_pipe( ialu_reg_fat );
7456 %}
7457 
7458 // Multiply by 32-bit Immediate, taking the shifted high order results
7459 //  (special case for shift by 32)
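// This matches trees of the form (int)(((long)x * con) >> shift); such trees
// typically arise from strength-reduced division by a constant, where only
// the high half of the widening multiply is needed.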
7460 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7461   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7462   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7463              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7464              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7465   effect(USE src1, KILL cr);
7466 
7467   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7468   ins_cost(0*100 + 1*400 - 150);
7469   format %{ "IMUL   EDX:EAX,$src1" %}
7470   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7471   ins_pipe( pipe_slow );
7472 %}
7473 
7474 // Multiply by 32-bit Immediate, taking the shifted high order results
7475 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7476   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7477   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7478              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7479              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7480   effect(USE src1, KILL cr);
7481 
7482   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7483   ins_cost(1*100 + 1*400 - 150);
7484   format %{ "IMUL   EDX:EAX,$src1\n\t"
7485             "SAR    EDX,$cnt-32" %}
7486   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7487   ins_pipe( pipe_slow );
7488 %}
7489 
7490 // Multiply Memory 32-bit Immediate
7491 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7492   match(Set dst (MulI (LoadI src) imm));
7493   effect(KILL cr);
7494 
7495   ins_cost(300);
7496   format %{ "IMUL   $dst,$src,$imm" %}
7497   opcode(0x69);  /* 69 /r id */
7498   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7499   ins_pipe( ialu_reg_mem_alu0 );
7500 %}
7501 
7502 // Multiply Memory
7503 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7504   match(Set dst (MulI dst (LoadI src)));
7505   effect(KILL cr);
7506 
7507   ins_cost(350);
7508   format %{ "IMUL   $dst,$src" %}
7509   opcode(0xAF, 0x0F);
7510   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7511   ins_pipe( ialu_reg_mem_alu0 );
7512 %}
7513 
7514 // Multiply Register Int to Long
7515 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7516   // Basic Idea: long = (long)int * (long)int
7517   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7518   effect(DEF dst, USE src, USE src1, KILL flags);
7519 
7520   ins_cost(300);
7521   format %{ "IMUL   $dst,$src1" %}
7522 
7523   ins_encode( long_int_multiply( dst, src1 ) );
7524   ins_pipe( ialu_reg_reg_alu0 );
7525 %}
7526 
7527 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7528   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7529   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7530   effect(KILL flags);
7531 
7532   ins_cost(300);
7533   format %{ "MUL    $dst,$src1" %}
7534 
7535   ins_encode( long_uint_multiply(dst, src1) );
7536   ins_pipe( ialu_reg_reg_alu0 );
7537 %}
7538 
7539 // Multiply Register Long
7540 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7541   match(Set dst (MulL dst src));
7542   effect(KILL cr, TEMP tmp);
7543   ins_cost(4*100+3*400);
7544 // Basic idea: lo(result) = lo(x_lo * y_lo)
7545 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7546   format %{ "MOV    $tmp,$src.lo\n\t"
7547             "IMUL   $tmp,EDX\n\t"
7548             "MOV    EDX,$src.hi\n\t"
7549             "IMUL   EDX,EAX\n\t"
7550             "ADD    $tmp,EDX\n\t"
7551             "MUL    EDX:EAX,$src.lo\n\t"
7552             "ADD    EDX,$tmp" %}
7553   ins_encode( long_multiply( dst, src, tmp ) );
7554   ins_pipe( pipe_slow );
7555 %}
7556 
7557 // Multiply Register Long where the left operand's high 32 bits are zero
7558 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7559   predicate(is_operand_hi32_zero(n->in(1)));
7560   match(Set dst (MulL dst src));
7561   effect(KILL cr, TEMP tmp);
7562   ins_cost(2*100+2*400);
7563 // Basic idea: lo(result) = lo(x_lo * y_lo)
7564 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7565   format %{ "MOV    $tmp,$src.hi\n\t"
7566             "IMUL   $tmp,EAX\n\t"
7567             "MUL    EDX:EAX,$src.lo\n\t"
7568             "ADD    EDX,$tmp" %}
7569   ins_encode %{
7570     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7571     __ imull($tmp$$Register, rax);
7572     __ mull($src$$Register);
7573     __ addl(rdx, $tmp$$Register);
7574   %}
7575   ins_pipe( pipe_slow );
7576 %}
7577 
7578 // Multiply Register Long where the right operand's high 32 bits are zero
7579 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7580   predicate(is_operand_hi32_zero(n->in(2)));
7581   match(Set dst (MulL dst src));
7582   effect(KILL cr, TEMP tmp);
7583   ins_cost(2*100+2*400);
7584 // Basic idea: lo(result) = lo(x_lo * y_lo)
7585 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7586   format %{ "MOV    $tmp,$src.lo\n\t"
7587             "IMUL   $tmp,EDX\n\t"
7588             "MUL    EDX:EAX,$src.lo\n\t"
7589             "ADD    EDX,$tmp" %}
7590   ins_encode %{
7591     __ movl($tmp$$Register, $src$$Register);
7592     __ imull($tmp$$Register, rdx);
7593     __ mull($src$$Register);
7594     __ addl(rdx, $tmp$$Register);
7595   %}
7596   ins_pipe( pipe_slow );
7597 %}
7598 
7599 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7600 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7601   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7602   match(Set dst (MulL dst src));
7603   effect(KILL cr);
7604   ins_cost(1*400);
7605 // Basic idea: lo(result) = lo(x_lo * y_lo)
7606 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7607   format %{ "MUL    EDX:EAX,$src.lo" %}
7608   ins_encode %{
7609     __ mull($src$$Register);
7610   %}
7611   ins_pipe( pipe_slow );
7612 %}
7613 
7614 // Multiply Register Long by small constant
7615 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7616   match(Set dst (MulL dst src));
7617   effect(KILL cr, TEMP tmp);
7618   ins_cost(2*100+2*400);
7619   size(12);
7620 // Basic idea: lo(result) = lo(src * EAX)
7621 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7622   format %{ "IMUL   $tmp,EDX,$src\n\t"
7623             "MOV    EDX,$src\n\t"
7624             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7625             "ADD    EDX,$tmp" %}
7626   ins_encode( long_multiply_con( dst, src, tmp ) );
7627   ins_pipe( pipe_slow );
7628 %}
7629 
7630 // Integer DIV with Register
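// IDIV faults when min_jint is divided by -1, while the Java semantics call
// for the result min_jint with remainder 0, so the encoding below checks for
// that one combination and skips the IDIV when it occurs.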
7631 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7632   match(Set rax (DivI rax div));
7633   effect(KILL rdx, KILL cr);
7634   size(26);
7635   ins_cost(30*100+10*100);
7636   format %{ "CMP    EAX,0x80000000\n\t"
7637             "JNE,s  normal\n\t"
7638             "XOR    EDX,EDX\n\t"
7639             "CMP    ECX,-1\n\t"
7640             "JE,s   done\n"
7641     "normal: CDQ\n\t"
7642             "IDIV   $div\n\t"
7643     "done:"        %}
7644   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7645   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7646   ins_pipe( ialu_reg_reg_alu0 );
7647 %}
7648 
7649 // Divide Register Long
7650 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7651   match(Set dst (DivL src1 src2));
7652   effect( KILL cr, KILL cx, KILL bx );
7653   ins_cost(10000);
7654   format %{ "PUSH   $src1.hi\n\t"
7655             "PUSH   $src1.lo\n\t"
7656             "PUSH   $src2.hi\n\t"
7657             "PUSH   $src2.lo\n\t"
7658             "CALL   SharedRuntime::ldiv\n\t"
7659             "ADD    ESP,16" %}
7660   ins_encode( long_div(src1,src2) );
7661   ins_pipe( pipe_slow );
7662 %}
7663 
7664 // Integer DIVMOD with Register, both quotient and mod results
7665 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7666   match(DivModI rax div);
7667   effect(KILL cr);
7668   size(26);
7669   ins_cost(30*100+10*100);
7670   format %{ "CMP    EAX,0x80000000\n\t"
7671             "JNE,s  normal\n\t"
7672             "XOR    EDX,EDX\n\t"
7673             "CMP    ECX,-1\n\t"
7674             "JE,s   done\n"
7675     "normal: CDQ\n\t"
7676             "IDIV   $div\n\t"
7677     "done:"        %}
7678   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7679   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7680   ins_pipe( pipe_slow );
7681 %}
7682 
7683 // Integer MOD with Register
7684 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7685   match(Set rdx (ModI rax div));
7686   effect(KILL rax, KILL cr);
7687 
7688   size(26);
7689   ins_cost(300);
7690   format %{ "CDQ\n\t"
7691             "IDIV   $div" %}
7692   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7693   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7694   ins_pipe( ialu_reg_reg_alu0 );
7695 %}
7696 
7697 // Remainder Register Long
7698 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7699   match(Set dst (ModL src1 src2));
7700   effect( KILL cr, KILL cx, KILL bx );
7701   ins_cost(10000);
7702   format %{ "PUSH   $src1.hi\n\t"
7703             "PUSH   $src1.lo\n\t"
7704             "PUSH   $src2.hi\n\t"
7705             "PUSH   $src2.lo\n\t"
7706             "CALL   SharedRuntime::lrem\n\t"
7707             "ADD    ESP,16" %}
7708   ins_encode( long_mod(src1,src2) );
7709   ins_pipe( pipe_slow );
7710 %}
7711 
7712 // Divide Register Long (no special case since divisor != -1)
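// In outline: the magnitude of the constant divisor goes into $tmp and the
// (possibly negated) dividend is divided with two unsigned 32-bit DIVs, high
// word first, the remainder flowing into the low-word divide through EDX.
// When |$imm| exceeds the high word, the quotient already fits in 32 bits and
// only the fast-path DIV runs; the sign is fixed up at the end if $imm < 0.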
7713 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7714   match(Set dst (DivL dst imm));
7715   effect( TEMP tmp, TEMP tmp2, KILL cr );
7716   ins_cost(1000);
7717   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7718             "XOR    $tmp2,$tmp2\n\t"
7719             "CMP    $tmp,EDX\n\t"
7720             "JA,s   fast\n\t"
7721             "MOV    $tmp2,EAX\n\t"
7722             "MOV    EAX,EDX\n\t"
7723             "MOV    EDX,0\n\t"
7724             "JLE,s  pos\n\t"
7725             "LNEG   EAX : $tmp2\n\t"
7726             "DIV    $tmp # unsigned division\n\t"
7727             "XCHG   EAX,$tmp2\n\t"
7728             "DIV    $tmp\n\t"
7729             "LNEG   $tmp2 : EAX\n\t"
7730             "JMP,s  done\n"
7731     "pos:\n\t"
7732             "DIV    $tmp\n\t"
7733             "XCHG   EAX,$tmp2\n"
7734     "fast:\n\t"
7735             "DIV    $tmp\n"
7736     "done:\n\t"
7737             "MOV    EDX,$tmp2\n\t"
7738             "NEG    EDX:EAX # if $imm < 0" %}
7739   ins_encode %{
7740     int con = (int)$imm$$constant;
7741     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7742     int pcon = (con > 0) ? con : -con;
7743     Label Lfast, Lpos, Ldone;
7744 
7745     __ movl($tmp$$Register, pcon);
7746     __ xorl($tmp2$$Register,$tmp2$$Register);
7747     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7748     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7749 
7750     __ movl($tmp2$$Register, $dst$$Register); // save
7751     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7752     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7753     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7754 
7755     // Negative dividend.
7756     // convert value to positive to use unsigned division
7757     __ lneg($dst$$Register, $tmp2$$Register);
7758     __ divl($tmp$$Register);
7759     __ xchgl($dst$$Register, $tmp2$$Register);
7760     __ divl($tmp$$Register);
7761     // revert result back to negative
7762     __ lneg($tmp2$$Register, $dst$$Register);
7763     __ jmpb(Ldone);
7764 
7765     __ bind(Lpos);
7766     __ divl($tmp$$Register); // Use unsigned division
7767     __ xchgl($dst$$Register, $tmp2$$Register);
7768     // Fall through to the final divide; tmp2 has the 32-bit hi result
7769 
7770     __ bind(Lfast);
7771     // fast path: src is positive
7772     __ divl($tmp$$Register); // Use unsigned division
7773 
7774     __ bind(Ldone);
7775     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7776     if (con < 0) {
7777       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7778     }
7779   %}
7780   ins_pipe( pipe_slow );
7781 %}
7782 
7783 // Remainder Register Long (remainder fits into 32 bits)
7784 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7785   match(Set dst (ModL dst imm));
7786   effect( TEMP tmp, TEMP tmp2, KILL cr );
7787   ins_cost(1000);
7788   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7789             "CMP    $tmp,EDX\n\t"
7790             "JA,s   fast\n\t"
7791             "MOV    $tmp2,EAX\n\t"
7792             "MOV    EAX,EDX\n\t"
7793             "MOV    EDX,0\n\t"
7794             "JLE,s  pos\n\t"
7795             "LNEG   EAX : $tmp2\n\t"
7796             "DIV    $tmp # unsigned division\n\t"
7797             "MOV    EAX,$tmp2\n\t"
7798             "DIV    $tmp\n\t"
7799             "NEG    EDX\n\t"
7800             "JMP,s  done\n"
7801     "pos:\n\t"
7802             "DIV    $tmp\n\t"
7803             "MOV    EAX,$tmp2\n"
7804     "fast:\n\t"
7805             "DIV    $tmp\n"
7806     "done:\n\t"
7807             "MOV    EAX,EDX\n\t"
7808             "SAR    EDX,31" %}
7809   ins_encode %{
7810     int con = (int)$imm$$constant;
7811     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7812     int pcon = (con > 0) ? con : -con;
7813     Label  Lfast, Lpos, Ldone;
7814 
7815     __ movl($tmp$$Register, pcon);
7816     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7817     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7818 
7819     __ movl($tmp2$$Register, $dst$$Register); // save
7820     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7821     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7822     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7823 
7824     // Negative dividend.
7825     // convert value to positive to use unsigned division
7826     __ lneg($dst$$Register, $tmp2$$Register);
7827     __ divl($tmp$$Register);
7828     __ movl($dst$$Register, $tmp2$$Register);
7829     __ divl($tmp$$Register);
7830     // revert remainder back to negative
7831     __ negl(HIGH_FROM_LOW($dst$$Register));
7832     __ jmpb(Ldone);
7833 
7834     __ bind(Lpos);
7835     __ divl($tmp$$Register);
7836     __ movl($dst$$Register, $tmp2$$Register);
7837 
7838     __ bind(Lfast);
7839     // fast path: src is positive
7840     __ divl($tmp$$Register);
7841 
7842     __ bind(Ldone);
7843     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7844     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7845 
7846   %}
7847   ins_pipe( pipe_slow );
7848 %}
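
     // Same base-2^32 division scheme as above, keeping only the remainder.
     // The 32-bit remainder left in EDX by the last DIV is negated when the
     // dividend was negative (the remainder takes the sign of the dividend),
     // then moved to EAX and sign-extended into EDX with SAR 31.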
7849 
7850 // Integer Shift Instructions
7851 // Shift Left by one
7852 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7853   match(Set dst (LShiftI dst shift));
7854   effect(KILL cr);
7855 
7856   size(2);
7857   format %{ "SHL    $dst,$shift" %}
7858   opcode(0xD1, 0x4);  /* D1 /4 */
7859   ins_encode( OpcP, RegOpc( dst ) );
7860   ins_pipe( ialu_reg );
7861 %}
7862 
7863 // Shift Left by 8-bit immediate
7864 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7865   match(Set dst (LShiftI dst shift));
7866   effect(KILL cr);
7867 
7868   size(3);
7869   format %{ "SHL    $dst,$shift" %}
7870   opcode(0xC1, 0x4);  /* C1 /4 ib */
7871   ins_encode( RegOpcImm( dst, shift) );
7872   ins_pipe( ialu_reg );
7873 %}
7874 
7875 // Shift Left by variable
7876 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7877   match(Set dst (LShiftI dst shift));
7878   effect(KILL cr);
7879 
7880   size(2);
7881   format %{ "SHL    $dst,$shift" %}
7882   opcode(0xD3, 0x4);  /* D3 /4 */
7883   ins_encode( OpcP, RegOpc( dst ) );
7884   ins_pipe( ialu_reg_reg );
7885 %}
7886 
7887 // Arithmetic shift right by one
7888 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7889   match(Set dst (RShiftI dst shift));
7890   effect(KILL cr);
7891 
7892   size(2);
7893   format %{ "SAR    $dst,$shift" %}
7894   opcode(0xD1, 0x7);  /* D1 /7 */
7895   ins_encode( OpcP, RegOpc( dst ) );
7896   ins_pipe( ialu_reg );
7897 %}
7898 
7899 // Arithmetic shift right by one
7900 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7901   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7902   effect(KILL cr);
7903   format %{ "SAR    $dst,$shift" %}
7904   opcode(0xD1, 0x7);  /* D1 /7 */
7905   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7906   ins_pipe( ialu_mem_imm );
7907 %}
7908 
7909 // Arithmetic Shift Right by 8-bit immediate
7910 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7911   match(Set dst (RShiftI dst shift));
7912   effect(KILL cr);
7913 
7914   size(3);
7915   format %{ "SAR    $dst,$shift" %}
7916   opcode(0xC1, 0x7);  /* C1 /7 ib */
7917   ins_encode( RegOpcImm( dst, shift ) );
7918   ins_pipe( ialu_mem_imm );
7919 %}
7920 
7921 // Arithmetic Shift Right by 8-bit immediate
7922 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7923   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7924   effect(KILL cr);
7925 
7926   format %{ "SAR    $dst,$shift" %}
7927   opcode(0xC1, 0x7);  /* C1 /7 ib */
7928   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7929   ins_pipe( ialu_mem_imm );
7930 %}
7931 
7932 // Arithmetic Shift Right by variable
7933 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7934   match(Set dst (RShiftI dst shift));
7935   effect(KILL cr);
7936 
7937   size(2);
7938   format %{ "SAR    $dst,$shift" %}
7939   opcode(0xD3, 0x7);  /* D3 /7 */
7940   ins_encode( OpcP, RegOpc( dst ) );
7941   ins_pipe( ialu_reg_reg );
7942 %}
7943 
7944 // Logical shift right by one
7945 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7946   match(Set dst (URShiftI dst shift));
7947   effect(KILL cr);
7948 
7949   size(2);
7950   format %{ "SHR    $dst,$shift" %}
7951   opcode(0xD1, 0x5);  /* D1 /5 */
7952   ins_encode( OpcP, RegOpc( dst ) );
7953   ins_pipe( ialu_reg );
7954 %}
7955 
7956 // Logical Shift Right by 8-bit immediate
7957 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7958   match(Set dst (URShiftI dst shift));
7959   effect(KILL cr);
7960 
7961   size(3);
7962   format %{ "SHR    $dst,$shift" %}
7963   opcode(0xC1, 0x5);  /* C1 /5 ib */
7964   ins_encode( RegOpcImm( dst, shift) );
7965   ins_pipe( ialu_reg );
7966 %}
7967 
7968 
7969 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
7970 // This idiom is used by the compiler for the i2b bytecode.
7971 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7972   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7973 
7974   size(3);
7975   format %{ "MOVSX  $dst,$src :8" %}
7976   ins_encode %{
7977     __ movsbl($dst$$Register, $src$$Register);
7978   %}
7979   ins_pipe(ialu_reg_reg);
7980 %}
7981 
7982 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
7983 // This idiom is used by the compiler for the i2s bytecode.
7984 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7985   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7986 
7987   size(3);
7988   format %{ "MOVSX  $dst,$src :16" %}
7989   ins_encode %{
7990     __ movswl($dst$$Register, $src$$Register);
7991   %}
7992   ins_pipe(ialu_reg_reg);
7993 %}
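
     // Both idioms rely on (x << n) >> n (arithmetic right shift) being a sign
     // extension of the low 32-n bits, e.g. for i2b:
     //   (x << 24) >> 24  ==  (int)(byte)x
     // which collapses into a single MOVSX.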
7994 
7995 
7996 // Logical Shift Right by variable
7997 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7998   match(Set dst (URShiftI dst shift));
7999   effect(KILL cr);
8000 
8001   size(2);
8002   format %{ "SHR    $dst,$shift" %}
8003   opcode(0xD3, 0x5);  /* D3 /5 */
8004   ins_encode( OpcP, RegOpc( dst ) );
8005   ins_pipe( ialu_reg_reg );
8006 %}
8007 
8008 
8009 //----------Logical Instructions-----------------------------------------------
8010 //----------Integer Logical Instructions---------------------------------------
8011 // And Instructions
8012 // And Register with Register
8013 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8014   match(Set dst (AndI dst src));
8015   effect(KILL cr);
8016 
8017   size(2);
8018   format %{ "AND    $dst,$src" %}
8019   opcode(0x23);
8020   ins_encode( OpcP, RegReg( dst, src) );
8021   ins_pipe( ialu_reg_reg );
8022 %}
8023 
8024 // And Register with Immediate
8025 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8026   match(Set dst (AndI dst src));
8027   effect(KILL cr);
8028 
8029   format %{ "AND    $dst,$src" %}
8030   opcode(0x81,0x04);  /* Opcode 81 /4 */
8031   // ins_encode( RegImm( dst, src) );
8032   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8033   ins_pipe( ialu_reg );
8034 %}
8035 
8036 // And Register with Memory
8037 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8038   match(Set dst (AndI dst (LoadI src)));
8039   effect(KILL cr);
8040 
8041   ins_cost(125);
8042   format %{ "AND    $dst,$src" %}
8043   opcode(0x23);
8044   ins_encode( OpcP, RegMem( dst, src) );
8045   ins_pipe( ialu_reg_mem );
8046 %}
8047 
8048 // And Memory with Register
8049 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8050   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8051   effect(KILL cr);
8052 
8053   ins_cost(150);
8054   format %{ "AND    $dst,$src" %}
8055   opcode(0x21);  /* Opcode 21 /r */
8056   ins_encode( OpcP, RegMem( src, dst ) );
8057   ins_pipe( ialu_mem_reg );
8058 %}
8059 
8060 // And Memory with Immediate
8061 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8062   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8063   effect(KILL cr);
8064 
8065   ins_cost(125);
8066   format %{ "AND    $dst,$src" %}
8067   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8068   // ins_encode( MemImm( dst, src) );
8069   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8070   ins_pipe( ialu_mem_imm );
8071 %}
8072 
8073 // BMI1 instructions
8074 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8075   match(Set dst (AndI (XorI src1 minus_1) src2));
8076   predicate(UseBMI1Instructions);
8077   effect(KILL cr);
8078 
8079   format %{ "ANDNL  $dst, $src1, $src2" %}
8080 
8081   ins_encode %{
8082     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8083   %}
8084   ins_pipe(ialu_reg);
8085 %}
8086 
8087 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8088   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8089   predicate(UseBMI1Instructions);
8090   effect(KILL cr);
8091 
8092   ins_cost(125);
8093   format %{ "ANDNL  $dst, $src1, $src2" %}
8094 
8095   ins_encode %{
8096     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8097   %}
8098   ins_pipe(ialu_reg_mem);
8099 %}
8100 
8101 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8102   match(Set dst (AndI (SubI imm_zero src) src));
8103   predicate(UseBMI1Instructions);
8104   effect(KILL cr);
8105 
8106   format %{ "BLSIL  $dst, $src" %}
8107 
8108   ins_encode %{
8109     __ blsil($dst$$Register, $src$$Register);
8110   %}
8111   ins_pipe(ialu_reg);
8112 %}
8113 
8114 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8115   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8116   predicate(UseBMI1Instructions);
8117   effect(KILL cr);
8118 
8119   ins_cost(125);
8120   format %{ "BLSIL  $dst, $src" %}
8121 
8122   ins_encode %{
8123     __ blsil($dst$$Register, $src$$Address);
8124   %}
8125   ins_pipe(ialu_reg_mem);
8126 %}
8127 
8128 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8129 %{
8130   match(Set dst (XorI (AddI src minus_1) src));
8131   predicate(UseBMI1Instructions);
8132   effect(KILL cr);
8133 
8134   format %{ "BLSMSKL $dst, $src" %}
8135 
8136   ins_encode %{
8137     __ blsmskl($dst$$Register, $src$$Register);
8138   %}
8139 
8140   ins_pipe(ialu_reg);
8141 %}
8142 
8143 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8144 %{
8145   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8146   predicate(UseBMI1Instructions);
8147   effect(KILL cr);
8148 
8149   ins_cost(125);
8150   format %{ "BLSMSKL $dst, $src" %}
8151 
8152   ins_encode %{
8153     __ blsmskl($dst$$Register, $src$$Address);
8154   %}
8155 
8156   ins_pipe(ialu_reg_mem);
8157 %}
8158 
8159 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8160 %{
8161   match(Set dst (AndI (AddI src minus_1) src) );
8162   predicate(UseBMI1Instructions);
8163   effect(KILL cr);
8164 
8165   format %{ "BLSRL  $dst, $src" %}
8166 
8167   ins_encode %{
8168     __ blsrl($dst$$Register, $src$$Register);
8169   %}
8170 
8171   ins_pipe(ialu_reg);
8172 %}
8173 
8174 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8175 %{
8176   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8177   predicate(UseBMI1Instructions);
8178   effect(KILL cr);
8179 
8180   ins_cost(125);
8181   format %{ "BLSRL  $dst, $src" %}
8182 
8183   ins_encode %{
8184     __ blsrl($dst$$Register, $src$$Address);
8185   %}
8186 
8187   ins_pipe(ialu_reg_mem);
8188 %}
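
     // The BMI1 patterns above are recognized from their scalar identities:
     //   ANDN:    ~src1 & src2      matched as (src1 ^ -1) & src2
     //   BLSI:     src & -src       isolate lowest set bit
     //   BLSMSK:   src ^ (src - 1)  mask up to and including lowest set bit
     //   BLSR:     src & (src - 1)  clear lowest set bit
     // For example, src = 0xB4 (1011 0100b) gives
     //   BLSI -> 0x04,  BLSMSK -> 0x07,  BLSR -> 0xB0.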
8189 
8190 // Or Instructions
8191 // Or Register with Register
8192 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8193   match(Set dst (OrI dst src));
8194   effect(KILL cr);
8195 
8196   size(2);
8197   format %{ "OR     $dst,$src" %}
8198   opcode(0x0B);
8199   ins_encode( OpcP, RegReg( dst, src) );
8200   ins_pipe( ialu_reg_reg );
8201 %}
8202 
8203 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8204   match(Set dst (OrI dst (CastP2X src)));
8205   effect(KILL cr);
8206 
8207   size(2);
8208   format %{ "OR     $dst,$src" %}
8209   opcode(0x0B);
8210   ins_encode( OpcP, RegReg( dst, src) );
8211   ins_pipe( ialu_reg_reg );
8212 %}
8213 
8214 
8215 // Or Register with Immediate
8216 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8217   match(Set dst (OrI dst src));
8218   effect(KILL cr);
8219 
8220   format %{ "OR     $dst,$src" %}
8221   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8222   // ins_encode( RegImm( dst, src) );
8223   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8224   ins_pipe( ialu_reg );
8225 %}
8226 
8227 // Or Register with Memory
8228 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8229   match(Set dst (OrI dst (LoadI src)));
8230   effect(KILL cr);
8231 
8232   ins_cost(125);
8233   format %{ "OR     $dst,$src" %}
8234   opcode(0x0B);
8235   ins_encode( OpcP, RegMem( dst, src) );
8236   ins_pipe( ialu_reg_mem );
8237 %}
8238 
8239 // Or Memory with Register
8240 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8241   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8242   effect(KILL cr);
8243 
8244   ins_cost(150);
8245   format %{ "OR     $dst,$src" %}
8246   opcode(0x09);  /* Opcode 09 /r */
8247   ins_encode( OpcP, RegMem( src, dst ) );
8248   ins_pipe( ialu_mem_reg );
8249 %}
8250 
8251 // Or Memory with Immediate
8252 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8253   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8254   effect(KILL cr);
8255 
8256   ins_cost(125);
8257   format %{ "OR     $dst,$src" %}
8258   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8259   // ins_encode( MemImm( dst, src) );
8260   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8261   ins_pipe( ialu_mem_imm );
8262 %}
8263 
8264 // ROL/ROR
8265 // ROL expand
8266 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8267   effect(USE_DEF dst, USE shift, KILL cr);
8268 
8269   format %{ "ROL    $dst, $shift" %}
8270   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8271   ins_encode( OpcP, RegOpc( dst ));
8272   ins_pipe( ialu_reg );
8273 %}
8274 
8275 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8276   effect(USE_DEF dst, USE shift, KILL cr);
8277 
8278   format %{ "ROL    $dst, $shift" %}
8279   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8280   ins_encode( RegOpcImm(dst, shift) );
8281   ins_pipe(ialu_reg);
8282 %}
8283 
8284 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8285   effect(USE_DEF dst, USE shift, KILL cr);
8286 
8287   format %{ "ROL    $dst, $shift" %}
8288   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8289   ins_encode(OpcP, RegOpc(dst));
8290   ins_pipe( ialu_reg_reg );
8291 %}
8292 // end of ROL expand
8293 
8294 // ROL 32bit by one once
8295 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8296   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8297 
8298   expand %{
8299     rolI_eReg_imm1(dst, lshift, cr);
8300   %}
8301 %}
8302 
8303 // ROL 32bit var by imm8 once
8304 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8305   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8306   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8307 
8308   expand %{
8309     rolI_eReg_imm8(dst, lshift, cr);
8310   %}
8311 %}
8312 
8313 // ROL 32bit var by var once
8314 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8315   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8316 
8317   expand %{
8318     rolI_eReg_CL(dst, shift, cr);
8319   %}
8320 %}
8321 
8322 // ROL 32bit var by var once
8323 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8324   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8325 
8326   expand %{
8327     rolI_eReg_CL(dst, shift, cr);
8328   %}
8329 %}
8330 
8331 // ROR expand
8332 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8333   effect(USE_DEF dst, USE shift, KILL cr);
8334 
8335   format %{ "ROR    $dst, $shift" %}
8336   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8337   ins_encode( OpcP, RegOpc( dst ) );
8338   ins_pipe( ialu_reg );
8339 %}
8340 
8341 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8342   effect (USE_DEF dst, USE shift, KILL cr);
8343 
8344   format %{ "ROR    $dst, $shift" %}
8345   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8346   ins_encode( RegOpcImm(dst, shift) );
8347   ins_pipe( ialu_reg );
8348 %}
8349 
8350 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8351   effect(USE_DEF dst, USE shift, KILL cr);
8352 
8353   format %{ "ROR    $dst, $shift" %}
8354   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8355   ins_encode(OpcP, RegOpc(dst));
8356   ins_pipe( ialu_reg_reg );
8357 %}
8358 // end of ROR expand
8359 
8360 // ROR right once
8361 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8362   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8363 
8364   expand %{
8365     rorI_eReg_imm1(dst, rshift, cr);
8366   %}
8367 %}
8368 
8369 // ROR 32bit by immI8 once
8370 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8371   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8372   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8373 
8374   expand %{
8375     rorI_eReg_imm8(dst, rshift, cr);
8376   %}
8377 %}
8378 
8379 // ROR 32bit var by var once
8380 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8381   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8382 
8383   expand %{
8384     rorI_eReg_CL(dst, shift, cr);
8385   %}
8386 %}
8387 
8388 // ROR 32bit var by var once
8389 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8390   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8391 
8392   expand %{
8393     rorI_eReg_CL(dst, shift, cr);
8394   %}
8395 %}
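
     // The rotate instructions above are matched from the shift/or expansion
     // the compiler produces, e.g. for a left rotate by s:
     //   (x << s) | (x >>> (32 - s))  ==  rol(x, s)
     // Constant counts must sum to 0 mod 32 (checked by the predicates); for a
     // variable count, (0 - s) and (32 - s) are interchangeable because the
     // hardware masks the rotate count to the low 5 bits.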
8396 
8397 // Xor Instructions
8398 // Xor Register with Register
8399 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8400   match(Set dst (XorI dst src));
8401   effect(KILL cr);
8402 
8403   size(2);
8404   format %{ "XOR    $dst,$src" %}
8405   opcode(0x33);
8406   ins_encode( OpcP, RegReg( dst, src) );
8407   ins_pipe( ialu_reg_reg );
8408 %}
8409 
8410 // Xor Register with Immediate -1
8411 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8412   match(Set dst (XorI dst imm));
8413 
8414   size(2);
8415   format %{ "NOT    $dst" %}
8416   ins_encode %{
8417      __ notl($dst$$Register);
8418   %}
8419   ins_pipe( ialu_reg );
8420 %}
8421 
8422 // Xor Register with Immediate
8423 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8424   match(Set dst (XorI dst src));
8425   effect(KILL cr);
8426 
8427   format %{ "XOR    $dst,$src" %}
8428   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8429   // ins_encode( RegImm( dst, src) );
8430   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8431   ins_pipe( ialu_reg );
8432 %}
8433 
8434 // Xor Register with Memory
8435 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8436   match(Set dst (XorI dst (LoadI src)));
8437   effect(KILL cr);
8438 
8439   ins_cost(125);
8440   format %{ "XOR    $dst,$src" %}
8441   opcode(0x33);
8442   ins_encode( OpcP, RegMem(dst, src) );
8443   ins_pipe( ialu_reg_mem );
8444 %}
8445 
8446 // Xor Memory with Register
8447 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8448   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8449   effect(KILL cr);
8450 
8451   ins_cost(150);
8452   format %{ "XOR    $dst,$src" %}
8453   opcode(0x31);  /* Opcode 31 /r */
8454   ins_encode( OpcP, RegMem( src, dst ) );
8455   ins_pipe( ialu_mem_reg );
8456 %}
8457 
8458 // Xor Memory with Immediate
8459 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8460   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8461   effect(KILL cr);
8462 
8463   ins_cost(125);
8464   format %{ "XOR    $dst,$src" %}
8465   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8466   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8467   ins_pipe( ialu_mem_imm );
8468 %}
8469 
8470 //----------Convert Int to Boolean---------------------------------------------
8471 
8472 instruct movI_nocopy(rRegI dst, rRegI src) %{
8473   effect( DEF dst, USE src );
8474   format %{ "MOV    $dst,$src" %}
8475   ins_encode( enc_Copy( dst, src) );
8476   ins_pipe( ialu_reg_reg );
8477 %}
8478 
8479 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8480   effect( USE_DEF dst, USE src, KILL cr );
8481 
8482   size(4);
8483   format %{ "NEG    $dst\n\t"
8484             "ADC    $dst,$src" %}
8485   ins_encode( neg_reg(dst),
8486               OpcRegReg(0x13,dst,src) );
8487   ins_pipe( ialu_reg_reg_long );
8488 %}
8489 
8490 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8491   match(Set dst (Conv2B src));
8492 
8493   expand %{
8494     movI_nocopy(dst,src);
8495     ci2b(dst,src,cr);
8496   %}
8497 %}
8498 
8499 instruct movP_nocopy(rRegI dst, eRegP src) %{
8500   effect( DEF dst, USE src );
8501   format %{ "MOV    $dst,$src" %}
8502   ins_encode( enc_Copy( dst, src) );
8503   ins_pipe( ialu_reg_reg );
8504 %}
8505 
8506 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8507   effect( USE_DEF dst, USE src, KILL cr );
8508   format %{ "NEG    $dst\n\t"
8509             "ADC    $dst,$src" %}
8510   ins_encode( neg_reg(dst),
8511               OpcRegReg(0x13,dst,src) );
8512   ins_pipe( ialu_reg_reg_long );
8513 %}
8514 
8515 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8516   match(Set dst (Conv2B src));
8517 
8518   expand %{
8519     movP_nocopy(dst,src);
8520     cp2b(dst,src,cr);
8521   %}
8522 %}
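
     // Conv2B maps zero to 0 and any non-zero value to 1 without a branch:
     // after MOV $dst,$src the NEG sets CF exactly when $src != 0, and
     // ADC $dst,$src then computes (-src) + src + CF = CF, i.e. 0 or 1.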
8523 
8524 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8525   match(Set dst (CmpLTMask p q));
8526   effect(KILL cr);
8527   ins_cost(400);
8528 
8529   // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
8530   format %{ "XOR    $dst,$dst\n\t"
8531             "CMP    $p,$q\n\t"
8532             "SETlt  $dst\n\t"
8533             "NEG    $dst" %}
8534   ins_encode %{
8535     Register Rp = $p$$Register;
8536     Register Rq = $q$$Register;
8537     Register Rd = $dst$$Register;
8539     __ xorl(Rd, Rd);
8540     __ cmpl(Rp, Rq);
8541     __ setb(Assembler::less, Rd);
8542     __ negl(Rd);
8543   %}
8544 
8545   ins_pipe(pipe_slow);
8546 %}
8547 
8548 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8549   match(Set dst (CmpLTMask dst zero));
8550   effect(DEF dst, KILL cr);
8551   ins_cost(100);
8552 
8553   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8554   ins_encode %{
8555   __ sarl($dst$$Register, 31);
8556   %}
8557   ins_pipe(ialu_reg);
8558 %}
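
     // CmpLTMask produces an all-ones mask when p < q (signed) and zero
     // otherwise: SETlt leaves 0 or 1 in the low byte and NEG turns the 1 into
     // -1.  When comparing against zero the mask is just the sign bit
     // broadcast, hence the single SAR $dst,31 in cmpLTMask0.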
8559 
8560 /* better to save a register than avoid a branch */
8561 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8562   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8563   effect(KILL cr);
8564   ins_cost(400);
8565   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8566             "JGE    done\n\t"
8567             "ADD    $p,$y\n"
8568             "done:  " %}
8569   ins_encode %{
8570     Register Rp = $p$$Register;
8571     Register Rq = $q$$Register;
8572     Register Ry = $y$$Register;
8573     Label done;
8574     __ subl(Rp, Rq);
8575     __ jccb(Assembler::greaterEqual, done);
8576     __ addl(Rp, Ry);
8577     __ bind(done);
8578   %}
8579 
8580   ins_pipe(pipe_cmplt);
8581 %}
8582 
8583 /* better to save a register than avoid a branch */
8584 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8585   match(Set y (AndI (CmpLTMask p q) y));
8586   effect(KILL cr);
8587 
8588   ins_cost(300);
8589 
8590   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8591             "JLT      done\n\t"
8592             "XORL     $y, $y\n"
8593             "done:  " %}
8594   ins_encode %{
8595     Register Rp = $p$$Register;
8596     Register Rq = $q$$Register;
8597     Register Ry = $y$$Register;
8598     Label done;
8599     __ cmpl(Rp, Rq);
8600     __ jccb(Assembler::less, done);
8601     __ xorl(Ry, Ry);
8602     __ bind(done);
8603   %}
8604 
8605   ins_pipe(pipe_cmplt);
8606 %}
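
     // Both forms above fold the mask away.  Because CmpLTMask is either all
     // ones or zero,
     //   ((p < q ? -1 : 0) & y) + (p - q)  ==  (p < q) ? p - q + y : p - q
     //   ((p < q ? -1 : 0) & y)            ==  (p < q) ? y : 0
     // so a compare plus a short forward branch replaces materializing the
     // mask, trading a branch for the extra register the mask would need.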
8607 
8608 /* If I enable this, I encourage spilling in the inner loop of compress.
8609 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8610   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8611 */
8612 //----------Overflow Math Instructions-----------------------------------------
8613 
8614 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8615 %{
8616   match(Set cr (OverflowAddI op1 op2));
8617   effect(DEF cr, USE_KILL op1, USE op2);
8618 
8619   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8620 
8621   ins_encode %{
8622     __ addl($op1$$Register, $op2$$Register);
8623   %}
8624   ins_pipe(ialu_reg_reg);
8625 %}
8626 
8627 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8628 %{
8629   match(Set cr (OverflowAddI op1 op2));
8630   effect(DEF cr, USE_KILL op1, USE op2);
8631 
8632   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8633 
8634   ins_encode %{
8635     __ addl($op1$$Register, $op2$$constant);
8636   %}
8637   ins_pipe(ialu_reg_reg);
8638 %}
8639 
8640 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8641 %{
8642   match(Set cr (OverflowSubI op1 op2));
8643 
8644   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8645   ins_encode %{
8646     __ cmpl($op1$$Register, $op2$$Register);
8647   %}
8648   ins_pipe(ialu_reg_reg);
8649 %}
8650 
8651 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8652 %{
8653   match(Set cr (OverflowSubI op1 op2));
8654 
8655   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8656   ins_encode %{
8657     __ cmpl($op1$$Register, $op2$$constant);
8658   %}
8659   ins_pipe(ialu_reg_reg);
8660 %}
8661 
8662 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8663 %{
8664   match(Set cr (OverflowSubI zero op2));
8665   effect(DEF cr, USE_KILL op2);
8666 
8667   format %{ "NEG    $op2\t# overflow check int" %}
8668   ins_encode %{
8669     __ negl($op2$$Register);
8670   %}
8671   ins_pipe(ialu_reg_reg);
8672 %}
8673 
8674 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8675 %{
8676   match(Set cr (OverflowMulI op1 op2));
8677   effect(DEF cr, USE_KILL op1, USE op2);
8678 
8679   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8680   ins_encode %{
8681     __ imull($op1$$Register, $op2$$Register);
8682   %}
8683   ins_pipe(ialu_reg_reg_alu0);
8684 %}
8685 
8686 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8687 %{
8688   match(Set cr (OverflowMulI op1 op2));
8689   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8690 
8691   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8692   ins_encode %{
8693     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8694   %}
8695   ins_pipe(ialu_reg_reg_alu0);
8696 %}
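
     // The Overflow* nodes only produce flags; the matcher pairs them with a
     // branch on the overflow condition.  OverflowSubI can use CMP (same flags
     // as SUB without writing the result), while OverflowAddI/OverflowMulI
     // must really execute the ADD/IMUL, which is why op1 is USE_KILL or the
     // product is written to a TEMP register.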
8697 
8698 //----------Long Instructions------------------------------------------------
8699 // Add Long Register with Register
8700 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8701   match(Set dst (AddL dst src));
8702   effect(KILL cr);
8703   ins_cost(200);
8704   format %{ "ADD    $dst.lo,$src.lo\n\t"
8705             "ADC    $dst.hi,$src.hi" %}
8706   opcode(0x03, 0x13);
8707   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8708   ins_pipe( ialu_reg_reg_long );
8709 %}
8710 
8711 // Add Long Register with Immediate
8712 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8713   match(Set dst (AddL dst src));
8714   effect(KILL cr);
8715   format %{ "ADD    $dst.lo,$src.lo\n\t"
8716             "ADC    $dst.hi,$src.hi" %}
8717   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8718   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8719   ins_pipe( ialu_reg_long );
8720 %}
8721 
8722 // Add Long Register with Memory
8723 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8724   match(Set dst (AddL dst (LoadL mem)));
8725   effect(KILL cr);
8726   ins_cost(125);
8727   format %{ "ADD    $dst.lo,$mem\n\t"
8728             "ADC    $dst.hi,$mem+4" %}
8729   opcode(0x03, 0x13);
8730   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8731   ins_pipe( ialu_reg_long_mem );
8732 %}
8733 
8734 // Subtract Long Register with Register.
8735 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8736   match(Set dst (SubL dst src));
8737   effect(KILL cr);
8738   ins_cost(200);
8739   format %{ "SUB    $dst.lo,$src.lo\n\t"
8740             "SBB    $dst.hi,$src.hi" %}
8741   opcode(0x2B, 0x1B);
8742   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8743   ins_pipe( ialu_reg_reg_long );
8744 %}
8745 
8746 // Subtract Long Register with Immediate
8747 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8748   match(Set dst (SubL dst src));
8749   effect(KILL cr);
8750   format %{ "SUB    $dst.lo,$src.lo\n\t"
8751             "SBB    $dst.hi,$src.hi" %}
8752   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8753   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8754   ins_pipe( ialu_reg_long );
8755 %}
8756 
8757 // Subtract Long Register with Memory
8758 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8759   match(Set dst (SubL dst (LoadL mem)));
8760   effect(KILL cr);
8761   ins_cost(125);
8762   format %{ "SUB    $dst.lo,$mem\n\t"
8763             "SBB    $dst.hi,$mem+4" %}
8764   opcode(0x2B, 0x1B);
8765   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8766   ins_pipe( ialu_reg_long_mem );
8767 %}
8768 
8769 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8770   match(Set dst (SubL zero dst));
8771   effect(KILL cr);
8772   ins_cost(300);
8773   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8774   ins_encode( neg_long(dst) );
8775   ins_pipe( ialu_reg_reg_long );
8776 %}
8777 
8778 // And Long Register with Register
8779 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8780   match(Set dst (AndL dst src));
8781   effect(KILL cr);
8782   format %{ "AND    $dst.lo,$src.lo\n\t"
8783             "AND    $dst.hi,$src.hi" %}
8784   opcode(0x23,0x23);
8785   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8786   ins_pipe( ialu_reg_reg_long );
8787 %}
8788 
8789 // And Long Register with Immediate
8790 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8791   match(Set dst (AndL dst src));
8792   effect(KILL cr);
8793   format %{ "AND    $dst.lo,$src.lo\n\t"
8794             "AND    $dst.hi,$src.hi" %}
8795   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8796   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8797   ins_pipe( ialu_reg_long );
8798 %}
8799 
8800 // And Long Register with Memory
8801 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8802   match(Set dst (AndL dst (LoadL mem)));
8803   effect(KILL cr);
8804   ins_cost(125);
8805   format %{ "AND    $dst.lo,$mem\n\t"
8806             "AND    $dst.hi,$mem+4" %}
8807   opcode(0x23, 0x23);
8808   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8809   ins_pipe( ialu_reg_long_mem );
8810 %}
8811 
8812 // BMI1 instructions
8813 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8814   match(Set dst (AndL (XorL src1 minus_1) src2));
8815   predicate(UseBMI1Instructions);
8816   effect(KILL cr, TEMP dst);
8817 
8818   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8819             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8820          %}
8821 
8822   ins_encode %{
8823     Register Rdst = $dst$$Register;
8824     Register Rsrc1 = $src1$$Register;
8825     Register Rsrc2 = $src2$$Register;
8826     __ andnl(Rdst, Rsrc1, Rsrc2);
8827     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8828   %}
8829   ins_pipe(ialu_reg_reg_long);
8830 %}
8831 
8832 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8833   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8834   predicate(UseBMI1Instructions);
8835   effect(KILL cr, TEMP dst);
8836 
8837   ins_cost(125);
8838   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8839             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8840          %}
8841 
8842   ins_encode %{
8843     Register Rdst = $dst$$Register;
8844     Register Rsrc1 = $src1$$Register;
8845     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8846 
8847     __ andnl(Rdst, Rsrc1, $src2$$Address);
8848     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8849   %}
8850   ins_pipe(ialu_reg_mem);
8851 %}
8852 
8853 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8854   match(Set dst (AndL (SubL imm_zero src) src));
8855   predicate(UseBMI1Instructions);
8856   effect(KILL cr, TEMP dst);
8857 
8858   format %{ "MOVL   $dst.hi, 0\n\t"
8859             "BLSIL  $dst.lo, $src.lo\n\t"
8860             "JNZ    done\n\t"
8861             "BLSIL  $dst.hi, $src.hi\n"
8862             "done:"
8863          %}
8864 
8865   ins_encode %{
8866     Label done;
8867     Register Rdst = $dst$$Register;
8868     Register Rsrc = $src$$Register;
8869     __ movl(HIGH_FROM_LOW(Rdst), 0);
8870     __ blsil(Rdst, Rsrc);
8871     __ jccb(Assembler::notZero, done);
8872     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8873     __ bind(done);
8874   %}
8875   ins_pipe(ialu_reg);
8876 %}
8877 
8878 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8879   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8880   predicate(UseBMI1Instructions);
8881   effect(KILL cr, TEMP dst);
8882 
8883   ins_cost(125);
8884   format %{ "MOVL   $dst.hi, 0\n\t"
8885             "BLSIL  $dst.lo, $src\n\t"
8886             "JNZ    done\n\t"
8887             "BLSIL  $dst.hi, $src+4\n"
8888             "done:"
8889          %}
8890 
8891   ins_encode %{
8892     Label done;
8893     Register Rdst = $dst$$Register;
8894     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8895 
8896     __ movl(HIGH_FROM_LOW(Rdst), 0);
8897     __ blsil(Rdst, $src$$Address);
8898     __ jccb(Assembler::notZero, done);
8899     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8900     __ bind(done);
8901   %}
8902   ins_pipe(ialu_reg_mem);
8903 %}
8904 
8905 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8906 %{
8907   match(Set dst (XorL (AddL src minus_1) src));
8908   predicate(UseBMI1Instructions);
8909   effect(KILL cr, TEMP dst);
8910 
8911   format %{ "MOVL    $dst.hi, 0\n\t"
8912             "BLSMSKL $dst.lo, $src.lo\n\t"
8913             "JNC     done\n\t"
8914             "BLSMSKL $dst.hi, $src.hi\n"
8915             "done:"
8916          %}
8917 
8918   ins_encode %{
8919     Label done;
8920     Register Rdst = $dst$$Register;
8921     Register Rsrc = $src$$Register;
8922     __ movl(HIGH_FROM_LOW(Rdst), 0);
8923     __ blsmskl(Rdst, Rsrc);
8924     __ jccb(Assembler::carryClear, done);
8925     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8926     __ bind(done);
8927   %}
8928 
8929   ins_pipe(ialu_reg);
8930 %}
8931 
8932 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8933 %{
8934   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8935   predicate(UseBMI1Instructions);
8936   effect(KILL cr, TEMP dst);
8937 
8938   ins_cost(125);
8939   format %{ "MOVL    $dst.hi, 0\n\t"
8940             "BLSMSKL $dst.lo, $src\n\t"
8941             "JNC     done\n\t"
8942             "BLSMSKL $dst.hi, $src+4\n"
8943             "done:"
8944          %}
8945 
8946   ins_encode %{
8947     Label done;
8948     Register Rdst = $dst$$Register;
8949     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8950 
8951     __ movl(HIGH_FROM_LOW(Rdst), 0);
8952     __ blsmskl(Rdst, $src$$Address);
8953     __ jccb(Assembler::carryClear, done);
8954     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8955     __ bind(done);
8956   %}
8957 
8958   ins_pipe(ialu_reg_mem);
8959 %}
8960 
8961 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8962 %{
8963   match(Set dst (AndL (AddL src minus_1) src) );
8964   predicate(UseBMI1Instructions);
8965   effect(KILL cr, TEMP dst);
8966 
8967   format %{ "MOVL   $dst.hi, $src.hi\n\t"
8968             "BLSRL  $dst.lo, $src.lo\n\t"
8969             "JNC    done\n\t"
8970             "BLSRL  $dst.hi, $src.hi\n"
8971             "done:"
8972   %}
8973 
8974   ins_encode %{
8975     Label done;
8976     Register Rdst = $dst$$Register;
8977     Register Rsrc = $src$$Register;
8978     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8979     __ blsrl(Rdst, Rsrc);
8980     __ jccb(Assembler::carryClear, done);
8981     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8982     __ bind(done);
8983   %}
8984 
8985   ins_pipe(ialu_reg);
8986 %}
8987 
8988 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8989 %{
8990   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8991   predicate(UseBMI1Instructions);
8992   effect(KILL cr, TEMP dst);
8993 
8994   ins_cost(125);
8995   format %{ "MOVL   $dst.hi, $src+4\n\t"
8996             "BLSRL  $dst.lo, $src\n\t"
8997             "JNC    done\n\t"
8998             "BLSRL  $dst.hi, $src+4\n"
8999             "done:"
9000   %}
9001 
9002   ins_encode %{
9003     Label done;
9004     Register Rdst = $dst$$Register;
9005     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9006     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9007     __ blsrl(Rdst, $src$$Address);
9008     __ jccb(Assembler::carryClear, done);
9009     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9010     __ bind(done);
9011   %}
9012 
9013   ins_pipe(ialu_reg_mem);
9014 %}
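
     // The long BMI1 forms work on the 32-bit halves.  The lowest set bit (and
     // hence BLSI/BLSMSK/BLSR) is determined by the low word unless the low
     // word is zero, and the flags set by the first 32-bit instruction report
     // exactly that case (ZF for BLSIL, CF for BLSMSKL/BLSRL), so a short
     // conditional branch decides whether the operation has to be repeated on
     // the high word.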
9015 
9016 // Or Long Register with Register
9017 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9018   match(Set dst (OrL dst src));
9019   effect(KILL cr);
9020   format %{ "OR     $dst.lo,$src.lo\n\t"
9021             "OR     $dst.hi,$src.hi" %}
9022   opcode(0x0B,0x0B);
9023   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9024   ins_pipe( ialu_reg_reg_long );
9025 %}
9026 
9027 // Or Long Register with Immediate
9028 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9029   match(Set dst (OrL dst src));
9030   effect(KILL cr);
9031   format %{ "OR     $dst.lo,$src.lo\n\t"
9032             "OR     $dst.hi,$src.hi" %}
9033   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9034   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9035   ins_pipe( ialu_reg_long );
9036 %}
9037 
9038 // Or Long Register with Memory
9039 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9040   match(Set dst (OrL dst (LoadL mem)));
9041   effect(KILL cr);
9042   ins_cost(125);
9043   format %{ "OR     $dst.lo,$mem\n\t"
9044             "OR     $dst.hi,$mem+4" %}
9045   opcode(0x0B,0x0B);
9046   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9047   ins_pipe( ialu_reg_long_mem );
9048 %}
9049 
9050 // Xor Long Register with Register
9051 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9052   match(Set dst (XorL dst src));
9053   effect(KILL cr);
9054   format %{ "XOR    $dst.lo,$src.lo\n\t"
9055             "XOR    $dst.hi,$src.hi" %}
9056   opcode(0x33,0x33);
9057   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9058   ins_pipe( ialu_reg_reg_long );
9059 %}
9060 
9061 // Xor Long Register with Immediate -1
9062 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9063   match(Set dst (XorL dst imm));
9064   format %{ "NOT    $dst.lo\n\t"
9065             "NOT    $dst.hi" %}
9066   ins_encode %{
9067      __ notl($dst$$Register);
9068      __ notl(HIGH_FROM_LOW($dst$$Register));
9069   %}
9070   ins_pipe( ialu_reg_long );
9071 %}
9072 
9073 // Xor Long Register with Immediate
9074 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9075   match(Set dst (XorL dst src));
9076   effect(KILL cr);
9077   format %{ "XOR    $dst.lo,$src.lo\n\t"
9078             "XOR    $dst.hi,$src.hi" %}
9079   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9080   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9081   ins_pipe( ialu_reg_long );
9082 %}
9083 
9084 // Xor Long Register with Memory
9085 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9086   match(Set dst (XorL dst (LoadL mem)));
9087   effect(KILL cr);
9088   ins_cost(125);
9089   format %{ "XOR    $dst.lo,$mem\n\t"
9090             "XOR    $dst.hi,$mem+4" %}
9091   opcode(0x33,0x33);
9092   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9093   ins_pipe( ialu_reg_long_mem );
9094 %}
9095 
9096 // Shift Left Long by 1
9097 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9098   predicate(UseNewLongLShift);
9099   match(Set dst (LShiftL dst cnt));
9100   effect(KILL cr);
9101   ins_cost(100);
9102   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9103             "ADC    $dst.hi,$dst.hi" %}
9104   ins_encode %{
9105     __ addl($dst$$Register,$dst$$Register);
9106     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9107   %}
9108   ins_pipe( ialu_reg_long );
9109 %}
9110 
9111 // Shift Left Long by 2
9112 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9113   predicate(UseNewLongLShift);
9114   match(Set dst (LShiftL dst cnt));
9115   effect(KILL cr);
9116   ins_cost(100);
9117   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9118             "ADC    $dst.hi,$dst.hi\n\t"
9119             "ADD    $dst.lo,$dst.lo\n\t"
9120             "ADC    $dst.hi,$dst.hi" %}
9121   ins_encode %{
9122     __ addl($dst$$Register,$dst$$Register);
9123     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9124     __ addl($dst$$Register,$dst$$Register);
9125     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9126   %}
9127   ins_pipe( ialu_reg_long );
9128 %}
9129 
9130 // Shift Left Long by 3
9131 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9132   predicate(UseNewLongLShift);
9133   match(Set dst (LShiftL dst cnt));
9134   effect(KILL cr);
9135   ins_cost(100);
9136   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9137             "ADC    $dst.hi,$dst.hi\n\t"
9138             "ADD    $dst.lo,$dst.lo\n\t"
9139             "ADC    $dst.hi,$dst.hi\n\t"
9140             "ADD    $dst.lo,$dst.lo\n\t"
9141             "ADC    $dst.hi,$dst.hi" %}
9142   ins_encode %{
9143     __ addl($dst$$Register,$dst$$Register);
9144     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9145     __ addl($dst$$Register,$dst$$Register);
9146     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9147     __ addl($dst$$Register,$dst$$Register);
9148     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9149   %}
9150   ins_pipe( ialu_reg_long );
9151 %}
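
     // A 64-bit left shift by one is just a doubling: ADD lo,lo produces the
     // carry out of the low word and ADC hi,hi folds it into the high word.
     // Repeating the pair shifts by 2 or 3 without needing ECX or SHLD.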
9152 
9153 // Shift Left Long by 1-31
9154 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9155   match(Set dst (LShiftL dst cnt));
9156   effect(KILL cr);
9157   ins_cost(200);
9158   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9159             "SHL    $dst.lo,$cnt" %}
9160   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9161   ins_encode( move_long_small_shift(dst,cnt) );
9162   ins_pipe( ialu_reg_long );
9163 %}
9164 
9165 // Shift Left Long by 32-63
9166 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9167   match(Set dst (LShiftL dst cnt));
9168   effect(KILL cr);
9169   ins_cost(300);
9170   format %{ "MOV    $dst.hi,$dst.lo\n"
9171           "\tSHL    $dst.hi,$cnt-32\n"
9172           "\tXOR    $dst.lo,$dst.lo" %}
9173   opcode(0xC1, 0x4);  /* C1 /4 ib */
9174   ins_encode( move_long_big_shift_clr(dst,cnt) );
9175   ins_pipe( ialu_reg_long );
9176 %}
9177 
9178 // Shift Left Long by variable
9179 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9180   match(Set dst (LShiftL dst shift));
9181   effect(KILL cr);
9182   ins_cost(500+200);
9183   size(17);
9184   format %{ "TEST   $shift,32\n\t"
9185             "JEQ,s  small\n\t"
9186             "MOV    $dst.hi,$dst.lo\n\t"
9187             "XOR    $dst.lo,$dst.lo\n"
9188     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9189             "SHL    $dst.lo,$shift" %}
9190   ins_encode( shift_left_long( dst, shift ) );
9191   ins_pipe( pipe_slow );
9192 %}
9193 
9194 // Shift Right Long by 1-31
9195 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9196   match(Set dst (URShiftL dst cnt));
9197   effect(KILL cr);
9198   ins_cost(200);
9199   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9200             "SHR    $dst.hi,$cnt" %}
9201   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9202   ins_encode( move_long_small_shift(dst,cnt) );
9203   ins_pipe( ialu_reg_long );
9204 %}
9205 
9206 // Shift Right Long by 32-63
9207 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9208   match(Set dst (URShiftL dst cnt));
9209   effect(KILL cr);
9210   ins_cost(300);
9211   format %{ "MOV    $dst.lo,$dst.hi\n"
9212           "\tSHR    $dst.lo,$cnt-32\n"
9213           "\tXOR    $dst.hi,$dst.hi" %}
9214   opcode(0xC1, 0x5);  /* C1 /5 ib */
9215   ins_encode( move_long_big_shift_clr(dst,cnt) );
9216   ins_pipe( ialu_reg_long );
9217 %}
9218 
9219 // Shift Right Long by variable
9220 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9221   match(Set dst (URShiftL dst shift));
9222   effect(KILL cr);
9223   ins_cost(600);
9224   size(17);
9225   format %{ "TEST   $shift,32\n\t"
9226             "JEQ,s  small\n\t"
9227             "MOV    $dst.lo,$dst.hi\n\t"
9228             "XOR    $dst.hi,$dst.hi\n"
9229     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9230             "SHR    $dst.hi,$shift" %}
9231   ins_encode( shift_right_long( dst, shift ) );
9232   ins_pipe( pipe_slow );
9233 %}
9234 
9235 // Shift Right Long by 1-31
9236 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9237   match(Set dst (RShiftL dst cnt));
9238   effect(KILL cr);
9239   ins_cost(200);
9240   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9241             "SAR    $dst.hi,$cnt" %}
9242   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9243   ins_encode( move_long_small_shift(dst,cnt) );
9244   ins_pipe( ialu_reg_long );
9245 %}
9246 
9247 // Shift Right Long by 32-63
9248 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9249   match(Set dst (RShiftL dst cnt));
9250   effect(KILL cr);
9251   ins_cost(300);
9252   format %{ "MOV    $dst.lo,$dst.hi\n"
9253           "\tSAR    $dst.lo,$cnt-32\n"
9254           "\tSAR    $dst.hi,31" %}
9255   opcode(0xC1, 0x7);  /* C1 /7 ib */
9256   ins_encode( move_long_big_shift_sign(dst,cnt) );
9257   ins_pipe( ialu_reg_long );
9258 %}
9259 
9260 // Shift Right arithmetic Long by variable
9261 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9262   match(Set dst (RShiftL dst shift));
9263   effect(KILL cr);
9264   ins_cost(600);
9265   size(18);
9266   format %{ "TEST   $shift,32\n\t"
9267             "JEQ,s  small\n\t"
9268             "MOV    $dst.lo,$dst.hi\n\t"
9269             "SAR    $dst.hi,31\n"
9270     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9271             "SAR    $dst.hi,$shift" %}
9272   ins_encode( shift_right_arith_long( dst, shift ) );
9273   ins_pipe( pipe_slow );
9274 %}
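
     // The 64-bit shifts are built from the double-precision shift
     // instructions: for counts 1-31, SHLD/SHRD move bits between the two
     // words and a plain SHL/SHR/SAR finishes the word that supplied them; for
     // counts 32-63 one word is copied over the other, shifted by cnt-32, and
     // the word it came from is cleared (or filled with the sign for SAR).
     // The variable-count forms test bit 5 of the count at run time because
     // the hardware masks shift counts to the low 5 bits.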
9275 
9276 
9277 //----------Double Instructions------------------------------------------------
9278 // Double Math
9279 
9280 // Compare & branch
9281 
9282 // P6 version of double compare, sets condition codes in EFLAGS
9283 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9284   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9285   match(Set cr (CmpD src1 src2));
9286   effect(KILL rax);
9287   ins_cost(150);
9288   format %{ "FLD    $src1\n\t"
9289             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9290             "JNP    exit\n\t"
9291             "MOV    ah,1       // saw a NaN, set CF\n\t"
9292             "SAHF\n"
9293      "exit:\tNOP               // avoid branch to branch" %}
9294   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9295   ins_encode( Push_Reg_DPR(src1),
9296               OpcP, RegOpc(src2),
9297               cmpF_P6_fixup );
9298   ins_pipe( pipe_slow );
9299 %}
9300 
9301 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9302   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9303   match(Set cr (CmpD src1 src2));
9304   ins_cost(150);
9305   format %{ "FLD    $src1\n\t"
9306             "FUCOMIP ST,$src2  // P6 instruction" %}
9307   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9308   ins_encode( Push_Reg_DPR(src1),
9309               OpcP, RegOpc(src2));
9310   ins_pipe( pipe_slow );
9311 %}
9312 
9313 // Compare & branch
9314 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9315   predicate(UseSSE<=1);
9316   match(Set cr (CmpD src1 src2));
9317   effect(KILL rax);
9318   ins_cost(200);
9319   format %{ "FLD    $src1\n\t"
9320             "FCOMp  $src2\n\t"
9321             "FNSTSW AX\n\t"
9322             "TEST   AX,0x400\n\t"
9323             "JZ,s   flags\n\t"
9324             "MOV    AH,1\t# unordered treat as LT\n"
9325     "flags:\tSAHF" %}
9326   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9327   ins_encode( Push_Reg_DPR(src1),
9328               OpcP, RegOpc(src2),
9329               fpu_flags);
9330   ins_pipe( pipe_slow );
9331 %}
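
     // x87 compares report an unordered result (a NaN operand) through PF.
     // The P6 form uses FUCOMIP and, when PF is set, forces CF with MOV AH,1 /
     // SAHF; the pre-P6 form reaches the same flag state by inspecting the
     // status word from FNSTSW.  Either way the fixup makes a NaN look like
     // "less than", as the format comments note.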
9332 
9333 // Compare vs zero into -1,0,1
9334 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9335   predicate(UseSSE<=1);
9336   match(Set dst (CmpD3 src1 zero));
9337   effect(KILL cr, KILL rax);
9338   ins_cost(280);
9339   format %{ "FTSTD  $dst,$src1" %}
9340   opcode(0xE4, 0xD9);
9341   ins_encode( Push_Reg_DPR(src1),
9342               OpcS, OpcP, PopFPU,
9343               CmpF_Result(dst));
9344   ins_pipe( pipe_slow );
9345 %}
9346 
9347 // Compare into -1,0,1
9348 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9349   predicate(UseSSE<=1);
9350   match(Set dst (CmpD3 src1 src2));
9351   effect(KILL cr, KILL rax);
9352   ins_cost(300);
9353   format %{ "FCMPD  $dst,$src1,$src2" %}
9354   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9355   ins_encode( Push_Reg_DPR(src1),
9356               OpcP, RegOpc(src2),
9357               CmpF_Result(dst));
9358   ins_pipe( pipe_slow );
9359 %}
9360 
9361 // double compare and set condition codes in EFLAGS by XMM regs
9362 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9363   predicate(UseSSE>=2);
9364   match(Set cr (CmpD src1 src2));
9365   ins_cost(145);
9366   format %{ "UCOMISD $src1,$src2\n\t"
9367             "JNP,s   exit\n\t"
9368             "PUSHF\t# saw NaN, set CF\n\t"
9369             "AND     [rsp], #0xffffff2b\n\t"
9370             "POPF\n"
9371     "exit:" %}
9372   ins_encode %{
9373     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9374     emit_cmpfp_fixup(_masm);
9375   %}
9376   ins_pipe( pipe_slow );
9377 %}
9378 
9379 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9380   predicate(UseSSE>=2);
9381   match(Set cr (CmpD src1 src2));
9382   ins_cost(100);
9383   format %{ "UCOMISD $src1,$src2" %}
9384   ins_encode %{
9385     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9386   %}
9387   ins_pipe( pipe_slow );
9388 %}
9389 
9390 // double compare and set condition codes in EFLAGS by XMM regs
9391 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9392   predicate(UseSSE>=2);
9393   match(Set cr (CmpD src1 (LoadD src2)));
9394   ins_cost(145);
9395   format %{ "UCOMISD $src1,$src2\n\t"
9396             "JNP,s   exit\n\t"
9397             "PUSHF\t# saw NaN, set CF\n\t"
9398             "AND     [rsp], #0xffffff2b\n\t"
9399             "POPF\n"
9400     "exit:" %}
9401   ins_encode %{
9402     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9403     emit_cmpfp_fixup(_masm);
9404   %}
9405   ins_pipe( pipe_slow );
9406 %}
9407 
9408 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9409   predicate(UseSSE>=2);
9410   match(Set cr (CmpD src1 (LoadD src2)));
9411   ins_cost(100);
9412   format %{ "UCOMISD $src1,$src2" %}
9413   ins_encode %{
9414     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9415   %}
9416   ins_pipe( pipe_slow );
9417 %}
9418 
9419 // Compare into -1,0,1 in XMM
9420 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9421   predicate(UseSSE>=2);
9422   match(Set dst (CmpD3 src1 src2));
9423   effect(KILL cr);
9424   ins_cost(255);
9425   format %{ "UCOMISD $src1, $src2\n\t"
9426             "MOV     $dst, #-1\n\t"
9427             "JP,s    done\n\t"
9428             "JB,s    done\n\t"
9429             "SETNE   $dst\n\t"
9430             "MOVZB   $dst, $dst\n"
9431     "done:" %}
9432   ins_encode %{
9433     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9434     emit_cmpfp3(_masm, $dst$$Register);
9435   %}
9436   ins_pipe( pipe_slow );
9437 %}
9438 
9439 // Compare into -1,0,1 in XMM and memory
9440 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9441   predicate(UseSSE>=2);
9442   match(Set dst (CmpD3 src1 (LoadD src2)));
9443   effect(KILL cr);
9444   ins_cost(275);
9445   format %{ "UCOMISD $src1, $src2\n\t"
9446             "MOV     $dst, #-1\n\t"
9447             "JP,s    done\n\t"
9448             "JB,s    done\n\t"
9449             "SETNE   $dst\n\t"
9450             "MOVZB   $dst, $dst\n"
9451     "done:" %}
9452   ins_encode %{
9453     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9454     emit_cmpfp3(_masm, $dst$$Register);
9455   %}
9456   ins_pipe( pipe_slow );
9457 %}
9458 
9459 
9460 instruct subDPR_reg(regDPR dst, regDPR src) %{
9461   predicate (UseSSE <=1);
9462   match(Set dst (SubD dst src));
9463 
9464   format %{ "FLD    $src\n\t"
9465             "DSUBp  $dst,ST" %}
9466   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9467   ins_cost(150);
9468   ins_encode( Push_Reg_DPR(src),
9469               OpcP, RegOpc(dst) );
9470   ins_pipe( fpu_reg_reg );
9471 %}
9472 
9473 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9474   predicate (UseSSE <=1);
9475   match(Set dst (RoundDouble (SubD src1 src2)));
9476   ins_cost(250);
9477 
9478   format %{ "FLD    $src2\n\t"
9479             "DSUB   ST,$src1\n\t"
9480             "FSTP_D $dst\t# D-round" %}
9481   opcode(0xD8, 0x5);
9482   ins_encode( Push_Reg_DPR(src2),
9483               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9484   ins_pipe( fpu_mem_reg_reg );
9485 %}
9486 
9487 
9488 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9489   predicate (UseSSE <=1);
9490   match(Set dst (SubD dst (LoadD src)));
9491   ins_cost(150);
9492 
9493   format %{ "FLD    $src\n\t"
9494             "DSUBp  $dst,ST" %}
9495   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9496   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9497               OpcP, RegOpc(dst) );
9498   ins_pipe( fpu_reg_mem );
9499 %}
9500 
9501 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9502   predicate (UseSSE<=1);
9503   match(Set dst (AbsD src));
9504   ins_cost(100);
9505   format %{ "FABS" %}
9506   opcode(0xE1, 0xD9);
9507   ins_encode( OpcS, OpcP );
9508   ins_pipe( fpu_reg_reg );
9509 %}
9510 
9511 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9512   predicate(UseSSE<=1);
9513   match(Set dst (NegD src));
9514   ins_cost(100);
9515   format %{ "FCHS" %}
9516   opcode(0xE0, 0xD9);
9517   ins_encode( OpcS, OpcP );
9518   ins_pipe( fpu_reg_reg );
9519 %}
9520 
9521 instruct addDPR_reg(regDPR dst, regDPR src) %{
9522   predicate(UseSSE<=1);
9523   match(Set dst (AddD dst src));
9524   format %{ "FLD    $src\n\t"
9525             "DADD   $dst,ST" %}
9526   size(4);
9527   ins_cost(150);
9528   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9529   ins_encode( Push_Reg_DPR(src),
9530               OpcP, RegOpc(dst) );
9531   ins_pipe( fpu_reg_reg );
9532 %}
9533 
9534 
9535 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9536   predicate(UseSSE<=1);
9537   match(Set dst (RoundDouble (AddD src1 src2)));
9538   ins_cost(250);
9539 
9540   format %{ "FLD    $src2\n\t"
9541             "DADD   ST,$src1\n\t"
9542             "FSTP_D $dst\t# D-round" %}
9543   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9544   ins_encode( Push_Reg_DPR(src2),
9545               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9546   ins_pipe( fpu_mem_reg_reg );
9547 %}
9548 
9549 
9550 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9551   predicate(UseSSE<=1);
9552   match(Set dst (AddD dst (LoadD src)));
9553   ins_cost(150);
9554 
9555   format %{ "FLD    $src\n\t"
9556             "DADDp  $dst,ST" %}
9557   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9558   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9559               OpcP, RegOpc(dst) );
9560   ins_pipe( fpu_reg_mem );
9561 %}
9562 
9563 // add-to-memory
9564 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9565   predicate(UseSSE<=1);
9566   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9567   ins_cost(150);
9568 
9569   format %{ "FLD_D  $dst\n\t"
9570             "DADD   ST,$src\n\t"
9571             "FST_D  $dst" %}
9572   opcode(0xDD, 0x0);
9573   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9574               Opcode(0xD8), RegOpc(src),
9575               set_instruction_start,
9576               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9577   ins_pipe( fpu_reg_mem );
9578 %}
9579 
9580 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9581   predicate(UseSSE<=1);
9582   match(Set dst (AddD dst con));
9583   ins_cost(125);
9584   format %{ "FLD1\n\t"
9585             "DADDp  $dst,ST" %}
9586   ins_encode %{
9587     __ fld1();
9588     __ faddp($dst$$reg);
9589   %}
9590   ins_pipe(fpu_reg);
9591 %}
9592 
9593 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9594   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9595   match(Set dst (AddD dst con));
9596   ins_cost(200);
9597   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9598             "DADDp  $dst,ST" %}
9599   ins_encode %{
9600     __ fld_d($constantaddress($con));
9601     __ faddp($dst$$reg);
9602   %}
9603   ins_pipe(fpu_reg_mem);
9604 %}
9605 
9606 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9607   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9608   match(Set dst (RoundDouble (AddD src con)));
9609   ins_cost(200);
9610   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9611             "DADD   ST,$src\n\t"
9612             "FSTP_D $dst\t# D-round" %}
9613   ins_encode %{
9614     __ fld_d($constantaddress($con));
9615     __ fadd($src$$reg);
9616     __ fstp_d(Address(rsp, $dst$$disp));
9617   %}
9618   ins_pipe(fpu_mem_reg_con);
9619 %}
9620 
9621 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9622   predicate(UseSSE<=1);
9623   match(Set dst (MulD dst src));
9624   format %{ "FLD    $src\n\t"
9625             "DMULp  $dst,ST" %}
9626   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9627   ins_cost(150);
9628   ins_encode( Push_Reg_DPR(src),
9629               OpcP, RegOpc(dst) );
9630   ins_pipe( fpu_reg_reg );
9631 %}
9632 
9633 // Strict FP instruction biases argument before multiply then
9634 // biases result to avoid double rounding of subnormals.
9635 //
9636 // scale arg1 by multiplying arg1 by 2^(-15360)
9637 // load arg2
9638 // multiply scaled arg1 by arg2
9639 // rescale product by 2^(15360)
9640 //
9641 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9642   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9643   match(Set dst (MulD dst src));
9644   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9645 
9646   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9647             "DMULp  $dst,ST\n\t"
9648             "FLD    $src\n\t"
9649             "DMULp  $dst,ST\n\t"
9650             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9651             "DMULp  $dst,ST\n\t" %}
9652   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9653   ins_encode( strictfp_bias1(dst),
9654               Push_Reg_DPR(src),
9655               OpcP, RegOpc(dst),
9656               strictfp_bias2(dst) );
9657   ins_pipe( fpu_reg_reg );
9658 %}
9659 
9660 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9661   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9662   match(Set dst (MulD dst con));
9663   ins_cost(200);
9664   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9665             "DMULp  $dst,ST" %}
9666   ins_encode %{
9667     __ fld_d($constantaddress($con));
9668     __ fmulp($dst$$reg);
9669   %}
9670   ins_pipe(fpu_reg_mem);
9671 %}
9672 
9673 
9674 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9675   predicate( UseSSE<=1 );
9676   match(Set dst (MulD dst (LoadD src)));
9677   ins_cost(200);
9678   format %{ "FLD_D  $src\n\t"
9679             "DMULp  $dst,ST" %}
9680   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9681   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9682               OpcP, RegOpc(dst) );
9683   ins_pipe( fpu_reg_mem );
9684 %}
9685 
9686 //
9687 // Cisc-alternate to reg-reg multiply
9688 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9689   predicate( UseSSE<=1 );
9690   match(Set dst (MulD src (LoadD mem)));
9691   ins_cost(250);
9692   format %{ "FLD_D  $mem\n\t"
9693             "DMUL   ST,$src\n\t"
9694             "FSTP_D $dst" %}
9695   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9696   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9697               OpcReg_FPR(src),
9698               Pop_Reg_DPR(dst) );
9699   ins_pipe( fpu_reg_reg_mem );
9700 %}
9701 
9702 
9703 // MACRO3 -- addDPR a mulDPR
9704 // This instruction is a '2-address' instruction in that the result goes
9705 // back to src2.  This eliminates a move from the macro; possibly the
9706 // register allocator will have to add it back (and maybe not).
9707 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9708   predicate( UseSSE<=1 );
9709   match(Set src2 (AddD (MulD src0 src1) src2));
9710   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9711             "DMUL   ST,$src1\n\t"
9712             "DADDp  $src2,ST" %}
9713   ins_cost(250);
9714   opcode(0xDD); /* LoadD DD /0 */
9715   ins_encode( Push_Reg_FPR(src0),
9716               FMul_ST_reg(src1),
9717               FAddP_reg_ST(src2) );
9718   ins_pipe( fpu_reg_reg_reg );
9719 %}
9720 
9721 
9722 // MACRO3 -- subDPR a mulDPR
9723 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9724   predicate( UseSSE<=1 );
9725   match(Set src2 (SubD (MulD src0 src1) src2));
9726   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9727             "DMUL   ST,$src1\n\t"
9728             "DSUBRp $src2,ST" %}
9729   ins_cost(250);
9730   ins_encode( Push_Reg_FPR(src0),
9731               FMul_ST_reg(src1),
9732               Opcode(0xDE), Opc_plus(0xE0,src2));
9733   ins_pipe( fpu_reg_reg_reg );
9734 %}
9735 
9736 
9737 instruct divDPR_reg(regDPR dst, regDPR src) %{
9738   predicate( UseSSE<=1 );
9739   match(Set dst (DivD dst src));
9740 
9741   format %{ "FLD    $src\n\t"
9742             "FDIVp  $dst,ST" %}
9743   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9744   ins_cost(150);
9745   ins_encode( Push_Reg_DPR(src),
9746               OpcP, RegOpc(dst) );
9747   ins_pipe( fpu_reg_reg );
9748 %}
9749 
9750 // Strict FP instruction biases argument before division then
9751 // biases result, to avoid double rounding of subnormals.
9752 //
9753 // scale dividend by multiplying dividend by 2^(-15360)
9754 // load divisor
9755 // divide scaled dividend by divisor
9756 // rescale quotient by 2^(15360)
9757 //
9758 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9763 
9764   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9765             "DMULp  $dst,ST\n\t"
9766             "FLD    $src\n\t"
9767             "FDIVp  $dst,ST\n\t"
9768             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9769             "DMULp  $dst,ST\n\t" %}
9770   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9771   ins_encode( strictfp_bias1(dst),
9772               Push_Reg_DPR(src),
9773               OpcP, RegOpc(dst),
9774               strictfp_bias2(dst) );
9775   ins_pipe( fpu_reg_reg );
9776 %}
9777 
9778 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9779   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9780   match(Set dst (RoundDouble (DivD src1 src2)));
9781 
9782   format %{ "FLD    $src1\n\t"
9783             "FDIV   ST,$src2\n\t"
9784             "FSTP_D $dst\t# D-round" %}
9785   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9786   ins_encode( Push_Reg_DPR(src1),
9787               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9788   ins_pipe( fpu_mem_reg_reg );
9789 %}
9790 
9791 
9792 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9793   predicate(UseSSE<=1);
9794   match(Set dst (ModD dst src));
9795   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9796 
9797   format %{ "DMOD   $dst,$src" %}
9798   ins_cost(250);
9799   ins_encode(Push_Reg_Mod_DPR(dst, src),
9800               emitModDPR(),
9801               Push_Result_Mod_DPR(src),
9802               Pop_Reg_DPR(dst));
9803   ins_pipe( pipe_slow );
9804 %}
9805 
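// Note on the FPREM loop used below (and in modF_reg further down):
// FPREM may deliver only a partial remainder and sets C2 in the FPU
// status word when another iteration is required.  FNSTSW AX / SAHF
// copies C2 into PF, so "JP loop" spins until the reduction is complete.
// The final "FSTP ST0" pops the divisor that is still on the FPU stack.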
9806 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9807   predicate(UseSSE>=2);
9808   match(Set dst (ModD src0 src1));
9809   effect(KILL rax, KILL cr);
9810 
9811   format %{ "SUB    ESP,8\t # DMOD\n"
9812           "\tMOVSD  [ESP+0],$src1\n"
9813           "\tFLD_D  [ESP+0]\n"
9814           "\tMOVSD  [ESP+0],$src0\n"
9815           "\tFLD_D  [ESP+0]\n"
9816      "loop:\tFPREM\n"
9817           "\tFWAIT\n"
9818           "\tFNSTSW AX\n"
9819           "\tSAHF\n"
9820           "\tJP     loop\n"
9821           "\tFSTP_D [ESP+0]\n"
9822           "\tMOVSD  $dst,[ESP+0]\n"
9823           "\tADD    ESP,8\n"
9824           "\tFSTP   ST0\t # Restore FPU Stack"
9825     %}
9826   ins_cost(250);
9827   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9828   ins_pipe( pipe_slow );
9829 %}
9830 
9831 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9832   predicate (UseSSE<=1);
9833   match(Set dst(AtanD dst src));
9834   format %{ "DATA   $dst,$src" %}
9835   opcode(0xD9, 0xF3);
9836   ins_encode( Push_Reg_DPR(src),
9837               OpcP, OpcS, RegOpc(dst) );
9838   ins_pipe( pipe_slow );
9839 %}
9840 
9841 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9842   predicate (UseSSE>=2);
9843   match(Set dst(AtanD dst src));
9844   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9845   format %{ "DATA   $dst,$src" %}
9846   opcode(0xD9, 0xF3);
9847   ins_encode( Push_SrcD(src),
9848               OpcP, OpcS, Push_ResultD(dst) );
9849   ins_pipe( pipe_slow );
9850 %}
9851 
9852 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9853   predicate (UseSSE<=1);
9854   match(Set dst (SqrtD src));
9855   format %{ "DSQRT  $dst,$src" %}
9856   opcode(0xFA, 0xD9);
9857   ins_encode( Push_Reg_DPR(src),
9858               OpcS, OpcP, Pop_Reg_DPR(dst) );
9859   ins_pipe( pipe_slow );
9860 %}
9861 
9862 //-------------Float Instructions-------------------------------
9863 // Float Math
9864 
9865 // Code for float compare:
9866 //     fcompp();
9867 //     fwait(); fnstsw_ax();
9868 //     sahf();
9869 //     movl(dst, unordered_result);
9870 //     jcc(Assembler::parity, exit);
9871 //     movl(dst, less_result);
9872 //     jcc(Assembler::below, exit);
9873 //     movl(dst, equal_result);
9874 //     jcc(Assembler::equal, exit);
9875 //     movl(dst, greater_result);
9876 //   exit:
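//
// After FNSTSW/SAHF the x87 condition codes land in EFLAGS as C0->CF,
// C2->PF and C3->ZF; an unordered compare (a NaN operand) sets all three,
// which is why the parity test above filters out the NaN case before the
// below/equal tests are consulted.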
9877 
9878 // P6 version of float compare, sets condition codes in EFLAGS
9879 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9880   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9881   match(Set cr (CmpF src1 src2));
9882   effect(KILL rax);
9883   ins_cost(150);
9884   format %{ "FLD    $src1\n\t"
9885             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9886             "JNP    exit\n\t"
9887             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
9888             "SAHF\n"
9889      "exit:\tNOP               // avoid branch to branch" %}
9890   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9891   ins_encode( Push_Reg_DPR(src1),
9892               OpcP, RegOpc(src2),
9893               cmpF_P6_fixup );
9894   ins_pipe( pipe_slow );
9895 %}
9896 
9897 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
9898   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9899   match(Set cr (CmpF src1 src2));
9900   ins_cost(100);
9901   format %{ "FLD    $src1\n\t"
9902             "FUCOMIP ST,$src2  // P6 instruction" %}
9903   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9904   ins_encode( Push_Reg_DPR(src1),
9905               OpcP, RegOpc(src2));
9906   ins_pipe( pipe_slow );
9907 %}
9908 
9909 
9910 // Compare & branch
9911 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9912   predicate(UseSSE == 0);
9913   match(Set cr (CmpF src1 src2));
9914   effect(KILL rax);
9915   ins_cost(200);
9916   format %{ "FLD    $src1\n\t"
9917             "FCOMp  $src2\n\t"
9918             "FNSTSW AX\n\t"
9919             "TEST   AX,0x400\n\t"
9920             "JZ,s   flags\n\t"
9921             "MOV    AH,1\t# unordered treat as LT\n"
9922     "flags:\tSAHF" %}
9923   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9924   ins_encode( Push_Reg_DPR(src1),
9925               OpcP, RegOpc(src2),
9926               fpu_flags);
9927   ins_pipe( pipe_slow );
9928 %}
9929 
9930 // Compare vs zero into -1,0,1
9931 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9932   predicate(UseSSE == 0);
9933   match(Set dst (CmpF3 src1 zero));
9934   effect(KILL cr, KILL rax);
9935   ins_cost(280);
9936   format %{ "FTSTF  $dst,$src1" %}
9937   opcode(0xE4, 0xD9);
9938   ins_encode( Push_Reg_DPR(src1),
9939               OpcS, OpcP, PopFPU,
9940               CmpF_Result(dst));
9941   ins_pipe( pipe_slow );
9942 %}
9943 
9944 // Compare into -1,0,1
9945 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
9946   predicate(UseSSE == 0);
9947   match(Set dst (CmpF3 src1 src2));
9948   effect(KILL cr, KILL rax);
9949   ins_cost(300);
9950   format %{ "FCMPF  $dst,$src1,$src2" %}
9951   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9952   ins_encode( Push_Reg_DPR(src1),
9953               OpcP, RegOpc(src2),
9954               CmpF_Result(dst));
9955   ins_pipe( pipe_slow );
9956 %}
9957 
9958 // float compare and set condition codes in EFLAGS by XMM regs
9959 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
9960   predicate(UseSSE>=1);
9961   match(Set cr (CmpF src1 src2));
9962   ins_cost(145);
9963   format %{ "UCOMISS $src1,$src2\n\t"
9964             "JNP,s   exit\n\t"
9965             "PUSHF\t# saw NaN, set CF\n\t"
9966             "AND     [rsp], #0xffffff2b\n\t"
9967             "POPF\n"
9968     "exit:" %}
9969   ins_encode %{
9970     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9971     emit_cmpfp_fixup(_masm);
9972   %}
9973   ins_pipe( pipe_slow );
9974 %}
9975 
9976 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
9977   predicate(UseSSE>=1);
9978   match(Set cr (CmpF src1 src2));
9979   ins_cost(100);
9980   format %{ "UCOMISS $src1,$src2" %}
9981   ins_encode %{
9982     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9983   %}
9984   ins_pipe( pipe_slow );
9985 %}
9986 
9987 // float compare and set condition codes in EFLAGS by XMM regs
9988 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
9989   predicate(UseSSE>=1);
9990   match(Set cr (CmpF src1 (LoadF src2)));
9991   ins_cost(165);
9992   format %{ "UCOMISS $src1,$src2\n\t"
9993             "JNP,s   exit\n\t"
9994             "PUSHF\t# saw NaN, set CF\n\t"
9995             "AND     [rsp], #0xffffff2b\n\t"
9996             "POPF\n"
9997     "exit:" %}
9998   ins_encode %{
9999     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10000     emit_cmpfp_fixup(_masm);
10001   %}
10002   ins_pipe( pipe_slow );
10003 %}
10004 
10005 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10006   predicate(UseSSE>=1);
10007   match(Set cr (CmpF src1 (LoadF src2)));
10008   ins_cost(100);
10009   format %{ "UCOMISS $src1,$src2" %}
10010   ins_encode %{
10011     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10012   %}
10013   ins_pipe( pipe_slow );
10014 %}
10015 
10016 // Compare into -1,0,1 in XMM
10017 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10018   predicate(UseSSE>=1);
10019   match(Set dst (CmpF3 src1 src2));
10020   effect(KILL cr);
10021   ins_cost(255);
10022   format %{ "UCOMISS $src1, $src2\n\t"
10023             "MOV     $dst, #-1\n\t"
10024             "JP,s    done\n\t"
10025             "JB,s    done\n\t"
10026             "SETNE   $dst\n\t"
10027             "MOVZB   $dst, $dst\n"
10028     "done:" %}
10029   ins_encode %{
10030     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10031     emit_cmpfp3(_masm, $dst$$Register);
10032   %}
10033   ins_pipe( pipe_slow );
10034 %}
10035 
10036 // Compare into -1,0,1 in XMM and memory
10037 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10038   predicate(UseSSE>=1);
10039   match(Set dst (CmpF3 src1 (LoadF src2)));
10040   effect(KILL cr);
10041   ins_cost(275);
10042   format %{ "UCOMISS $src1, $src2\n\t"
10043             "MOV     $dst, #-1\n\t"
10044             "JP,s    done\n\t"
10045             "JB,s    done\n\t"
10046             "SETNE   $dst\n\t"
10047             "MOVZB   $dst, $dst\n"
10048     "done:" %}
10049   ins_encode %{
10050     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10051     emit_cmpfp3(_masm, $dst$$Register);
10052   %}
10053   ins_pipe( pipe_slow );
10054 %}
10055 
10056 // Spill to obtain 24-bit precision
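// (Mechanism sketch: the result is popped with an FSTP to a 32-bit stack
// slot, and that store is what narrows the wider x87 register value to a
// true float; the register-to-register forms keep the extra precision and
// are only selected when 24-bit rounding is not required.)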
10057 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10058   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10059   match(Set dst (SubF src1 src2));
10060 
10061   format %{ "FSUB   $dst,$src1 - $src2" %}
10062   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10063   ins_encode( Push_Reg_FPR(src1),
10064               OpcReg_FPR(src2),
10065               Pop_Mem_FPR(dst) );
10066   ins_pipe( fpu_mem_reg_reg );
10067 %}
10068 //
10069 // This instruction does not round to 24-bits
10070 instruct subFPR_reg(regFPR dst, regFPR src) %{
10071   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10072   match(Set dst (SubF dst src));
10073 
10074   format %{ "FSUB   $dst,$src" %}
10075   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10076   ins_encode( Push_Reg_FPR(src),
10077               OpcP, RegOpc(dst) );
10078   ins_pipe( fpu_reg_reg );
10079 %}
10080 
10081 // Spill to obtain 24-bit precision
10082 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10083   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10084   match(Set dst (AddF src1 src2));
10085 
10086   format %{ "FADD   $dst,$src1,$src2" %}
10087   opcode(0xD8, 0x0); /* D8 C0+i */
10088   ins_encode( Push_Reg_FPR(src2),
10089               OpcReg_FPR(src1),
10090               Pop_Mem_FPR(dst) );
10091   ins_pipe( fpu_mem_reg_reg );
10092 %}
10093 //
10094 // This instruction does not round to 24-bits
10095 instruct addFPR_reg(regFPR dst, regFPR src) %{
10096   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10097   match(Set dst (AddF dst src));
10098 
10099   format %{ "FLD    $src\n\t"
10100             "FADDp  $dst,ST" %}
10101   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10102   ins_encode( Push_Reg_FPR(src),
10103               OpcP, RegOpc(dst) );
10104   ins_pipe( fpu_reg_reg );
10105 %}
10106 
10107 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10108   predicate(UseSSE==0);
10109   match(Set dst (AbsF src));
10110   ins_cost(100);
10111   format %{ "FABS" %}
10112   opcode(0xE1, 0xD9);
10113   ins_encode( OpcS, OpcP );
10114   ins_pipe( fpu_reg_reg );
10115 %}
10116 
10117 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10118   predicate(UseSSE==0);
10119   match(Set dst (NegF src));
10120   ins_cost(100);
10121   format %{ "FCHS" %}
10122   opcode(0xE0, 0xD9);
10123   ins_encode( OpcS, OpcP );
10124   ins_pipe( fpu_reg_reg );
10125 %}
10126 
10127 // Cisc-alternate to addFPR_reg
10128 // Spill to obtain 24-bit precision
10129 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10130   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10131   match(Set dst (AddF src1 (LoadF src2)));
10132 
10133   format %{ "FLD    $src2\n\t"
10134             "FADD   ST,$src1\n\t"
10135             "FSTP_S $dst" %}
10136   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10137   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10138               OpcReg_FPR(src1),
10139               Pop_Mem_FPR(dst) );
10140   ins_pipe( fpu_mem_reg_mem );
10141 %}
10142 //
10143 // Cisc-alternate to addFPR_reg
10144 // This instruction does not round to 24-bits
10145 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10146   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10147   match(Set dst (AddF dst (LoadF src)));
10148 
10149   format %{ "FADD   $dst,$src" %}
10150   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10151   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10152               OpcP, RegOpc(dst) );
10153   ins_pipe( fpu_reg_mem );
10154 %}
10155 
//
// Following two instructions for _222_mpegaudio
10157 // Spill to obtain 24-bit precision
10158 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10159   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10160   match(Set dst (AddF src1 src2));
10161 
10162   format %{ "FADD   $dst,$src1,$src2" %}
10163   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10164   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10165               OpcReg_FPR(src2),
10166               Pop_Mem_FPR(dst) );
10167   ins_pipe( fpu_mem_reg_mem );
10168 %}
10169 
10170 // Cisc-spill variant
10171 // Spill to obtain 24-bit precision
10172 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10173   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10174   match(Set dst (AddF src1 (LoadF src2)));
10175 
10176   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10177   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10178   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10179               set_instruction_start,
10180               OpcP, RMopc_Mem(secondary,src1),
10181               Pop_Mem_FPR(dst) );
10182   ins_pipe( fpu_mem_mem_mem );
10183 %}
10184 
10185 // Spill to obtain 24-bit precision
10186 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10187   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10188   match(Set dst (AddF src1 src2));
10189 
10190   format %{ "FADD   $dst,$src1,$src2" %}
10191   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10192   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10193               set_instruction_start,
10194               OpcP, RMopc_Mem(secondary,src1),
10195               Pop_Mem_FPR(dst) );
10196   ins_pipe( fpu_mem_mem_mem );
10197 %}
10198 
10199 
10200 // Spill to obtain 24-bit precision
10201 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10202   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10203   match(Set dst (AddF src con));
10204   format %{ "FLD    $src\n\t"
10205             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10206             "FSTP_S $dst"  %}
10207   ins_encode %{
10208     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10209     __ fadd_s($constantaddress($con));
10210     __ fstp_s(Address(rsp, $dst$$disp));
10211   %}
10212   ins_pipe(fpu_mem_reg_con);
10213 %}
10214 //
10215 // This instruction does not round to 24-bits
10216 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10217   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10218   match(Set dst (AddF src con));
10219   format %{ "FLD    $src\n\t"
10220             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10221             "FSTP   $dst"  %}
10222   ins_encode %{
10223     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10224     __ fadd_s($constantaddress($con));
10225     __ fstp_d($dst$$reg);
10226   %}
10227   ins_pipe(fpu_reg_reg_con);
10228 %}
10229 
10230 // Spill to obtain 24-bit precision
10231 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10232   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10233   match(Set dst (MulF src1 src2));
10234 
10235   format %{ "FLD    $src1\n\t"
10236             "FMUL   $src2\n\t"
10237             "FSTP_S $dst"  %}
10238   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10239   ins_encode( Push_Reg_FPR(src1),
10240               OpcReg_FPR(src2),
10241               Pop_Mem_FPR(dst) );
10242   ins_pipe( fpu_mem_reg_reg );
10243 %}
10244 //
10245 // This instruction does not round to 24-bits
10246 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10247   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10248   match(Set dst (MulF src1 src2));
10249 
10250   format %{ "FLD    $src1\n\t"
10251             "FMUL   $src2\n\t"
10252             "FSTP_S $dst"  %}
10253   opcode(0xD8, 0x1); /* D8 C8+i */
10254   ins_encode( Push_Reg_FPR(src2),
10255               OpcReg_FPR(src1),
10256               Pop_Reg_FPR(dst) );
10257   ins_pipe( fpu_reg_reg_reg );
10258 %}
10259 
10260 
10261 // Spill to obtain 24-bit precision
10262 // Cisc-alternate to reg-reg multiply
10263 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10264   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10265   match(Set dst (MulF src1 (LoadF src2)));
10266 
10267   format %{ "FLD_S  $src2\n\t"
10268             "FMUL   $src1\n\t"
10269             "FSTP_S $dst"  %}
10270   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10271   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10272               OpcReg_FPR(src1),
10273               Pop_Mem_FPR(dst) );
10274   ins_pipe( fpu_mem_reg_mem );
10275 %}
10276 //
10277 // This instruction does not round to 24-bits
10278 // Cisc-alternate to reg-reg multiply
10279 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10280   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10281   match(Set dst (MulF src1 (LoadF src2)));
10282 
10283   format %{ "FMUL   $dst,$src1,$src2" %}
10284   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10285   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10286               OpcReg_FPR(src1),
10287               Pop_Reg_FPR(dst) );
10288   ins_pipe( fpu_reg_reg_mem );
10289 %}
10290 
10291 // Spill to obtain 24-bit precision
10292 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10293   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10294   match(Set dst (MulF src1 src2));
10295 
10296   format %{ "FMUL   $dst,$src1,$src2" %}
10297   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10298   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10299               set_instruction_start,
10300               OpcP, RMopc_Mem(secondary,src1),
10301               Pop_Mem_FPR(dst) );
10302   ins_pipe( fpu_mem_mem_mem );
10303 %}
10304 
10305 // Spill to obtain 24-bit precision
10306 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10307   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10308   match(Set dst (MulF src con));
10309 
10310   format %{ "FLD    $src\n\t"
10311             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10312             "FSTP_S $dst"  %}
10313   ins_encode %{
10314     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10315     __ fmul_s($constantaddress($con));
10316     __ fstp_s(Address(rsp, $dst$$disp));
10317   %}
10318   ins_pipe(fpu_mem_reg_con);
10319 %}
10320 //
10321 // This instruction does not round to 24-bits
10322 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10323   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10324   match(Set dst (MulF src con));
10325 
10326   format %{ "FLD    $src\n\t"
10327             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10328             "FSTP   $dst"  %}
10329   ins_encode %{
10330     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10331     __ fmul_s($constantaddress($con));
10332     __ fstp_d($dst$$reg);
10333   %}
10334   ins_pipe(fpu_reg_reg_con);
10335 %}
10336 
10337 
10338 //
10339 // MACRO1 -- subsume unshared load into mulFPR
10340 // This instruction does not round to 24-bits
10341 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10342   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10343   match(Set dst (MulF (LoadF mem1) src));
10344 
10345   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10346             "FMUL   ST,$src\n\t"
10347             "FSTP   $dst" %}
10348   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10349   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10350               OpcReg_FPR(src),
10351               Pop_Reg_FPR(dst) );
10352   ins_pipe( fpu_reg_reg_mem );
10353 %}
10354 //
10355 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10356 // This instruction does not round to 24-bits
10357 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10358   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10359   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10360   ins_cost(95);
10361 
10362   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10363             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10364             "FADD   ST,$src2\n\t"
10365             "FSTP   $dst" %}
10366   opcode(0xD9); /* LoadF D9 /0 */
10367   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10368               FMul_ST_reg(src1),
10369               FAdd_ST_reg(src2),
10370               Pop_Reg_FPR(dst) );
10371   ins_pipe( fpu_reg_mem_reg_reg );
10372 %}
10373 
10374 // MACRO3 -- addFPR a mulFPR
10375 // This instruction does not round to 24-bits.  It is a '2-address'
10376 // instruction in that the result goes back to src2.  This eliminates
10377 // a move from the macro; possibly the register allocator will have
10378 // to add it back (and maybe not).
10379 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10380   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10381   match(Set src2 (AddF (MulF src0 src1) src2));
10382 
10383   format %{ "FLD    $src0     ===MACRO3===\n\t"
10384             "FMUL   ST,$src1\n\t"
10385             "FADDP  $src2,ST" %}
10386   opcode(0xD9); /* LoadF D9 /0 */
10387   ins_encode( Push_Reg_FPR(src0),
10388               FMul_ST_reg(src1),
10389               FAddP_reg_ST(src2) );
10390   ins_pipe( fpu_reg_reg_reg );
10391 %}
10392 
10393 // MACRO4 -- divFPR subFPR
10394 // This instruction does not round to 24-bits
10395 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10396   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10397   match(Set dst (DivF (SubF src2 src1) src3));
10398 
10399   format %{ "FLD    $src2   ===MACRO4===\n\t"
10400             "FSUB   ST,$src1\n\t"
10401             "FDIV   ST,$src3\n\t"
10402             "FSTP  $dst" %}
10403   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10404   ins_encode( Push_Reg_FPR(src2),
10405               subFPR_divFPR_encode(src1,src3),
10406               Pop_Reg_FPR(dst) );
10407   ins_pipe( fpu_reg_reg_reg_reg );
10408 %}
10409 
10410 // Spill to obtain 24-bit precision
10411 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10412   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10413   match(Set dst (DivF src1 src2));
10414 
10415   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10417   ins_encode( Push_Reg_FPR(src1),
10418               OpcReg_FPR(src2),
10419               Pop_Mem_FPR(dst) );
10420   ins_pipe( fpu_mem_reg_reg );
10421 %}
10422 //
10423 // This instruction does not round to 24-bits
10424 instruct divFPR_reg(regFPR dst, regFPR src) %{
10425   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10426   match(Set dst (DivF dst src));
10427 
10428   format %{ "FDIV   $dst,$src" %}
10429   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10430   ins_encode( Push_Reg_FPR(src),
10431               OpcP, RegOpc(dst) );
10432   ins_pipe( fpu_reg_reg );
10433 %}
10434 
10435 
10436 // Spill to obtain 24-bit precision
10437 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10438   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10439   match(Set dst (ModF src1 src2));
10440   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10441 
10442   format %{ "FMOD   $dst,$src1,$src2" %}
10443   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10444               emitModDPR(),
10445               Push_Result_Mod_DPR(src2),
10446               Pop_Mem_FPR(dst));
10447   ins_pipe( pipe_slow );
10448 %}
10449 //
10450 // This instruction does not round to 24-bits
10451 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10452   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10453   match(Set dst (ModF dst src));
10454   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10455 
10456   format %{ "FMOD   $dst,$src" %}
10457   ins_encode(Push_Reg_Mod_DPR(dst, src),
10458               emitModDPR(),
10459               Push_Result_Mod_DPR(src),
10460               Pop_Reg_FPR(dst));
10461   ins_pipe( pipe_slow );
10462 %}
10463 
10464 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10465   predicate(UseSSE>=1);
10466   match(Set dst (ModF src0 src1));
10467   effect(KILL rax, KILL cr);
10468   format %{ "SUB    ESP,4\t # FMOD\n"
10469           "\tMOVSS  [ESP+0],$src1\n"
10470           "\tFLD_S  [ESP+0]\n"
10471           "\tMOVSS  [ESP+0],$src0\n"
10472           "\tFLD_S  [ESP+0]\n"
10473      "loop:\tFPREM\n"
10474           "\tFWAIT\n"
10475           "\tFNSTSW AX\n"
10476           "\tSAHF\n"
10477           "\tJP     loop\n"
10478           "\tFSTP_S [ESP+0]\n"
10479           "\tMOVSS  $dst,[ESP+0]\n"
10480           "\tADD    ESP,4\n"
10481           "\tFSTP   ST0\t # Restore FPU Stack"
10482     %}
10483   ins_cost(250);
10484   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10485   ins_pipe( pipe_slow );
10486 %}
10487 
10488 
10489 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
10491 
10492 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10493   predicate(UseSSE==0);
10494   match(Set dst (RoundFloat src));
10495   ins_cost(125);
10496   format %{ "FST_S  $dst,$src\t# F-round" %}
10497   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10498   ins_pipe( fpu_mem_reg );
10499 %}
10500 
10501 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10502   predicate(UseSSE<=1);
10503   match(Set dst (RoundDouble src));
10504   ins_cost(125);
10505   format %{ "FST_D  $dst,$src\t# D-round" %}
10506   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10507   ins_pipe( fpu_mem_reg );
10508 %}
10509 
// Force rounding to 24-bit precision and 8-bit exponent
10511 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10512   predicate(UseSSE==0);
10513   match(Set dst (ConvD2F src));
10514   format %{ "FST_S  $dst,$src\t# F-round" %}
10515   expand %{
10516     roundFloat_mem_reg(dst,src);
10517   %}
10518 %}
10519 
// Force rounding to 24-bit precision and 8-bit exponent
10521 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10522   predicate(UseSSE==1);
10523   match(Set dst (ConvD2F src));
10524   effect( KILL cr );
10525   format %{ "SUB    ESP,4\n\t"
10526             "FST_S  [ESP],$src\t# F-round\n\t"
10527             "MOVSS  $dst,[ESP]\n\t"
10528             "ADD ESP,4" %}
10529   ins_encode %{
10530     __ subptr(rsp, 4);
10531     if ($src$$reg != FPR1L_enc) {
10532       __ fld_s($src$$reg-1);
10533       __ fstp_s(Address(rsp, 0));
10534     } else {
10535       __ fst_s(Address(rsp, 0));
10536     }
10537     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10538     __ addptr(rsp, 4);
10539   %}
10540   ins_pipe( pipe_slow );
10541 %}
10542 
10543 // Force rounding double precision to single precision
10544 instruct convD2F_reg(regF dst, regD src) %{
10545   predicate(UseSSE>=2);
10546   match(Set dst (ConvD2F src));
10547   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10548   ins_encode %{
10549     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10550   %}
10551   ins_pipe( pipe_slow );
10552 %}
10553 
10554 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10555   predicate(UseSSE==0);
10556   match(Set dst (ConvF2D src));
10557   format %{ "FST_S  $dst,$src\t# D-round" %}
10558   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10559   ins_pipe( fpu_reg_reg );
10560 %}
10561 
10562 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10563   predicate(UseSSE==1);
10564   match(Set dst (ConvF2D src));
10565   format %{ "FST_D  $dst,$src\t# D-round" %}
10566   expand %{
10567     roundDouble_mem_reg(dst,src);
10568   %}
10569 %}
10570 
10571 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10572   predicate(UseSSE==1);
10573   match(Set dst (ConvF2D src));
10574   effect( KILL cr );
10575   format %{ "SUB    ESP,4\n\t"
10576             "MOVSS  [ESP] $src\n\t"
10577             "FLD_S  [ESP]\n\t"
10578             "ADD    ESP,4\n\t"
10579             "FSTP   $dst\t# D-round" %}
10580   ins_encode %{
10581     __ subptr(rsp, 4);
10582     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10583     __ fld_s(Address(rsp, 0));
10584     __ addptr(rsp, 4);
10585     __ fstp_d($dst$$reg);
10586   %}
10587   ins_pipe( pipe_slow );
10588 %}
10589 
10590 instruct convF2D_reg(regD dst, regF src) %{
10591   predicate(UseSSE>=2);
10592   match(Set dst (ConvF2D src));
10593   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10594   ins_encode %{
10595     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10596   %}
10597   ins_pipe( pipe_slow );
10598 %}
10599 
// Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10601 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10602   predicate(UseSSE<=1);
10603   match(Set dst (ConvD2I src));
10604   effect( KILL tmp, KILL cr );
10605   format %{ "FLD    $src\t# Convert double to int \n\t"
10606             "FLDCW  trunc mode\n\t"
10607             "SUB    ESP,4\n\t"
10608             "FISTp  [ESP + #0]\n\t"
10609             "FLDCW  std/24-bit mode\n\t"
10610             "POP    EAX\n\t"
10611             "CMP    EAX,0x80000000\n\t"
10612             "JNE,s  fast\n\t"
10613             "FLD_D  $src\n\t"
10614             "CALL   d2i_wrapper\n"
10615       "fast:" %}
10616   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10617   ins_pipe( pipe_slow );
10618 %}
10619 
// Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10621 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10622   predicate(UseSSE>=2);
10623   match(Set dst (ConvD2I src));
10624   effect( KILL tmp, KILL cr );
10625   format %{ "CVTTSD2SI $dst, $src\n\t"
10626             "CMP    $dst,0x80000000\n\t"
10627             "JNE,s  fast\n\t"
10628             "SUB    ESP, 8\n\t"
10629             "MOVSD  [ESP], $src\n\t"
10630             "FLD_D  [ESP]\n\t"
10631             "ADD    ESP, 8\n\t"
10632             "CALL   d2i_wrapper\n"
10633       "fast:" %}
10634   ins_encode %{
10635     Label fast;
10636     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10637     __ cmpl($dst$$Register, 0x80000000);
10638     __ jccb(Assembler::notEqual, fast);
10639     __ subptr(rsp, 8);
10640     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10641     __ fld_d(Address(rsp, 0));
10642     __ addptr(rsp, 8);
10643     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10644     __ bind(fast);
10645   %}
10646   ins_pipe( pipe_slow );
10647 %}
10648 
10649 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10650   predicate(UseSSE<=1);
10651   match(Set dst (ConvD2L src));
10652   effect( KILL cr );
10653   format %{ "FLD    $src\t# Convert double to long\n\t"
10654             "FLDCW  trunc mode\n\t"
10655             "SUB    ESP,8\n\t"
10656             "FISTp  [ESP + #0]\n\t"
10657             "FLDCW  std/24-bit mode\n\t"
10658             "POP    EAX\n\t"
10659             "POP    EDX\n\t"
10660             "CMP    EDX,0x80000000\n\t"
10661             "JNE,s  fast\n\t"
10662             "TEST   EAX,EAX\n\t"
10663             "JNE,s  fast\n\t"
10664             "FLD    $src\n\t"
10665             "CALL   d2l_wrapper\n"
10666       "fast:" %}
10667   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10668   ins_pipe( pipe_slow );
10669 %}
10670 
10671 // XMM lacks a float/double->long conversion, so use the old FPU stack.
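// (On 32-bit x86 CVTTSS2SI/CVTTSD2SI can only produce a 32-bit result, so
// the 64-bit conversion is done with FISTP while the rounding control is
// temporarily switched to truncation, matching Java's round-toward-zero
// narrowing.)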
10672 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10673   predicate (UseSSE>=2);
10674   match(Set dst (ConvD2L src));
10675   effect( KILL cr );
10676   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10677             "MOVSD  [ESP],$src\n\t"
10678             "FLD_D  [ESP]\n\t"
10679             "FLDCW  trunc mode\n\t"
10680             "FISTp  [ESP + #0]\n\t"
10681             "FLDCW  std/24-bit mode\n\t"
10682             "POP    EAX\n\t"
10683             "POP    EDX\n\t"
10684             "CMP    EDX,0x80000000\n\t"
10685             "JNE,s  fast\n\t"
10686             "TEST   EAX,EAX\n\t"
10687             "JNE,s  fast\n\t"
10688             "SUB    ESP,8\n\t"
10689             "MOVSD  [ESP],$src\n\t"
10690             "FLD_D  [ESP]\n\t"
10691             "ADD    ESP,8\n\t"
10692             "CALL   d2l_wrapper\n"
10693       "fast:" %}
10694   ins_encode %{
10695     Label fast;
10696     __ subptr(rsp, 8);
10697     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10698     __ fld_d(Address(rsp, 0));
10699     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10700     __ fistp_d(Address(rsp, 0));
10701     // Restore the rounding mode, mask the exception
10702     if (Compile::current()->in_24_bit_fp_mode()) {
10703       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10704     } else {
10705       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10706     }
10707     // Load the converted long, adjust CPU stack
10708     __ pop(rax);
10709     __ pop(rdx);
10710     __ cmpl(rdx, 0x80000000);
10711     __ jccb(Assembler::notEqual, fast);
10712     __ testl(rax, rax);
10713     __ jccb(Assembler::notEqual, fast);
10714     __ subptr(rsp, 8);
10715     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10716     __ fld_d(Address(rsp, 0));
10717     __ addptr(rsp, 8);
10718     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10719     __ bind(fast);
10720   %}
10721   ins_pipe( pipe_slow );
10722 %}
10723 
// Convert a double to an int.  Java semantics require we do complex
// manipulations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NaN; we check for this and
// go the slow path if needed.
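//
// For reference, the corner-case results required by JLS 5.1.3 are
// (sketch):
//   (int) Double.NaN  ==  0
//   (int)  1.0e30     ==  Integer.MAX_VALUE
//   (int) -1.0e30     ==  Integer.MIN_VALUE
// FIST and CVTTSD2SI both return the sentinel 0x80000000 for NaN and
// out-of-range inputs; the compare against that sentinel is what routes
// those cases into d2i_wrapper.  A legitimate result of Integer.MIN_VALUE
// also matches the sentinel, which is why the wrapper re-examines the
// original operand instead of trusting the fast-path value.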
10730 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10731   predicate(UseSSE==0);
10732   match(Set dst (ConvF2I src));
10733   effect( KILL tmp, KILL cr );
10734   format %{ "FLD    $src\t# Convert float to int \n\t"
10735             "FLDCW  trunc mode\n\t"
10736             "SUB    ESP,4\n\t"
10737             "FISTp  [ESP + #0]\n\t"
10738             "FLDCW  std/24-bit mode\n\t"
10739             "POP    EAX\n\t"
10740             "CMP    EAX,0x80000000\n\t"
10741             "JNE,s  fast\n\t"
10742             "FLD    $src\n\t"
10743             "CALL   d2i_wrapper\n"
10744       "fast:" %}
10745   // DPR2I_encoding works for FPR2I
10746   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10747   ins_pipe( pipe_slow );
10748 %}
10749 
10750 // Convert a float in xmm to an int reg.
10751 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10752   predicate(UseSSE>=1);
10753   match(Set dst (ConvF2I src));
10754   effect( KILL tmp, KILL cr );
10755   format %{ "CVTTSS2SI $dst, $src\n\t"
10756             "CMP    $dst,0x80000000\n\t"
10757             "JNE,s  fast\n\t"
10758             "SUB    ESP, 4\n\t"
10759             "MOVSS  [ESP], $src\n\t"
10760             "FLD    [ESP]\n\t"
10761             "ADD    ESP, 4\n\t"
10762             "CALL   d2i_wrapper\n"
10763       "fast:" %}
10764   ins_encode %{
10765     Label fast;
10766     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10767     __ cmpl($dst$$Register, 0x80000000);
10768     __ jccb(Assembler::notEqual, fast);
10769     __ subptr(rsp, 4);
10770     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10771     __ fld_s(Address(rsp, 0));
10772     __ addptr(rsp, 4);
10773     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10774     __ bind(fast);
10775   %}
10776   ins_pipe( pipe_slow );
10777 %}
10778 
10779 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10780   predicate(UseSSE==0);
10781   match(Set dst (ConvF2L src));
10782   effect( KILL cr );
10783   format %{ "FLD    $src\t# Convert float to long\n\t"
10784             "FLDCW  trunc mode\n\t"
10785             "SUB    ESP,8\n\t"
10786             "FISTp  [ESP + #0]\n\t"
10787             "FLDCW  std/24-bit mode\n\t"
10788             "POP    EAX\n\t"
10789             "POP    EDX\n\t"
10790             "CMP    EDX,0x80000000\n\t"
10791             "JNE,s  fast\n\t"
10792             "TEST   EAX,EAX\n\t"
10793             "JNE,s  fast\n\t"
10794             "FLD    $src\n\t"
10795             "CALL   d2l_wrapper\n"
10796       "fast:" %}
10797   // DPR2L_encoding works for FPR2L
10798   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10799   ins_pipe( pipe_slow );
10800 %}
10801 
10802 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10803 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10804   predicate (UseSSE>=1);
10805   match(Set dst (ConvF2L src));
10806   effect( KILL cr );
10807   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10808             "MOVSS  [ESP],$src\n\t"
10809             "FLD_S  [ESP]\n\t"
10810             "FLDCW  trunc mode\n\t"
10811             "FISTp  [ESP + #0]\n\t"
10812             "FLDCW  std/24-bit mode\n\t"
10813             "POP    EAX\n\t"
10814             "POP    EDX\n\t"
10815             "CMP    EDX,0x80000000\n\t"
10816             "JNE,s  fast\n\t"
10817             "TEST   EAX,EAX\n\t"
10818             "JNE,s  fast\n\t"
10819             "SUB    ESP,4\t# Convert float to long\n\t"
10820             "MOVSS  [ESP],$src\n\t"
10821             "FLD_S  [ESP]\n\t"
10822             "ADD    ESP,4\n\t"
10823             "CALL   d2l_wrapper\n"
10824       "fast:" %}
10825   ins_encode %{
10826     Label fast;
10827     __ subptr(rsp, 8);
10828     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10829     __ fld_s(Address(rsp, 0));
10830     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10831     __ fistp_d(Address(rsp, 0));
10832     // Restore the rounding mode, mask the exception
10833     if (Compile::current()->in_24_bit_fp_mode()) {
10834       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10835     } else {
10836       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10837     }
10838     // Load the converted long, adjust CPU stack
10839     __ pop(rax);
10840     __ pop(rdx);
10841     __ cmpl(rdx, 0x80000000);
10842     __ jccb(Assembler::notEqual, fast);
10843     __ testl(rax, rax);
10844     __ jccb(Assembler::notEqual, fast);
10845     __ subptr(rsp, 4);
10846     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10847     __ fld_s(Address(rsp, 0));
10848     __ addptr(rsp, 4);
10849     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10850     __ bind(fast);
10851   %}
10852   ins_pipe( pipe_slow );
10853 %}
10854 
10855 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10856   predicate( UseSSE<=1 );
10857   match(Set dst (ConvI2D src));
10858   format %{ "FILD   $src\n\t"
10859             "FSTP   $dst" %}
10860   opcode(0xDB, 0x0);  /* DB /0 */
10861   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10862   ins_pipe( fpu_reg_mem );
10863 %}
10864 
10865 instruct convI2D_reg(regD dst, rRegI src) %{
10866   predicate( UseSSE>=2 && !UseXmmI2D );
10867   match(Set dst (ConvI2D src));
10868   format %{ "CVTSI2SD $dst,$src" %}
10869   ins_encode %{
10870     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10871   %}
10872   ins_pipe( pipe_slow );
10873 %}
10874 
10875 instruct convI2D_mem(regD dst, memory mem) %{
10876   predicate( UseSSE>=2 );
10877   match(Set dst (ConvI2D (LoadI mem)));
10878   format %{ "CVTSI2SD $dst,$mem" %}
10879   ins_encode %{
10880     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10881   %}
10882   ins_pipe( pipe_slow );
10883 %}
10884 
10885 instruct convXI2D_reg(regD dst, rRegI src)
10886 %{
10887   predicate( UseSSE>=2 && UseXmmI2D );
10888   match(Set dst (ConvI2D src));
10889 
10890   format %{ "MOVD  $dst,$src\n\t"
10891             "CVTDQ2PD $dst,$dst\t# i2d" %}
10892   ins_encode %{
10893     __ movdl($dst$$XMMRegister, $src$$Register);
10894     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10895   %}
10896   ins_pipe(pipe_slow); // XXX
10897 %}
10898 
10899 instruct convI2DPR_mem(regDPR dst, memory mem) %{
10900   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
10901   match(Set dst (ConvI2D (LoadI mem)));
10902   format %{ "FILD   $mem\n\t"
10903             "FSTP   $dst" %}
10904   opcode(0xDB);      /* DB /0 */
10905   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10906               Pop_Reg_DPR(dst));
10907   ins_pipe( fpu_reg_mem );
10908 %}
10909 
10910 // Convert a byte to a float; no rounding step needed.
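// (The AndI-with-255 predicate guarantees the value fits in 8 bits, so it
// is exactly representable even at 24-bit precision and the usual
// spill-to-stack rounding store can be skipped.)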
10911 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
10912   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
10913   match(Set dst (ConvI2F src));
10914   format %{ "FILD   $src\n\t"
10915             "FSTP   $dst" %}
10916 
10917   opcode(0xDB, 0x0);  /* DB /0 */
10918   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
10919   ins_pipe( fpu_reg_mem );
10920 %}
10921 
10922 // In 24-bit mode, force exponent rounding by storing back out
10923 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
10924   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10925   match(Set dst (ConvI2F src));
10926   ins_cost(200);
10927   format %{ "FILD   $src\n\t"
10928             "FSTP_S $dst" %}
10929   opcode(0xDB, 0x0);  /* DB /0 */
10930   ins_encode( Push_Mem_I(src),
10931               Pop_Mem_FPR(dst));
10932   ins_pipe( fpu_mem_mem );
10933 %}
10934 
10935 // In 24-bit mode, force exponent rounding by storing back out
10936 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
10937   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10938   match(Set dst (ConvI2F (LoadI mem)));
10939   ins_cost(200);
10940   format %{ "FILD   $mem\n\t"
10941             "FSTP_S $dst" %}
10942   opcode(0xDB);  /* DB /0 */
10943   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10944               Pop_Mem_FPR(dst));
10945   ins_pipe( fpu_mem_mem );
10946 %}
10947 
10948 // This instruction does not round to 24-bits
10949 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
10950   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10951   match(Set dst (ConvI2F src));
10952   format %{ "FILD   $src\n\t"
10953             "FSTP   $dst" %}
10954   opcode(0xDB, 0x0);  /* DB /0 */
10955   ins_encode( Push_Mem_I(src),
10956               Pop_Reg_FPR(dst));
10957   ins_pipe( fpu_reg_mem );
10958 %}
10959 
10960 // This instruction does not round to 24-bits
10961 instruct convI2FPR_mem(regFPR dst, memory mem) %{
10962   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10963   match(Set dst (ConvI2F (LoadI mem)));
10964   format %{ "FILD   $mem\n\t"
10965             "FSTP   $dst" %}
10966   opcode(0xDB);      /* DB /0 */
10967   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10968               Pop_Reg_FPR(dst));
10969   ins_pipe( fpu_reg_mem );
10970 %}
10971 
10972 // Convert an int to a float in xmm; no rounding step needed.
10973 instruct convI2F_reg(regF dst, rRegI src) %{
10974   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
10975   match(Set dst (ConvI2F src));
10976   format %{ "CVTSI2SS $dst, $src" %}
10977   ins_encode %{
10978     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
10979   %}
10980   ins_pipe( pipe_slow );
10981 %}
10982 
instruct convXI2F_reg(regF dst, rRegI src)
10984 %{
10985   predicate( UseSSE>=2 && UseXmmI2F );
10986   match(Set dst (ConvI2F src));
10987 
10988   format %{ "MOVD  $dst,$src\n\t"
10989             "CVTDQ2PS $dst,$dst\t# i2f" %}
10990   ins_encode %{
10991     __ movdl($dst$$XMMRegister, $src$$Register);
10992     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
10993   %}
10994   ins_pipe(pipe_slow); // XXX
10995 %}
10996 
10997 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
10998   match(Set dst (ConvI2L src));
10999   effect(KILL cr);
11000   ins_cost(375);
11001   format %{ "MOV    $dst.lo,$src\n\t"
11002             "MOV    $dst.hi,$src\n\t"
11003             "SAR    $dst.hi,31" %}
11004   ins_encode(convert_int_long(dst,src));
11005   ins_pipe( ialu_reg_reg_long );
11006 %}
11007 
11008 // Zero-extend convert int to long
11009 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11010   match(Set dst (AndL (ConvI2L src) mask) );
11011   effect( KILL flags );
11012   ins_cost(250);
11013   format %{ "MOV    $dst.lo,$src\n\t"
11014             "XOR    $dst.hi,$dst.hi" %}
11015   opcode(0x33); // XOR
11016   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11017   ins_pipe( ialu_reg_reg_long );
11018 %}
11019 
11020 // Zero-extend long
11021 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11022   match(Set dst (AndL src mask) );
11023   effect( KILL flags );
11024   ins_cost(250);
11025   format %{ "MOV    $dst.lo,$src.lo\n\t"
11026             "XOR    $dst.hi,$dst.hi\n\t" %}
11027   opcode(0x33); // XOR
11028   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11029   ins_pipe( ialu_reg_reg_long );
11030 %}
11031 
11032 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11033   predicate (UseSSE<=1);
11034   match(Set dst (ConvL2D src));
11035   effect( KILL cr );
11036   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11037             "PUSH   $src.lo\n\t"
11038             "FILD   ST,[ESP + #0]\n\t"
11039             "ADD    ESP,8\n\t"
11040             "FSTP_D $dst\t# D-round" %}
11041   opcode(0xDF, 0x5);  /* DF /5 */
11042   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11043   ins_pipe( pipe_slow );
11044 %}
11045 
11046 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11047   predicate (UseSSE>=2);
11048   match(Set dst (ConvL2D src));
11049   effect( KILL cr );
11050   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11051             "PUSH   $src.lo\n\t"
11052             "FILD_D [ESP]\n\t"
11053             "FSTP_D [ESP]\n\t"
11054             "MOVSD  $dst,[ESP]\n\t"
11055             "ADD    ESP,8" %}
11056   opcode(0xDF, 0x5);  /* DF /5 */
11057   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11058   ins_pipe( pipe_slow );
11059 %}
11060 
11061 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11062   predicate (UseSSE>=1);
11063   match(Set dst (ConvL2F src));
11064   effect( KILL cr );
11065   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11066             "PUSH   $src.lo\n\t"
11067             "FILD_D [ESP]\n\t"
11068             "FSTP_S [ESP]\n\t"
11069             "MOVSS  $dst,[ESP]\n\t"
11070             "ADD    ESP,8" %}
11071   opcode(0xDF, 0x5);  /* DF /5 */
11072   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11073   ins_pipe( pipe_slow );
11074 %}
11075 
11076 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11077   match(Set dst (ConvL2F src));
11078   effect( KILL cr );
11079   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11080             "PUSH   $src.lo\n\t"
11081             "FILD   ST,[ESP + #0]\n\t"
11082             "ADD    ESP,8\n\t"
11083             "FSTP_S $dst\t# F-round" %}
11084   opcode(0xDF, 0x5);  /* DF /5 */
11085   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11086   ins_pipe( pipe_slow );
11087 %}
11088 
11089 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11090   match(Set dst (ConvL2I src));
11091   effect( DEF dst, USE src );
11092   format %{ "MOV    $dst,$src.lo" %}
11093   ins_encode(enc_CopyL_Lo(dst,src));
11094   ins_pipe( ialu_reg_reg );
11095 %}
11096 
11097 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11098   match(Set dst (MoveF2I src));
11099   effect( DEF dst, USE src );
11100   ins_cost(100);
11101   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11102   ins_encode %{
11103     __ movl($dst$$Register, Address(rsp, $src$$disp));
11104   %}
11105   ins_pipe( ialu_reg_mem );
11106 %}
11107 
11108 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11109   predicate(UseSSE==0);
11110   match(Set dst (MoveF2I src));
11111   effect( DEF dst, USE src );
11112 
11113   ins_cost(125);
11114   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11115   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11116   ins_pipe( fpu_mem_reg );
11117 %}
11118 
11119 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11120   predicate(UseSSE>=1);
11121   match(Set dst (MoveF2I src));
11122   effect( DEF dst, USE src );
11123 
11124   ins_cost(95);
11125   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11126   ins_encode %{
11127     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11128   %}
11129   ins_pipe( pipe_slow );
11130 %}
11131 
11132 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11133   predicate(UseSSE>=2);
11134   match(Set dst (MoveF2I src));
11135   effect( DEF dst, USE src );
11136   ins_cost(85);
11137   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11138   ins_encode %{
11139     __ movdl($dst$$Register, $src$$XMMRegister);
11140   %}
11141   ins_pipe( pipe_slow );
11142 %}
11143 
11144 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11145   match(Set dst (MoveI2F src));
11146   effect( DEF dst, USE src );
11147 
11148   ins_cost(100);
11149   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11150   ins_encode %{
11151     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11152   %}
11153   ins_pipe( ialu_mem_reg );
11154 %}
11155 
11156 
11157 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11158   predicate(UseSSE==0);
11159   match(Set dst (MoveI2F src));
11160   effect(DEF dst, USE src);
11161 
11162   ins_cost(125);
11163   format %{ "FLD_S  $src\n\t"
11164             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11165   opcode(0xD9);               /* D9 /0, FLD m32real */
11166   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11167               Pop_Reg_FPR(dst) );
11168   ins_pipe( fpu_reg_mem );
11169 %}
11170 
11171 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11172   predicate(UseSSE>=1);
11173   match(Set dst (MoveI2F src));
11174   effect( DEF dst, USE src );
11175 
11176   ins_cost(95);
11177   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11178   ins_encode %{
11179     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11180   %}
11181   ins_pipe( pipe_slow );
11182 %}
11183 
11184 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11185   predicate(UseSSE>=2);
11186   match(Set dst (MoveI2F src));
11187   effect( DEF dst, USE src );
11188 
11189   ins_cost(85);
11190   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11191   ins_encode %{
11192     __ movdl($dst$$XMMRegister, $src$$Register);
11193   %}
11194   ins_pipe( pipe_slow );
11195 %}
11196 
11197 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11198   match(Set dst (MoveD2L src));
11199   effect(DEF dst, USE src);
11200 
11201   ins_cost(250);
11202   format %{ "MOV    $dst.lo,$src\n\t"
11203             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11204   opcode(0x8B, 0x8B);
11205   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11206   ins_pipe( ialu_mem_long_reg );
11207 %}
11208 
11209 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11210   predicate(UseSSE<=1);
11211   match(Set dst (MoveD2L src));
11212   effect(DEF dst, USE src);
11213 
11214   ins_cost(125);
11215   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11216   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11217   ins_pipe( fpu_mem_reg );
11218 %}
11219 
11220 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11221   predicate(UseSSE>=2);
11222   match(Set dst (MoveD2L src));
11223   effect(DEF dst, USE src);
11224   ins_cost(95);
11225   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11226   ins_encode %{
11227     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11228   %}
11229   ins_pipe( pipe_slow );
11230 %}
11231 
11232 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11233   predicate(UseSSE>=2);
11234   match(Set dst (MoveD2L src));
11235   effect(DEF dst, USE src, TEMP tmp);
11236   ins_cost(85);
11237   format %{ "MOVD   $dst.lo,$src\n\t"
11238             "PSHUFLW $tmp,$src,0x4E\n\t"
11239             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11240   ins_encode %{
11241     __ movdl($dst$$Register, $src$$XMMRegister);
11242     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11243     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11244   %}
11245   ins_pipe( pipe_slow );
11246 %}
11247 
11248 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11249   match(Set dst (MoveL2D src));
11250   effect(DEF dst, USE src);
11251 
11252   ins_cost(200);
11253   format %{ "MOV    $dst,$src.lo\n\t"
11254             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11255   opcode(0x89, 0x89);
11256   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11257   ins_pipe( ialu_mem_long_reg );
11258 %}
11259 
11260 
11261 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11262   predicate(UseSSE<=1);
11263   match(Set dst (MoveL2D src));
11264   effect(DEF dst, USE src);
11265   ins_cost(125);
11266 
11267   format %{ "FLD_D  $src\n\t"
11268             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11269   opcode(0xDD);               /* DD /0, FLD m64real */
11270   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11271               Pop_Reg_DPR(dst) );
11272   ins_pipe( fpu_reg_mem );
11273 %}
11274 
11275 
11276 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11277   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11278   match(Set dst (MoveL2D src));
11279   effect(DEF dst, USE src);
11280 
11281   ins_cost(95);
11282   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11283   ins_encode %{
11284     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11285   %}
11286   ins_pipe( pipe_slow );
11287 %}
11288 
11289 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11290   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11291   match(Set dst (MoveL2D src));
11292   effect(DEF dst, USE src);
11293 
11294   ins_cost(95);
11295   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse_partial" %}
11296   ins_encode %{
11297     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11298   %}
11299   ins_pipe( pipe_slow );
11300 %}
11301 
11302 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11303   predicate(UseSSE>=2);
11304   match(Set dst (MoveL2D src));
11305   effect(TEMP dst, USE src, TEMP tmp);
11306   ins_cost(85);
11307   format %{ "MOVD   $dst,$src.lo\n\t"
11308             "MOVD   $tmp,$src.hi\n\t"
11309             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11310   ins_encode %{
11311     __ movdl($dst$$XMMRegister, $src$$Register);
11312     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11313     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11314   %}
11315   ins_pipe( pipe_slow );
11316 %}
11317 
11318 
11319 // =======================================================================
11320 // fast clearing of an array
11321 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11322   predicate(!((ClearArrayNode*)n)->is_large());
11323   match(Set dummy (ClearArray cnt base));
11324   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11325 
11326   format %{ $$template
11327     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11328     $$emit$$"CMP    ECX,InitArrayShortSize\n\t"
11329     $$emit$$"JG     LARGE\n\t"
11330     $$emit$$"SHL    ECX,1\n\t"
11331     $$emit$$"DEC    ECX\n\t"
11332     $$emit$$"JS     DONE\t# Zero length\n\t"
11333     $$emit$$"MOV    (EDI,ECX,4),EAX\t# LOOP\n\t"
11334     $$emit$$"DEC    ECX\n\t"
11335     $$emit$$"JGE    LOOP\n\t"
11336     $$emit$$"JMP    DONE\n\t"
11337     $$emit$$"# LARGE:\n\t"
11338     if (UseFastStosb) {
11339        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11340        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11341     } else {
11342        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11343        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11344     }
11345     $$emit$$"# DONE"
11346   %}
11347   ins_encode %{
11348     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
11349   %}
11350   ins_pipe( pipe_slow );
11351 %}
11352 
11353 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11354   predicate(((ClearArrayNode*)n)->is_large());
11355   match(Set dummy (ClearArray cnt base));
11356   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11357   format %{ $$template
11358     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11359     if (UseFastStosb) {
11360        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11361        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11362     } else {
11363        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11364        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11365     }
11366     $$emit$$"# DONE"
11367   %}
11368   ins_encode %{
11369     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
11370   %}
11371   ins_pipe( pipe_slow );
11372 %}
11373 
11374 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11375                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11376   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11377   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11378   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11379 
11380   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11381   ins_encode %{
11382     __ string_compare($str1$$Register, $str2$$Register,
11383                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11384                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11385   %}
11386   ins_pipe( pipe_slow );
11387 %}
11388 
11389 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11390                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11391   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11392   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11393   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11394 
11395   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11396   ins_encode %{
11397     __ string_compare($str1$$Register, $str2$$Register,
11398                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11399                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11400   %}
11401   ins_pipe( pipe_slow );
11402 %}
11403 
11404 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11405                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11406   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11407   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11408   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11409 
11410   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11411   ins_encode %{
11412     __ string_compare($str1$$Register, $str2$$Register,
11413                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11414                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11415   %}
11416   ins_pipe( pipe_slow );
11417 %}
11418 
11419 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11420                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11421   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11422   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11423   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11424 
11425   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11426   ins_encode %{
11427     __ string_compare($str2$$Register, $str1$$Register,
11428                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11429                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11430   %}
11431   ins_pipe( pipe_slow );
11432 %}
11433 
11434 // fast string equals
11435 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11436                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11437   match(Set result (StrEquals (Binary str1 str2) cnt));
11438   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11439 
11440   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11441   ins_encode %{
11442     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11443                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11444                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11445   %}
11446 
11447   ins_pipe( pipe_slow );
11448 %}
11449 
11450 // fast search of substring with known size.
11451 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11452                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11453   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11454   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11455   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11456 
11457   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11458   ins_encode %{
11459     int icnt2 = (int)$int_cnt2$$constant;
11460     if (icnt2 >= 16) {
11461       // IndexOf for constant substrings with size >= 16 elements
11462       // which don't need to be loaded through stack.
11463       __ string_indexofC8($str1$$Register, $str2$$Register,
11464                           $cnt1$$Register, $cnt2$$Register,
11465                           icnt2, $result$$Register,
11466                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11467     } else {
11468       // Small strings are loaded through stack if they cross page boundary.
11469       __ string_indexof($str1$$Register, $str2$$Register,
11470                         $cnt1$$Register, $cnt2$$Register,
11471                         icnt2, $result$$Register,
11472                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11473     }
11474   %}
11475   ins_pipe( pipe_slow );
11476 %}
11477 
11478 // fast search of substring with known size.
11479 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11480                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11481   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11482   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11483   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11484 
11485   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11486   ins_encode %{
11487     int icnt2 = (int)$int_cnt2$$constant;
11488     if (icnt2 >= 8) {
11489       // IndexOf for constant substrings with size >= 8 elements
11490       // which don't need to be loaded through stack.
11491       __ string_indexofC8($str1$$Register, $str2$$Register,
11492                           $cnt1$$Register, $cnt2$$Register,
11493                           icnt2, $result$$Register,
11494                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11495     } else {
11496       // Small strings are loaded through stack if they cross page boundary.
11497       __ string_indexof($str1$$Register, $str2$$Register,
11498                         $cnt1$$Register, $cnt2$$Register,
11499                         icnt2, $result$$Register,
11500                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11501     }
11502   %}
11503   ins_pipe( pipe_slow );
11504 %}
11505 
11506 // fast search of substring with known size.
11507 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11508                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11509   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11510   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11511   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11512 
11513   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11514   ins_encode %{
11515     int icnt2 = (int)$int_cnt2$$constant;
11516     if (icnt2 >= 8) {
11517       // IndexOf for constant substrings with size >= 8 elements
11518       // which don't need to be loaded through stack.
11519       __ string_indexofC8($str1$$Register, $str2$$Register,
11520                           $cnt1$$Register, $cnt2$$Register,
11521                           icnt2, $result$$Register,
11522                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11523     } else {
11524       // Small strings are loaded through stack if they cross page boundary.
11525       __ string_indexof($str1$$Register, $str2$$Register,
11526                         $cnt1$$Register, $cnt2$$Register,
11527                         icnt2, $result$$Register,
11528                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11529     }
11530   %}
11531   ins_pipe( pipe_slow );
11532 %}
11533 
11534 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11535                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11536   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11537   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11538   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11539 
11540   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11541   ins_encode %{
11542     __ string_indexof($str1$$Register, $str2$$Register,
11543                       $cnt1$$Register, $cnt2$$Register,
11544                       (-1), $result$$Register,
11545                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11546   %}
11547   ins_pipe( pipe_slow );
11548 %}
11549 
11550 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11551                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11552   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11553   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11554   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11555 
11556   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11557   ins_encode %{
11558     __ string_indexof($str1$$Register, $str2$$Register,
11559                       $cnt1$$Register, $cnt2$$Register,
11560                       (-1), $result$$Register,
11561                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11562   %}
11563   ins_pipe( pipe_slow );
11564 %}
11565 
11566 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11567                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11568   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11569   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11570   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11571 
11572   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11573   ins_encode %{
11574     __ string_indexof($str1$$Register, $str2$$Register,
11575                       $cnt1$$Register, $cnt2$$Register,
11576                       (-1), $result$$Register,
11577                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11578   %}
11579   ins_pipe( pipe_slow );
11580 %}
11581 
11582 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11583                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11584   predicate(UseSSE42Intrinsics);
11585   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11586   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11587   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11588   ins_encode %{
11589     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11590                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11591   %}
11592   ins_pipe( pipe_slow );
11593 %}
11594 
11595 // fast array equals
11596 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11597                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11598 %{
11599   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11600   match(Set result (AryEq ary1 ary2));
11601   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11602   //ins_cost(300);
11603 
11604   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11605   ins_encode %{
11606     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11607                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11608                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11609   %}
11610   ins_pipe( pipe_slow );
11611 %}
11612 
11613 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11614                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11615 %{
11616   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11617   match(Set result (AryEq ary1 ary2));
11618   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11619   //ins_cost(300);
11620 
11621   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11622   ins_encode %{
11623     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11624                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11625                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11626   %}
11627   ins_pipe( pipe_slow );
11628 %}
11629 
11630 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11631                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11632 %{
11633   match(Set result (HasNegatives ary1 len));
11634   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11635 
11636   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11637   ins_encode %{
11638     __ has_negatives($ary1$$Register, $len$$Register,
11639                      $result$$Register, $tmp3$$Register,
11640                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11641   %}
11642   ins_pipe( pipe_slow );
11643 %}
11644 
11645 // fast char[] to byte[] compression
11646 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11647                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11648   match(Set result (StrCompressedCopy src (Binary dst len)));
11649   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11650 
11651   format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
11652   ins_encode %{
11653     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11654                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11655                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11656   %}
11657   ins_pipe( pipe_slow );
11658 %}
11659 
11660 // fast byte[] to char[] inflation
11661 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11662                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11663   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11664   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11665 
11666   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11667   ins_encode %{
11668     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11669                           $tmp1$$XMMRegister, $tmp2$$Register);
11670   %}
11671   ins_pipe( pipe_slow );
11672 %}
11673 
11674 // encode char[] to byte[] in ISO_8859_1
11675 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11676                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11677                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11678   match(Set result (EncodeISOArray src (Binary dst len)));
11679   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11680 
11681   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11682   ins_encode %{
11683     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11684                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11685                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11686   %}
11687   ins_pipe( pipe_slow );
11688 %}
11689 
11690 
11691 //----------Control Flow Instructions------------------------------------------
11692 // Signed compare Instructions
11693 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11694   match(Set cr (CmpI op1 op2));
11695   effect( DEF cr, USE op1, USE op2 );
11696   format %{ "CMP    $op1,$op2" %}
11697   opcode(0x3B);  /* Opcode 3B /r */
11698   ins_encode( OpcP, RegReg( op1, op2) );
11699   ins_pipe( ialu_cr_reg_reg );
11700 %}
11701 
11702 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11703   match(Set cr (CmpI op1 op2));
11704   effect( DEF cr, USE op1 );
11705   format %{ "CMP    $op1,$op2" %}
11706   opcode(0x81,0x07);  /* Opcode 81 /7 */
11707   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11708   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11709   ins_pipe( ialu_cr_reg_imm );
11710 %}
11711 
11712 // Cisc-spilled version of cmpI_eReg
11713 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11714   match(Set cr (CmpI op1 (LoadI op2)));
11715 
11716   format %{ "CMP    $op1,$op2" %}
11717   ins_cost(500);
11718   opcode(0x3B);  /* Opcode 3B /r */
11719   ins_encode( OpcP, RegMem( op1, op2) );
11720   ins_pipe( ialu_cr_reg_mem );
11721 %}
11722 
11723 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11724   match(Set cr (CmpI src zero));
11725   effect( DEF cr, USE src );
11726 
11727   format %{ "TEST   $src,$src" %}
11728   opcode(0x85);
11729   ins_encode( OpcP, RegReg( src, src ) );
11730   ins_pipe( ialu_cr_reg_imm );
11731 %}
11732 
11733 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11734   match(Set cr (CmpI (AndI src con) zero));
11735 
11736   format %{ "TEST   $src,$con" %}
11737   opcode(0xF7,0x00);
11738   ins_encode( OpcP, RegOpc(src), Con32(con) );
11739   ins_pipe( ialu_cr_reg_imm );
11740 %}
11741 
11742 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11743   match(Set cr (CmpI (AndI src mem) zero));
11744 
11745   format %{ "TEST   $src,$mem" %}
11746   opcode(0x85);
11747   ins_encode( OpcP, RegMem( src, mem ) );
11748   ins_pipe( ialu_cr_reg_mem );
11749 %}
11750 
11751 // Unsigned compare Instructions; really, same as signed except they
11752 // produce an eFlagsRegU instead of eFlagsReg.
11753 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11754   match(Set cr (CmpU op1 op2));
11755 
11756   format %{ "CMPu   $op1,$op2" %}
11757   opcode(0x3B);  /* Opcode 3B /r */
11758   ins_encode( OpcP, RegReg( op1, op2) );
11759   ins_pipe( ialu_cr_reg_reg );
11760 %}
11761 
11762 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11763   match(Set cr (CmpU op1 op2));
11764 
11765   format %{ "CMPu   $op1,$op2" %}
11766   opcode(0x81,0x07);  /* Opcode 81 /7 */
11767   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11768   ins_pipe( ialu_cr_reg_imm );
11769 %}
11770 
11771 // Cisc-spilled version of cmpU_eReg
11772 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11773   match(Set cr (CmpU op1 (LoadI op2)));
11774 
11775   format %{ "CMPu   $op1,$op2" %}
11776   ins_cost(500);
11777   opcode(0x3B);  /* Opcode 3B /r */
11778   ins_encode( OpcP, RegMem( op1, op2) );
11779   ins_pipe( ialu_cr_reg_mem );
11780 %}
11781 
11782 // // Cisc-spilled version of cmpU_eReg
11783 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11784 //  match(Set cr (CmpU (LoadI op1) op2));
11785 //
11786 //  format %{ "CMPu   $op1,$op2" %}
11787 //  ins_cost(500);
11788 //  opcode(0x39);  /* Opcode 39 /r */
11789 //  ins_encode( OpcP, RegMem( op1, op2) );
11790 //%}
11791 
11792 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11793   match(Set cr (CmpU src zero));
11794 
11795   format %{ "TESTu  $src,$src" %}
11796   opcode(0x85);
11797   ins_encode( OpcP, RegReg( src, src ) );
11798   ins_pipe( ialu_cr_reg_imm );
11799 %}
11800 
11801 // Unsigned pointer compare Instructions
11802 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11803   match(Set cr (CmpP op1 op2));
11804 
11805   format %{ "CMPu   $op1,$op2" %}
11806   opcode(0x3B);  /* Opcode 3B /r */
11807   ins_encode( OpcP, RegReg( op1, op2) );
11808   ins_pipe( ialu_cr_reg_reg );
11809 %}
11810 
11811 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11812   match(Set cr (CmpP op1 op2));
11813 
11814   format %{ "CMPu   $op1,$op2" %}
11815   opcode(0x81,0x07);  /* Opcode 81 /7 */
11816   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11817   ins_pipe( ialu_cr_reg_imm );
11818 %}
11819 
11820 // Cisc-spilled version of cmpP_eReg
11821 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11822   match(Set cr (CmpP op1 (LoadP op2)));
11823 
11824   format %{ "CMPu   $op1,$op2" %}
11825   ins_cost(500);
11826   opcode(0x3B);  /* Opcode 3B /r */
11827   ins_encode( OpcP, RegMem( op1, op2) );
11828   ins_pipe( ialu_cr_reg_mem );
11829 %}
11830 
11831 // // Cisc-spilled version of cmpP_eReg
11832 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11833 //  match(Set cr (CmpP (LoadP op1) op2));
11834 //
11835 //  format %{ "CMPu   $op1,$op2" %}
11836 //  ins_cost(500);
11837 //  opcode(0x39);  /* Opcode 39 /r */
11838 //  ins_encode( OpcP, RegMem( op1, op2) );
11839 //%}
11840 
11841 // Compare raw pointer (used in out-of-heap check).
11842 // Only works because non-oop pointers must be raw pointers
11843 // and raw pointers have no anti-dependencies.
11844 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11845   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11846   match(Set cr (CmpP op1 (LoadP op2)));
11847 
11848   format %{ "CMPu   $op1,$op2" %}
11849   opcode(0x3B);  /* Opcode 3B /r */
11850   ins_encode( OpcP, RegMem( op1, op2) );
11851   ins_pipe( ialu_cr_reg_mem );
11852 %}
11853 
11854 //
11855 // This will generate a signed flags result. This should be ok
11856 // since any compare to a zero should be eq/neq.
11857 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11858   match(Set cr (CmpP src zero));
11859 
11860   format %{ "TEST   $src,$src" %}
11861   opcode(0x85);
11862   ins_encode( OpcP, RegReg( src, src ) );
11863   ins_pipe( ialu_cr_reg_imm );
11864 %}
11865 
11866 // Cisc-spilled version of testP_reg
11867 // This will generate a signed flags result. This should be ok
11868 // since any compare to a zero should be eq/neq.
11869 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11870   match(Set cr (CmpP (LoadP op) zero));
11871 
11872   format %{ "TEST   $op,0xFFFFFFFF" %}
11873   ins_cost(500);
11874   opcode(0xF7);               /* Opcode F7 /0 */
11875   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11876   ins_pipe( ialu_cr_reg_imm );
11877 %}
11878 
11879 // Yanked all unsigned pointer compare operations.
11880 // Pointer compares are done with CmpP which is already unsigned.
11881 
11882 //----------Max and Min--------------------------------------------------------
11883 // Min Instructions
11884 ////
11885 //   *** Min and Max using the conditional move are slower than the
11886 //   *** branch version on a Pentium III.
11887 // // Conditional move for min
11888 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11889 //  effect( USE_DEF op2, USE op1, USE cr );
11890 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11891 //  opcode(0x4C,0x0F);
11892 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11893 //  ins_pipe( pipe_cmov_reg );
11894 //%}
11895 //
11896 //// Min Register with Register (P6 version)
11897 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11898 //  predicate(VM_Version::supports_cmov() );
11899 //  match(Set op2 (MinI op1 op2));
11900 //  ins_cost(200);
11901 //  expand %{
11902 //    eFlagsReg cr;
11903 //    compI_eReg(cr,op1,op2);
11904 //    cmovI_reg_lt(op2,op1,cr);
11905 //  %}
11906 //%}
11907 
11908 // Min Register with Register (generic version)
11909 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11910   match(Set dst (MinI dst src));
11911   effect(KILL flags);
11912   ins_cost(300);
11913 
11914   format %{ "MIN    $dst,$src" %}
11915   opcode(0xCC);
11916   ins_encode( min_enc(dst,src) );
11917   ins_pipe( pipe_slow );
11918 %}
11919 
11920 // Max Register with Register
11921 //   *** Min and Max using the conditional move are slower than the
11922 //   *** branch version on a Pentium III.
11923 // // Conditional move for max
11924 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11925 //  effect( USE_DEF op2, USE op1, USE cr );
11926 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11927 //  opcode(0x4F,0x0F);
11928 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11929 //  ins_pipe( pipe_cmov_reg );
11930 //%}
11931 //
11932 // // Max Register with Register (P6 version)
11933 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11934 //  predicate(VM_Version::supports_cmov() );
11935 //  match(Set op2 (MaxI op1 op2));
11936 //  ins_cost(200);
11937 //  expand %{
11938 //    eFlagsReg cr;
11939 //    compI_eReg(cr,op1,op2);
11940 //    cmovI_reg_gt(op2,op1,cr);
11941 //  %}
11942 //%}
11943 
11944 // Max Register with Register (generic version)
11945 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11946   match(Set dst (MaxI dst src));
11947   effect(KILL flags);
11948   ins_cost(300);
11949 
11950   format %{ "MAX    $dst,$src" %}
11951   opcode(0xCC);
11952   ins_encode( max_enc(dst,src) );
11953   ins_pipe( pipe_slow );
11954 %}
11955 
11956 // ============================================================================
11957 // Counted Loop limit node which represents the exact final iterator value.
11958 // Note: the resulting value should fit into the integer range, since counted
11959 // loops have a limit check that guards against overflow.
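// Worked example (illustrative only, positive stride): with init = 5,
// limit = 14 and stride = 3 the expression below gives
// 5 + 3 * ((14 - 5 + 3 - 1) / 3) = 5 + 3 * 3 = 14, which is exactly where a
// loop stepping 5, 8, 11 terminates.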
11960 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
11961   match(Set limit (LoopLimit (Binary init limit) stride));
11962   effect(TEMP limit_hi, TEMP tmp, KILL flags);
11963   ins_cost(300);
11964 
11965   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
11966   ins_encode %{
11967     int strd = (int)$stride$$constant;
11968     assert(strd != 1 && strd != -1, "sanity");
11969     int m1 = (strd > 0) ? 1 : -1;
11970     // Convert limit to long (EAX:EDX)
11971     __ cdql();
11972     // Convert init to long (init:tmp)
11973     __ movl($tmp$$Register, $init$$Register);
11974     __ sarl($tmp$$Register, 31);
11975     // $limit - $init
11976     __ subl($limit$$Register, $init$$Register);
11977     __ sbbl($limit_hi$$Register, $tmp$$Register);
11978     // + ($stride - 1)
11979     if (strd > 0) {
11980       __ addl($limit$$Register, (strd - 1));
11981       __ adcl($limit_hi$$Register, 0);
11982       __ movl($tmp$$Register, strd);
11983     } else {
11984       __ addl($limit$$Register, (strd + 1));
11985       __ adcl($limit_hi$$Register, -1);
11986       __ lneg($limit_hi$$Register, $limit$$Register);
11987       __ movl($tmp$$Register, -strd);
11988     }
11989     // signed division: (EAX:EDX) / pos_stride
11990     __ idivl($tmp$$Register);
11991     if (strd < 0) {
11992       // restore sign
11993       __ negl($tmp$$Register);
11994     }
11995     // (EAX) * stride
11996     __ mull($tmp$$Register);
11997     // + init (ignore upper bits)
11998     __ addl($limit$$Register, $init$$Register);
11999   %}
12000   ins_pipe( pipe_slow );
12001 %}
12002 
12003 // ============================================================================
12004 // Branch Instructions
12005 // Jump Table
12006 instruct jumpXtnd(rRegI switch_val) %{
12007   match(Jump switch_val);
12008   ins_cost(350);
12009   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12010   ins_encode %{
12011     // Jump to Address(table_base + switch_reg)
12012     Address index(noreg, $switch_val$$Register, Address::times_1);
12013     __ jump(ArrayAddress($constantaddress, index));
12014   %}
12015   ins_pipe(pipe_jmp);
12016 %}
12017 
12018 // Jump Direct - Label defines a relative address from JMP+1
12019 instruct jmpDir(label labl) %{
12020   match(Goto);
12021   effect(USE labl);
12022 
12023   ins_cost(300);
12024   format %{ "JMP    $labl" %}
12025   size(5);
12026   ins_encode %{
12027     Label* L = $labl$$label;
12028     __ jmp(*L, false); // Always long jump
12029   %}
12030   ins_pipe( pipe_jmp );
12031 %}
12032 
12033 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12034 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12035   match(If cop cr);
12036   effect(USE labl);
12037 
12038   ins_cost(300);
12039   format %{ "J$cop    $labl" %}
12040   size(6);
12041   ins_encode %{
12042     Label* L = $labl$$label;
12043     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12044   %}
12045   ins_pipe( pipe_jcc );
12046 %}
12047 
12048 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12049 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12050   predicate(!n->has_vector_mask_set());
12051   match(CountedLoopEnd cop cr);
12052   effect(USE labl);
12053 
12054   ins_cost(300);
12055   format %{ "J$cop    $labl\t# Loop end" %}
12056   size(6);
12057   ins_encode %{
12058     Label* L = $labl$$label;
12059     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12060   %}
12061   ins_pipe( pipe_jcc );
12062 %}
12063 
12064 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12065 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12066   predicate(!n->has_vector_mask_set());
12067   match(CountedLoopEnd cop cmp);
12068   effect(USE labl);
12069 
12070   ins_cost(300);
12071   format %{ "J$cop,u  $labl\t# Loop end" %}
12072   size(6);
12073   ins_encode %{
12074     Label* L = $labl$$label;
12075     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12076   %}
12077   ins_pipe( pipe_jcc );
12078 %}
12079 
12080 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12081   predicate(!n->has_vector_mask_set());
12082   match(CountedLoopEnd cop cmp);
12083   effect(USE labl);
12084 
12085   ins_cost(200);
12086   format %{ "J$cop,u  $labl\t# Loop end" %}
12087   size(6);
12088   ins_encode %{
12089     Label* L = $labl$$label;
12090     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12091   %}
12092   ins_pipe( pipe_jcc );
12093 %}
12094 
12095 // mask version
12096 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12097 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12098   predicate(n->has_vector_mask_set());
12099   match(CountedLoopEnd cop cr);
12100   effect(USE labl);
12101 
12102   ins_cost(400);
12103   format %{ "J$cop    $labl\t# Loop end\n\t"
12104             "restorevectmask \t# vector mask restore for loops" %}
12105   size(10);
12106   ins_encode %{
12107     Label* L = $labl$$label;
12108     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12109     __ restorevectmask();
12110   %}
12111   ins_pipe( pipe_jcc );
12112 %}
12113 
12114 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12115 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12116   predicate(n->has_vector_mask_set());
12117   match(CountedLoopEnd cop cmp);
12118   effect(USE labl);
12119 
12120   ins_cost(400);
12121   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12122             "restorevectmask \t# vector mask restore for loops" %}
12123   size(10);
12124   ins_encode %{
12125     Label* L = $labl$$label;
12126     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12127     __ restorevectmask();
12128   %}
12129   ins_pipe( pipe_jcc );
12130 %}
12131 
12132 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12133   predicate(n->has_vector_mask_set());
12134   match(CountedLoopEnd cop cmp);
12135   effect(USE labl);
12136 
12137   ins_cost(300);
12138   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12139             "restorevectmask \t# vector mask restore for loops" %}
12140   size(10);
12141   ins_encode %{
12142     Label* L = $labl$$label;
12143     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12144     __ restorevectmask();
12145   %}
12146   ins_pipe( pipe_jcc );
12147 %}
12148 
12149 // Jump Direct Conditional - using unsigned comparison
12150 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12151   match(If cop cmp);
12152   effect(USE labl);
12153 
12154   ins_cost(300);
12155   format %{ "J$cop,u  $labl" %}
12156   size(6);
12157   ins_encode %{
12158     Label* L = $labl$$label;
12159     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12160   %}
12161   ins_pipe(pipe_jcc);
12162 %}
12163 
12164 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12165   match(If cop cmp);
12166   effect(USE labl);
12167 
12168   ins_cost(200);
12169   format %{ "J$cop,u  $labl" %}
12170   size(6);
12171   ins_encode %{
12172     Label* L = $labl$$label;
12173     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12174   %}
12175   ins_pipe(pipe_jcc);
12176 %}
12177 
12178 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12179   match(If cop cmp);
12180   effect(USE labl);
12181 
12182   ins_cost(200);
12183   format %{ $$template
12184     if ($cop$$cmpcode == Assembler::notEqual) {
12185       $$emit$$"JP,u   $labl\n\t"
12186       $$emit$$"J$cop,u   $labl"
12187     } else {
12188       $$emit$$"JP,u   done\n\t"
12189       $$emit$$"J$cop,u   $labl\n\t"
12190       $$emit$$"done:"
12191     }
12192   %}
12193   ins_encode %{
12194     Label* l = $labl$$label;
12195     if ($cop$$cmpcode == Assembler::notEqual) {
12196       __ jcc(Assembler::parity, *l, false);
12197       __ jcc(Assembler::notEqual, *l, false);
12198     } else if ($cop$$cmpcode == Assembler::equal) {
12199       Label done;
12200       __ jccb(Assembler::parity, done);
12201       __ jcc(Assembler::equal, *l, false);
12202       __ bind(done);
12203     } else {
12204        ShouldNotReachHere();
12205     }
12206   %}
12207   ins_pipe(pipe_jcc);
12208 %}
12209 
12210 // ============================================================================
12211 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12212 // array for an instance of the superklass.  Set a hidden internal cache on a
12213 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12214 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12215 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12216   match(Set result (PartialSubtypeCheck sub super));
12217   effect( KILL rcx, KILL cr );
12218 
12219   ins_cost(1100);  // slightly larger than the next version
12220   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12221             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12222             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12223             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12224             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12225             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12226             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12227      "miss:\t" %}
12228 
12229   opcode(0x1); // Force a XOR of EDI
12230   ins_encode( enc_PartialSubtypeCheck() );
12231   ins_pipe( pipe_slow );
12232 %}
12233 
12234 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12235   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12236   effect( KILL rcx, KILL result );
12237 
12238   ins_cost(1000);
12239   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12240             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12241             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12242             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12243             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12244             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12245      "miss:\t" %}
12246 
12247   opcode(0x0);  // No need to XOR EDI
12248   ins_encode( enc_PartialSubtypeCheck() );
12249   ins_pipe( pipe_slow );
12250 %}
12251 
12252 // ============================================================================
12253 // Branch Instructions -- short offset versions
12254 //
12255 // These instructions are used to replace jumps of a long offset (the default
12256 // match) with jumps of a shorter offset.  These instructions are all tagged
12257 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12258 // match rules in general matching.  Instead, the ADLC generates a conversion
12259 // method in the MachNode which can be used to do in-place replacement of the
12260 // long variant with the shorter variant.  The compiler determines whether a
12261 // branch can use the short form via the is_short_branch_offset() predicate in
12262 // the machine-specific code section of the file.
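// For scale (taken from the size() attributes in this file, not a new claim):
// the short forms below encode in 2 bytes (rel8 JMP/Jcc) versus the 5- or
// 6-byte rel32 forms above, which is what makes the in-place replacement
// worthwhile whenever the target lies within a signed byte offset.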
12263 
12264 // Jump Direct - Label defines a relative address from JMP+1
12265 instruct jmpDir_short(label labl) %{
12266   match(Goto);
12267   effect(USE labl);
12268 
12269   ins_cost(300);
12270   format %{ "JMP,s  $labl" %}
12271   size(2);
12272   ins_encode %{
12273     Label* L = $labl$$label;
12274     __ jmpb(*L);
12275   %}
12276   ins_pipe( pipe_jmp );
12277   ins_short_branch(1);
12278 %}
12279 
12280 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12281 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12282   match(If cop cr);
12283   effect(USE labl);
12284 
12285   ins_cost(300);
12286   format %{ "J$cop,s  $labl" %}
12287   size(2);
12288   ins_encode %{
12289     Label* L = $labl$$label;
12290     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12291   %}
12292   ins_pipe( pipe_jcc );
12293   ins_short_branch(1);
12294 %}
12295 
12296 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12297 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12298   match(CountedLoopEnd cop cr);
12299   effect(USE labl);
12300 
12301   ins_cost(300);
12302   format %{ "J$cop,s  $labl\t# Loop end" %}
12303   size(2);
12304   ins_encode %{
12305     Label* L = $labl$$label;
12306     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12307   %}
12308   ins_pipe( pipe_jcc );
12309   ins_short_branch(1);
12310 %}
12311 
12312 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12313 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12314   match(CountedLoopEnd cop cmp);
12315   effect(USE labl);
12316 
12317   ins_cost(300);
12318   format %{ "J$cop,us $labl\t# Loop end" %}
12319   size(2);
12320   ins_encode %{
12321     Label* L = $labl$$label;
12322     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12323   %}
12324   ins_pipe( pipe_jcc );
12325   ins_short_branch(1);
12326 %}
12327 
12328 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12329   match(CountedLoopEnd cop cmp);
12330   effect(USE labl);
12331 
12332   ins_cost(300);
12333   format %{ "J$cop,us $labl\t# Loop end" %}
12334   size(2);
12335   ins_encode %{
12336     Label* L = $labl$$label;
12337     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12338   %}
12339   ins_pipe( pipe_jcc );
12340   ins_short_branch(1);
12341 %}
12342 
12343 // Jump Direct Conditional - using unsigned comparison
12344 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12345   match(If cop cmp);
12346   effect(USE labl);
12347 
12348   ins_cost(300);
12349   format %{ "J$cop,us $labl" %}
12350   size(2);
12351   ins_encode %{
12352     Label* L = $labl$$label;
12353     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12354   %}
12355   ins_pipe( pipe_jcc );
12356   ins_short_branch(1);
12357 %}
12358 
12359 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12360   match(If cop cmp);
12361   effect(USE labl);
12362 
12363   ins_cost(300);
12364   format %{ "J$cop,us $labl" %}
12365   size(2);
12366   ins_encode %{
12367     Label* L = $labl$$label;
12368     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12369   %}
12370   ins_pipe( pipe_jcc );
12371   ins_short_branch(1);
12372 %}
12373 
12374 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12375   match(If cop cmp);
12376   effect(USE labl);
12377 
12378   ins_cost(300);
12379   format %{ $$template
12380     if ($cop$$cmpcode == Assembler::notEqual) {
12381       $$emit$$"JP,u,s   $labl\n\t"
12382       $$emit$$"J$cop,u,s   $labl"
12383     } else {
12384       $$emit$$"JP,u,s   done\n\t"
12385       $$emit$$"J$cop,u,s  $labl\n\t"
12386       $$emit$$"done:"
12387     }
12388   %}
12389   size(4);
12390   ins_encode %{
12391     Label* l = $labl$$label;
12392     if ($cop$$cmpcode == Assembler::notEqual) {
12393       __ jccb(Assembler::parity, *l);
12394       __ jccb(Assembler::notEqual, *l);
12395     } else if ($cop$$cmpcode == Assembler::equal) {
12396       Label done;
12397       __ jccb(Assembler::parity, done);
12398       __ jccb(Assembler::equal, *l);
12399       __ bind(done);
12400     } else {
12401        ShouldNotReachHere();
12402     }
12403   %}
12404   ins_pipe(pipe_jcc);
12405   ins_short_branch(1);
12406 %}
12407 
12408 // ============================================================================
12409 // Long Compare
12410 //
12411 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12412 // is tricky.  The flavor of compare used depends on whether we are testing
12413 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12414 // The GE test is the negated LT test.  The LE test can be had by commuting
12415 // the operands (yielding a GE test) and then negating; negate again for the
12416 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12417 // NE test is negated from that.
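// Illustrative example (added for clarity; see the CMP/SBB sequence in
// cmpL_reg_flags_LTGE below): comparing the longs X = 1 and Y = 2, whose high
// halves are both zero, CMP X.lo,Y.lo borrows, and SBB then computes
// X.hi - Y.hi - borrow = -1.  The resulting sign/overflow flags report
// "less", so the LT/GE branch correctly sees X < Y even though the high
// halves alone compare equal.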
12418 
12419 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12420 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12421 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12422 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12423 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12424 // foo match ends up with the wrong leaf.  One fix is to not match both
12425 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12426 // both forms beat the ternary form of long-compare and both are very useful
12427 // on Intel which has so few registers.
12428 
12429 // Manifest a CmpL result in an integer register.  Very painful.
12430 // This is the test to avoid.
12431 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12432   match(Set dst (CmpL3 src1 src2));
12433   effect( KILL flags );
12434   ins_cost(1000);
12435   format %{ "XOR    $dst,$dst\n\t"
12436             "CMP    $src1.hi,$src2.hi\n\t"
12437             "JLT,s  m_one\n\t"
12438             "JGT,s  p_one\n\t"
12439             "CMP    $src1.lo,$src2.lo\n\t"
12440             "JB,s   m_one\n\t"
12441             "JEQ,s  done\n"
12442     "p_one:\tINC    $dst\n\t"
12443             "JMP,s  done\n"
12444     "m_one:\tDEC    $dst\n"
12445      "done:" %}
12446   ins_encode %{
12447     Label p_one, m_one, done;
12448     __ xorptr($dst$$Register, $dst$$Register);
12449     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12450     __ jccb(Assembler::less,    m_one);
12451     __ jccb(Assembler::greater, p_one);
12452     __ cmpl($src1$$Register, $src2$$Register);
12453     __ jccb(Assembler::below,   m_one);
12454     __ jccb(Assembler::equal,   done);
12455     __ bind(p_one);
12456     __ incrementl($dst$$Register);
12457     __ jmpb(done);
12458     __ bind(m_one);
12459     __ decrementl($dst$$Register);
12460     __ bind(done);
12461   %}
12462   ins_pipe( pipe_slow );
12463 %}
12464 
12465 //======
12466 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12467 // compares.  Can be used for LE or GT compares by reversing arguments.
12468 // NOT GOOD FOR EQ/NE tests.
12469 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12470   match( Set flags (CmpL src zero ));
12471   ins_cost(100);
12472   format %{ "TEST   $src.hi,$src.hi" %}
12473   opcode(0x85);
12474   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12475   ins_pipe( ialu_cr_reg_reg );
12476 %}
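
// For a compare against zero only the sign matters, and the sign of a long
// is the sign of its high word, so testing $src.hi alone answers LT and GE.
// A rough C sketch of the condition (illustration only):
//
//   static bool long_lt_zero(jint src_hi) {
//     return src_hi < 0;   // SF after TEST $src.hi,$src.hi; GE is the negation
//   }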
12477 
12478 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12479 // compares.  Can be used for LE or GT compares by reversing arguments.
12480 // NOT GOOD FOR EQ/NE tests.
12481 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12482   match( Set flags (CmpL src1 src2 ));
12483   effect( TEMP tmp );
12484   ins_cost(300);
12485   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12486             "MOV    $tmp,$src1.hi\n\t"
12487             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12488   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12489   ins_pipe( ialu_cr_reg_reg );
12490 %}
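
// The CMP/SBB pair above is the standard multi-word subtract: the low-word
// CMP produces the borrow that the high-word SBB consumes, so SF and OF end
// up as if the full 64-bit subtraction src1 - src2 had been performed, and
// the JL/JGE conditions read the signed LT/GE result directly.  A rough C
// sketch of the condition the flags encode (illustration only):
//
//   static bool long_lt(jint a_hi, juint a_lo, jint b_hi, juint b_lo) {
//     bool borrow = (a_lo < b_lo);        // CMP $src1.lo,$src2.lo sets CF
//     // SBB $tmp,$src2.hi computes a_hi - b_hi - borrow; JL (SF != OF) is:
//     return (a_hi < b_hi) || (a_hi == b_hi && borrow);
//   }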
12491 
// Long compares reg < zero/reg OR reg >= zero/reg.
12493 // Just a wrapper for a normal branch, plus the predicate test.
12494 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12495   match(If cmp flags);
12496   effect(USE labl);
12497   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12498   expand %{
12499     jmpCon(cmp,flags,labl);    // JLT or JGE...
12500   %}
12501 %}
12502 
12503 // Compare 2 longs and CMOVE longs.
12504 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12505   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12506   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12507   ins_cost(400);
12508   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12509             "CMOV$cmp $dst.hi,$src.hi" %}
12510   opcode(0x0F,0x40);
12511   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12512   ins_pipe( pipe_cmov_reg_long );
12513 %}
12514 
12515 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12516   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12517   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12518   ins_cost(500);
12519   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12520             "CMOV$cmp $dst.hi,$src.hi" %}
12521   opcode(0x0F,0x40);
12522   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12523   ins_pipe( pipe_cmov_reg_long );
12524 %}
12525 
12526 // Compare 2 longs and CMOVE ints.
12527 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12528   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12529   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12530   ins_cost(200);
12531   format %{ "CMOV$cmp $dst,$src" %}
12532   opcode(0x0F,0x40);
12533   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12534   ins_pipe( pipe_cmov_reg );
12535 %}
12536 
12537 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12538   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12539   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12540   ins_cost(250);
12541   format %{ "CMOV$cmp $dst,$src" %}
12542   opcode(0x0F,0x40);
12543   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12544   ins_pipe( pipe_cmov_mem );
12545 %}
12546 
// Compare 2 longs and CMOVE ptrs.
12548 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12549   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12550   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12551   ins_cost(200);
12552   format %{ "CMOV$cmp $dst,$src" %}
12553   opcode(0x0F,0x40);
12554   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12555   ins_pipe( pipe_cmov_reg );
12556 %}
12557 
12558 // Compare 2 longs and CMOVE doubles
12559 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12561   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12562   ins_cost(200);
12563   expand %{
12564     fcmovDPR_regS(cmp,flags,dst,src);
12565   %}
12566 %}
12567 
12568 // Compare 2 longs and CMOVE doubles
12569 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12571   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12572   ins_cost(200);
12573   expand %{
12574     fcmovD_regS(cmp,flags,dst,src);
12575   %}
12576 %}
12577 
12578 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12580   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12581   ins_cost(200);
12582   expand %{
12583     fcmovFPR_regS(cmp,flags,dst,src);
12584   %}
12585 %}
12586 
12587 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12589   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12590   ins_cost(200);
12591   expand %{
12592     fcmovF_regS(cmp,flags,dst,src);
12593   %}
12594 %}
12595 
12596 //======
12597 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12598 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12599   match( Set flags (CmpL src zero ));
12600   effect(TEMP tmp);
12601   ins_cost(200);
12602   format %{ "MOV    $tmp,$src.lo\n\t"
12603             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12604   ins_encode( long_cmp_flags0( src, tmp ) );
12605   ins_pipe( ialu_reg_reg_long );
12606 %}
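
// OR'ing the low and high halves into a scratch register sets ZF exactly
// when both halves are zero, i.e. when the whole long is zero.  A rough C
// sketch of the condition (illustration only):
//
//   static bool long_eq_zero(jint src_hi, juint src_lo) {
//     return ((juint)src_hi | src_lo) == 0;  // ZF after OR $tmp,$src.hi; NE is the negation
//   }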
12607 
12608 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12609 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12610   match( Set flags (CmpL src1 src2 ));
12611   ins_cost(200+300);
12612   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12613             "JNE,s  skip\n\t"
12614             "CMP    $src1.hi,$src2.hi\n\t"
12615      "skip:\t" %}
12616   ins_encode( long_cmp_flags1( src1, src2 ) );
12617   ins_pipe( ialu_cr_reg_reg );
12618 %}
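
// The sequence above short-circuits: if the low halves differ, ZF is already
// clear and the JNE skips the high-word compare; otherwise ZF comes from the
// high-word compare.  Either way ZF ends up encoding full 64-bit equality.
// A rough C sketch of the condition (illustration only):
//
//   static bool long_eq(jint a_hi, juint a_lo, jint b_hi, juint b_lo) {
//     return (a_lo == b_lo) && (a_hi == b_hi);  // ZF set <=> the longs are equal
//   }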
12619 
12620 // Long compare reg == zero/reg OR reg != zero/reg
12621 // Just a wrapper for a normal branch, plus the predicate test.
12622 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12623   match(If cmp flags);
12624   effect(USE labl);
12625   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12626   expand %{
12627     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12628   %}
12629 %}
12630 
12631 // Compare 2 longs and CMOVE longs.
12632 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12633   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12634   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12635   ins_cost(400);
12636   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12637             "CMOV$cmp $dst.hi,$src.hi" %}
12638   opcode(0x0F,0x40);
12639   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12640   ins_pipe( pipe_cmov_reg_long );
12641 %}
12642 
12643 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12644   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12645   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12646   ins_cost(500);
12647   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12648             "CMOV$cmp $dst.hi,$src.hi" %}
12649   opcode(0x0F,0x40);
12650   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12651   ins_pipe( pipe_cmov_reg_long );
12652 %}
12653 
12654 // Compare 2 longs and CMOVE ints.
12655 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12656   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12657   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12658   ins_cost(200);
12659   format %{ "CMOV$cmp $dst,$src" %}
12660   opcode(0x0F,0x40);
12661   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12662   ins_pipe( pipe_cmov_reg );
12663 %}
12664 
12665 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12666   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12667   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12668   ins_cost(250);
12669   format %{ "CMOV$cmp $dst,$src" %}
12670   opcode(0x0F,0x40);
12671   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12672   ins_pipe( pipe_cmov_mem );
12673 %}
12674 
// Compare 2 longs and CMOVE ptrs.
12676 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12677   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12678   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12679   ins_cost(200);
12680   format %{ "CMOV$cmp $dst,$src" %}
12681   opcode(0x0F,0x40);
12682   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12683   ins_pipe( pipe_cmov_reg );
12684 %}
12685 
12686 // Compare 2 longs and CMOVE doubles
12687 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12689   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12690   ins_cost(200);
12691   expand %{
12692     fcmovDPR_regS(cmp,flags,dst,src);
12693   %}
12694 %}
12695 
12696 // Compare 2 longs and CMOVE doubles
12697 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12699   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12700   ins_cost(200);
12701   expand %{
12702     fcmovD_regS(cmp,flags,dst,src);
12703   %}
12704 %}
12705 
12706 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12708   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12709   ins_cost(200);
12710   expand %{
12711     fcmovFPR_regS(cmp,flags,dst,src);
12712   %}
12713 %}
12714 
12715 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12717   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12718   ins_cost(200);
12719   expand %{
12720     fcmovF_regS(cmp,flags,dst,src);
12721   %}
12722 %}
12723 
12724 //======
12725 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12726 // Same as cmpL_reg_flags_LEGT except must negate src
12727 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12728   match( Set flags (CmpL src zero ));
12729   effect( TEMP tmp );
12730   ins_cost(300);
12731   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12732             "CMP    $tmp,$src.lo\n\t"
12733             "SBB    $tmp,$src.hi\n\t" %}
12734   ins_encode( long_cmp_flags3(src, tmp) );
12735   ins_pipe( ialu_reg_reg_long );
12736 %}
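
// The XOR/CMP/SBB sequence computes 0 - $src with borrow propagation, so the
// flags describe the compare 0 vs $src and the commuted conditions answer
// the original tests.  A rough C sketch of the identity (illustration only):
//
//   static bool long_le_zero(jlong src) { return 0 >= src; }  // GE on 0-vs-src flags
//   static bool long_gt_zero(jlong src) { return 0 <  src; }  // LT on 0-vs-src flags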
12737 
12738 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12739 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12740 // requires a commuted test to get the same result.
12741 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12742   match( Set flags (CmpL src1 src2 ));
12743   effect( TEMP tmp );
12744   ins_cost(300);
12745   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12746             "MOV    $tmp,$src2.hi\n\t"
12747             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12748   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12749   ins_pipe( ialu_cr_reg_reg );
12750 %}
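
// Swapping the operands computes flags for the compare src2 vs src1, so the
// original LE and GT tests are answered by the GE and LT conditions on the
// swapped compare; that is the commuted test cmpOp_commute emits.  A rough C
// sketch of the identity (illustration only):
//
//   static bool long_le(jlong a, jlong b) { return b >= a; }  // a <= b  <=>  b >= a
//   static bool long_gt(jlong a, jlong b) { return b <  a; }  // a >  b  <=>  b <  a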
12751 
// Long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
12754 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12755   match(If cmp flags);
12756   effect(USE labl);
12757   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12758   ins_cost(300);
12759   expand %{
12760     jmpCon(cmp,flags,labl);    // JGT or JLE...
12761   %}
12762 %}
12763 
12764 // Compare 2 longs and CMOVE longs.
12765 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12766   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12767   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12768   ins_cost(400);
12769   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12770             "CMOV$cmp $dst.hi,$src.hi" %}
12771   opcode(0x0F,0x40);
12772   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12773   ins_pipe( pipe_cmov_reg_long );
12774 %}
12775 
12776 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12777   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12778   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12779   ins_cost(500);
12780   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12781             "CMOV$cmp $dst.hi,$src.hi+4" %}
12782   opcode(0x0F,0x40);
12783   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12784   ins_pipe( pipe_cmov_reg_long );
12785 %}
12786 
12787 // Compare 2 longs and CMOVE ints.
12788 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12789   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12790   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12791   ins_cost(200);
12792   format %{ "CMOV$cmp $dst,$src" %}
12793   opcode(0x0F,0x40);
12794   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12795   ins_pipe( pipe_cmov_reg );
12796 %}
12797 
12798 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12799   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12800   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12801   ins_cost(250);
12802   format %{ "CMOV$cmp $dst,$src" %}
12803   opcode(0x0F,0x40);
12804   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12805   ins_pipe( pipe_cmov_mem );
12806 %}
12807 
12808 // Compare 2 longs and CMOVE ptrs.
12809 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12810   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12811   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12812   ins_cost(200);
12813   format %{ "CMOV$cmp $dst,$src" %}
12814   opcode(0x0F,0x40);
12815   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12816   ins_pipe( pipe_cmov_reg );
12817 %}
12818 
12819 // Compare 2 longs and CMOVE doubles
12820 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12822   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12823   ins_cost(200);
12824   expand %{
12825     fcmovDPR_regS(cmp,flags,dst,src);
12826   %}
12827 %}
12828 
12829 // Compare 2 longs and CMOVE doubles
12830 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12832   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12833   ins_cost(200);
12834   expand %{
12835     fcmovD_regS(cmp,flags,dst,src);
12836   %}
12837 %}
12838 
12839 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12841   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12842   ins_cost(200);
12843   expand %{
12844     fcmovFPR_regS(cmp,flags,dst,src);
12845   %}
12846 %}
12847 
12848 
12849 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12851   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12852   ins_cost(200);
12853   expand %{
12854     fcmovF_regS(cmp,flags,dst,src);
12855   %}
12856 %}
12857 
12858 
12859 // ============================================================================
12860 // Procedure Call/Return Instructions
12861 // Call Java Static Instruction
12862 // Note: If this code changes, the corresponding ret_addr_offset() and
12863 //       compute_padding() functions will have to be adjusted.
12864 instruct CallStaticJavaDirect(method meth) %{
12865   match(CallStaticJava);
12866   effect(USE meth);
12867 
12868   ins_cost(300);
12869   format %{ "CALL,static " %}
12870   opcode(0xE8); /* E8 cd */
12871   ins_encode( pre_call_resets,
12872               Java_Static_Call( meth ),
12873               call_epilog,
12874               post_call_FPU );
12875   ins_pipe( pipe_slow );
12876   ins_alignment(4);
12877 %}
12878 
12879 // Call Java Dynamic Instruction
12880 // Note: If this code changes, the corresponding ret_addr_offset() and
12881 //       compute_padding() functions will have to be adjusted.
12882 instruct CallDynamicJavaDirect(method meth) %{
12883   match(CallDynamicJava);
12884   effect(USE meth);
12885 
12886   ins_cost(300);
12887   format %{ "MOV    EAX,(oop)-1\n\t"
12888             "CALL,dynamic" %}
12889   opcode(0xE8); /* E8 cd */
12890   ins_encode( pre_call_resets,
12891               Java_Dynamic_Call( meth ),
12892               call_epilog,
12893               post_call_FPU );
12894   ins_pipe( pipe_slow );
12895   ins_alignment(4);
12896 %}
12897 
12898 // Call Runtime Instruction
12899 instruct CallRuntimeDirect(method meth) %{
12900   match(CallRuntime );
12901   effect(USE meth);
12902 
12903   ins_cost(300);
12904   format %{ "CALL,runtime " %}
12905   opcode(0xE8); /* E8 cd */
12906   // Use FFREEs to clear entries in float stack
12907   ins_encode( pre_call_resets,
12908               FFree_Float_Stack_All,
12909               Java_To_Runtime( meth ),
12910               post_call_FPU );
12911   ins_pipe( pipe_slow );
12912 %}
12913 
12914 // Call runtime without safepoint
12915 instruct CallLeafDirect(method meth) %{
12916   match(CallLeaf);
12917   effect(USE meth);
12918 
12919   ins_cost(300);
12920   format %{ "CALL_LEAF,runtime " %}
12921   opcode(0xE8); /* E8 cd */
12922   ins_encode( pre_call_resets,
12923               FFree_Float_Stack_All,
12924               Java_To_Runtime( meth ),
12925               Verify_FPU_For_Leaf, post_call_FPU );
12926   ins_pipe( pipe_slow );
12927 %}
12928 
12929 instruct CallLeafNoFPDirect(method meth) %{
12930   match(CallLeafNoFP);
12931   effect(USE meth);
12932 
12933   ins_cost(300);
12934   format %{ "CALL_LEAF_NOFP,runtime " %}
12935   opcode(0xE8); /* E8 cd */
12936   ins_encode(Java_To_Runtime(meth));
12937   ins_pipe( pipe_slow );
12938 %}
12939 
12940 
12941 // Return Instruction
12942 // Remove the return address & jump to it.
12943 instruct Ret() %{
12944   match(Return);
12945   format %{ "RET" %}
12946   opcode(0xC3);
12947   ins_encode(OpcP);
12948   ins_pipe( pipe_jmp );
12949 %}
12950 
12951 // Tail Call; Jump from runtime stub to Java code.
12952 // Also known as an 'interprocedural jump'.
12953 // Target of jump will eventually return to caller.
12954 // TailJump below removes the return address.
12955 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12956   match(TailCall jump_target method_oop );
12957   ins_cost(300);
12958   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12959   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12960   ins_encode( OpcP, RegOpc(jump_target) );
12961   ins_pipe( pipe_jmp );
12962 %}
12963 
12964 
12965 // Tail Jump; remove the return address; jump to target.
12966 // TailCall above leaves the return address around.
12967 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12968   match( TailJump jump_target ex_oop );
12969   ins_cost(300);
12970   format %{ "POP    EDX\t# pop return address into dummy\n\t"
12971             "JMP    $jump_target " %}
12972   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12973   ins_encode( enc_pop_rdx,
12974               OpcP, RegOpc(jump_target) );
12975   ins_pipe( pipe_jmp );
12976 %}
12977 
12978 // Create exception oop: created by stack-crawling runtime code.
// The created exception is now available to this handler, and is set up
// just prior to jumping to this handler.  No code emitted.
12981 instruct CreateException( eAXRegP ex_oop )
12982 %{
12983   match(Set ex_oop (CreateEx));
12984 
12985   size(0);
12986   // use the following format syntax
12987   format %{ "# exception oop is in EAX; no code emitted" %}
12988   ins_encode();
12989   ins_pipe( empty );
12990 %}
12991 
12992 
12993 // Rethrow exception:
12994 // The exception oop will come in the first argument position.
12995 // Then JUMP (not call) to the rethrow stub code.
12996 instruct RethrowException()
12997 %{
12998   match(Rethrow);
12999 
13000   // use the following format syntax
13001   format %{ "JMP    rethrow_stub" %}
13002   ins_encode(enc_rethrow);
13003   ins_pipe( pipe_jmp );
13004 %}
13005 
13006 // inlined locking and unlocking
13007 
13008 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13009   predicate(Compile::current()->use_rtm());
13010   match(Set cr (FastLock object box));
13011   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13012   ins_cost(300);
13013   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13014   ins_encode %{
13015     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13016                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13017                  _counters, _rtm_counters, _stack_rtm_counters,
13018                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13019                  true, ra_->C->profile_rtm());
13020   %}
13021   ins_pipe(pipe_slow);
13022 %}
13023 
13024 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13025   predicate(!Compile::current()->use_rtm());
13026   match(Set cr (FastLock object box));
13027   effect(TEMP tmp, TEMP scr, USE_KILL box);
13028   ins_cost(300);
13029   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13030   ins_encode %{
13031     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13032                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13033   %}
13034   ins_pipe(pipe_slow);
13035 %}
13036 
13037 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13038   match(Set cr (FastUnlock object box));
13039   effect(TEMP tmp, USE_KILL box);
13040   ins_cost(300);
13041   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13042   ins_encode %{
13043     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13044   %}
13045   ins_pipe(pipe_slow);
13046 %}
13047 
13048 
13049 
13050 // ============================================================================
13051 // Safepoint Instruction
13052 instruct safePoint_poll(eFlagsReg cr) %{
13053   match(SafePoint);
13054   effect(KILL cr);
13055 
  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable, as the address can be generated with
  // just a sethi, saving an or.  Polling at offset 0, however, puts extra
  // pressure on cache index 0 of the D$: because of alignment (just like the
  // situation at hand) the lower indices tend to see more traffic.  It would
  // be better to move the polling address to offset 0 of the last cache line
  // in the polling page.
13063 
13064   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13065   ins_cost(125);
13066   size(6) ;
13067   ins_encode( Safepoint_Poll() );
13068   ins_pipe( ialu_reg_mem );
13069 %}
13070 
13071 
13072 // ============================================================================
13073 // This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this instruction.
13076 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13077   match(Set dst (ThreadLocal));
13078   effect(DEF dst, KILL cr);
13079 
13080   format %{ "MOV    $dst, Thread::current()" %}
13081   ins_encode %{
13082     Register dstReg = as_Register($dst$$reg);
13083     __ get_thread(dstReg);
13084   %}
13085   ins_pipe( ialu_reg_fat );
13086 %}
13087 
13088 
13089 
13090 //----------PEEPHOLE RULES-----------------------------------------------------
13091 // These must follow all instruction definitions as they use the names
13092 // defined in the instructions definitions.
13093 //
13094 // peepmatch ( root_instr_name [preceding_instruction]* );
13095 //
13096 // peepconstraint %{
13097 // (instruction_number.operand_name relational_op instruction_number.operand_name
13098 //  [, ...] );
13099 // // instruction numbers are zero-based using left to right order in peepmatch
13100 //
13101 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13102 // // provide an instruction_number.operand_name for each operand that appears
13103 // // in the replacement instruction's match rule
13104 //
13105 // ---------VM FLAGS---------------------------------------------------------
13106 //
13107 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13108 //
13109 // Each peephole rule is given an identifying number starting with zero and
13110 // increasing by one in the order seen by the parser.  An individual peephole
13111 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13112 // on the command-line.
13113 //
13114 // ---------CURRENT LIMITATIONS----------------------------------------------
13115 //
13116 // Only match adjacent instructions in same basic block
13117 // Only equality constraints
13118 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13119 // Only one replacement instruction
13120 //
13121 // ---------EXAMPLE----------------------------------------------------------
13122 //
13123 // // pertinent parts of existing instructions in architecture description
13124 // instruct movI(rRegI dst, rRegI src) %{
13125 //   match(Set dst (CopyI src));
13126 // %}
13127 //
13128 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13129 //   match(Set dst (AddI dst src));
13130 //   effect(KILL cr);
13131 // %}
13132 //
13133 // // Change (inc mov) to lea
13134 // peephole %{
//   // increment preceded by register-register move
13136 //   peepmatch ( incI_eReg movI );
13137 //   // require that the destination register of the increment
13138 //   // match the destination register of the move
13139 //   peepconstraint ( 0.dst == 1.dst );
13140 //   // construct a replacement instruction that sets
13141 //   // the destination to ( move's source register + one )
13142 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13143 // %}
13144 //
13145 // Implementation no longer uses movX instructions since
13146 // machine-independent system no longer uses CopyX nodes.
13147 //
13148 // peephole %{
13149 //   peepmatch ( incI_eReg movI );
13150 //   peepconstraint ( 0.dst == 1.dst );
13151 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13152 // %}
13153 //
13154 // peephole %{
13155 //   peepmatch ( decI_eReg movI );
13156 //   peepconstraint ( 0.dst == 1.dst );
13157 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13158 // %}
13159 //
13160 // peephole %{
13161 //   peepmatch ( addI_eReg_imm movI );
13162 //   peepconstraint ( 0.dst == 1.dst );
13163 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13164 // %}
13165 //
13166 // peephole %{
13167 //   peepmatch ( addP_eReg_imm movP );
13168 //   peepconstraint ( 0.dst == 1.dst );
13169 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13170 // %}
13171 
13172 // // Change load of spilled value to only a spill
13173 // instruct storeI(memory mem, rRegI src) %{
13174 //   match(Set mem (StoreI mem src));
13175 // %}
13176 //
13177 // instruct loadI(rRegI dst, memory mem) %{
13178 //   match(Set dst (LoadI mem));
13179 // %}
13180 //
13181 peephole %{
13182   peepmatch ( loadI storeI );
13183   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13184   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13185 %}
13186 
13187 //----------SMARTSPILL RULES---------------------------------------------------
13188 // These must follow all instruction definitions as they use the names
13189 // defined in the instructions definitions.