1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
   30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
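      //
      // For example, the definition of ECX below reads: caller-saved in both the
      // Java and C calling conventions (SOC, SOC), spilled with LoadI/StoreI
      // (Op_RegI), and encoded as 1 wherever the opcodes need a register field.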
  61 
  62 // General Registers
   63 // EBX, ESI, and EDI were previously set as save-on-entry for Java code.
   64 // SOE was turned off in Java code due to the frequent use of uncommon traps.
   65 // Now that the allocator is better, ESI and EDI are turned on as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
   71 // Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
   81 // Ok, so here's the trick: FPR1 is really st(0), except in the midst
   82 // of emission of assembly for a machnode. During the emission the FPU stack
   83 // is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
   84 // the stack will not have this element, so FPR1 == st(0) from the
   85 // oopMap viewpoint. This same weirdness with numbering forces the
   86 // instruction encoding to play games with the register encode to
   87 // correct for this 0/1 issue. See MachSpillCopyNode::implementation,
   88 // where it does flt->flt moves, for an example.
   89 //
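      // A sketch of the resulting encode adjustment (the real code is the flt->flt
      // branch of MachSpillCopyNode::implementation further down in this file):
      //   emit_opcode(*cbuf, 0xD9);                                      // FLD ST(i)
      //   emit_d8    (*cbuf, 0xC0 + Matcher::_regEncode[src_first] - 1); // "-1": FPRn sits at st(n-1) here
      //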
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
 108 reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
 109 reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
 110 reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
 111 reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
 112 reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
 113 reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
 114 reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
  133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
  134 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
  135 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
  136 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between register classes
 146 // any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
 147 // Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI);
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
  222 // Not AX or DX (nor EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
  228 // FPR0 is never allocated; we use clever encodings to fake
  229 // 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
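      // This works because, for the integer pairs used on ia32 (EDX:EAX, EBX:ECX,
      // EDI:EBP -- see the register definition block above), the high half's
      // hardware encoding is the low half's encoding plus two; e.g. EAX encodes
      // as 0 and EDX as 2.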
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
  266 // Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  268   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  269   // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  271   // Store the value into the 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
  277 // Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
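      // Illustrative (not emitted here) sketch of the NegF/NegD and AbsF/AbsD idiom
      // these pools exist for:
      //   ANDPS xmm, [float_signmask_pool]    // AbsF: clear the sign bit
      //   XORPS xmm, [float_signflip_pool]    // NegF: flip the sign bit
      // and analogously ANDPD/XORPD with the double_* pools for doubles.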
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (VM_Version::supports_vzeroupper()) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
 298 
  299 // !!!!! Special hack to get all types of calls to specify the byte offset
 300 //       from the start of the call to the point where the return address
 301 //       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
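      // (A direct CALL rel32 is 1 opcode byte plus a 4-byte displacement, hence 5;
      //  the dynamic call is additionally preceded by a 5-byte MOV, hence 10 -- see
      //  CallDynamicJavaDirectNode::compute_padding below, which skips that MOV.)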
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 // Indicate if the safepoint node needs the polling page as an input.
 318 // Since x86 does have absolute addressing, it doesn't.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return false;
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned to
 328 // ensure that it does not span a cache line so that it can be patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   current_offset += pre_call_resets_size();  // skip fldcw, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return align_up(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_resets_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return align_up(current_offset, alignment_required()) - current_offset;
 342 }
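      // Worked example (assuming alignment_required() == 4 and no pre-call resets):
      // if the call would start at offset 0x7E, its displacement would start at
      // 0x7F; align_up(0x7F, 4) - 0x7F == 1, so one byte of padding is inserted and
      // the patched 4-byte displacement begins at the aligned offset 0x80.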
 343 
 344 // EMIT_RM()
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  407     emit_d8 (cbuf, disp);     // 8-bit displacement
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
  411     emit_d32(cbuf, disp);     // 32-bit displacement
 412   }
 413 }
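      // For illustration only (a hedged sketch, not emitted anywhere as-is), a load
      // of EAX from [ESP+8] composes from the helpers above as:
      //   emit_opcode(cbuf, 0x8B);                  // MOV r32, r/m32
      //   emit_rm    (cbuf, 0x1, EAX_enc, ESP_enc); // ModRM: mod=01 (disp8), reg=EAX, r/m=ESP => SIB follows
      //   emit_rm    (cbuf, 0x0, ESP_enc, ESP_enc); // SIB: no index, base=ESP
      //   emit_d8    (cbuf, 8);                     // 8-bit displacement
      // which produces the byte sequence 8B 44 24 08, i.e. MOV EAX,[ESP + #8].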
 414 
  415 // Emit a register + memory operand (reg field plus base, index, scale, displacement addressing).
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  417   // If there is no index and no scale, use the form without a SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
 511 
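      // Materialize a three-way FP compare result in 'dst':
      // -1 for less-than or unordered (parity set), 0 for equal, +1 for greater-than.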
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
  627     // NOTE: We set the table base offset here because uses of the constant
  628     // table might be emitted before the MachConstantBaseNode itself.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  648   // Remove two words for the return addr and EBP.
 649   framesize -= 2*wordSize;
 650 
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674 
 675   if (C->max_vector_size() > 16) {
 676     // Clear upper bits of YMM registers when current compiled code uses
 677     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 678     _masm.vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684 
 685   int framesize = C->frame_size_in_bytes();
 686   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  687   // Remove two words for the return addr and EBP.
 688   framesize -= 2*wordSize;
 689 
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691 
 692   if (framesize >= 128) {
 693     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 695     emit_d32(cbuf, framesize);
 696   } else if (framesize) {
 697     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 698     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 699     emit_d8(cbuf, framesize);
 700   }
 701 
 702   emit_opcode(cbuf, 0x58 | EBP_enc);
 703 
 704   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 705     __ reserved_stack_check();
 706   }
 707 
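        // A note on the poll emitted below: opcode 0x85 with ModRM 0x05 and a 32-bit
        // absolute address is TEST PollPage,EAX -- 6 bytes total, which is what
        // MachEpilogNode::size() accounts for.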
 708   if (do_polling() && C->is_method_compilation()) {
 709     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 710     emit_opcode(cbuf,0x85);
 711     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 712     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 713   }
 714 }
 715 
 716 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 717   Compile *C = ra_->C;
 718   // If method set FPU control word, restore to standard control word
 719   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 720   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 721   if (do_polling() && C->is_method_compilation()) size += 6;
 722 
 723   int framesize = C->frame_size_in_bytes();
 724   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  725   // Remove two words for the return addr and EBP.
 726   framesize -= 2*wordSize;
 727 
  728   size++; // popl EBP
 729 
 730   if (framesize >= 128) {
 731     size += 6;
 732   } else {
 733     size += framesize ? 3 : 0;
 734   }
 735   size += 64; // added to support ReservedStackAccess
 736   return size;
 737 }
 738 
 739 int MachEpilogNode::reloc() const {
 740   return 0; // a large enough number
 741 }
 742 
 743 const Pipeline * MachEpilogNode::pipeline() const {
 744   return MachNode::pipeline_class();
 745 }
 746 
 747 int MachEpilogNode::safepoint_offset() const { return 0; }
 748 
 749 //=============================================================================
 750 
 751 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 752 static enum RC rc_class( OptoReg::Name reg ) {
 753 
 754   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 755   if (OptoReg::is_stack(reg)) return rc_stack;
 756 
 757   VMReg r = OptoReg::as_VMReg(reg);
 758   if (r->is_Register()) return rc_int;
 759   if (r->is_FloatRegister()) {
 760     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 761     return rc_float;
 762   }
 763   assert(r->is_XMMRegister(), "must be");
 764   return rc_xmm;
 765 }
 766 
 767 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 768                         int opcode, const char *op_str, int size, outputStream* st ) {
 769   if( cbuf ) {
 770     emit_opcode  (*cbuf, opcode );
 771     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 772 #ifndef PRODUCT
 773   } else if( !do_size ) {
 774     if( size != 0 ) st->print("\n\t");
 775     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 776       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 777       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 778     } else { // FLD, FST, PUSH, POP
 779       st->print("%s [ESP + #%d]",op_str,offset);
 780     }
 781 #endif
 782   }
 783   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 784   return size+3+offset_size;
 785 }
 786 
 787 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 788 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 789                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 790   int in_size_in_bits = Assembler::EVEX_32bit;
 791   int evex_encoding = 0;
 792   if (reg_lo+1 == reg_hi) {
 793     in_size_in_bits = Assembler::EVEX_64bit;
 794     evex_encoding = Assembler::VEX_W;
 795   }
 796   if (cbuf) {
 797     MacroAssembler _masm(cbuf);
  798     // EVEX spills remain EVEX: compressed displacement is better than AVX on spill mem operations,
  799     //                          since it maps more cases to a single-byte displacement.
 800     _masm.set_managed();
 801     if (reg_lo+1 == reg_hi) { // double move?
 802       if (is_load) {
 803         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 804       } else {
 805         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 806       }
 807     } else {
 808       if (is_load) {
 809         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 810       } else {
 811         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 812       }
 813     }
 814 #ifndef PRODUCT
 815   } else if (!do_size) {
 816     if (size != 0) st->print("\n\t");
 817     if (reg_lo+1 == reg_hi) { // double move?
 818       if (is_load) st->print("%s %s,[ESP + #%d]",
 819                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 820                               Matcher::regName[reg_lo], offset);
 821       else         st->print("MOVSD  [ESP + #%d],%s",
 822                               offset, Matcher::regName[reg_lo]);
 823     } else {
 824       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 825                               Matcher::regName[reg_lo], offset);
 826       else         st->print("MOVSS  [ESP + #%d],%s",
 827                               offset, Matcher::regName[reg_lo]);
 828     }
 829 #endif
 830   }
 831   bool is_single_byte = false;
 832   if ((UseAVX > 2) && (offset != 0)) {
 833     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 834   }
 835   int offset_size = 0;
 836   if (UseAVX > 2 ) {
 837     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 838   } else {
 839     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 840   }
 841   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 842   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 843   return size+5+offset_size;
 844 }
 845 
 846 
 847 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 848                             int src_hi, int dst_hi, int size, outputStream* st ) {
 849   if (cbuf) {
 850     MacroAssembler _masm(cbuf);
  851     // EVEX spills remain EVEX: the logic for mixing full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
 852     _masm.set_managed();
 853     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 854       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 855                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 856     } else {
 857       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 858                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 859     }
 860 #ifndef PRODUCT
 861   } else if (!do_size) {
 862     if (size != 0) st->print("\n\t");
 863     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 864       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 865         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 866       } else {
 867         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 868       }
 869     } else {
 870       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 871         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 872       } else {
 873         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 874       }
 875     }
 876 #endif
 877   }
 878   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 879   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 880   int sz = (UseAVX > 2) ? 6 : 4;
 881   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 882       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 883   return size + sz;
 884 }
 885 
 886 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 887                             int src_hi, int dst_hi, int size, outputStream* st ) {
 888   // 32-bit
 889   if (cbuf) {
 890     MacroAssembler _masm(cbuf);
  891     // EVEX spills remain EVEX: the logic for mixing full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
 892     _masm.set_managed();
 893     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 894              as_Register(Matcher::_regEncode[src_lo]));
 895 #ifndef PRODUCT
 896   } else if (!do_size) {
 897     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 898 #endif
 899   }
 900   return (UseAVX> 2) ? 6 : 4;
 901 }
 902 
 903 
 904 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 905                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 906   // 32-bit
 907   if (cbuf) {
 908     MacroAssembler _masm(cbuf);
  909     // EVEX spills remain EVEX: the logic for mixing full EVEX, partial EVEX and AVX is complex, so manage EVEX spill code one way.
 910     _masm.set_managed();
 911     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 912              as_XMMRegister(Matcher::_regEncode[src_lo]));
 913 #ifndef PRODUCT
 914   } else if (!do_size) {
 915     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 916 #endif
 917   }
 918   return (UseAVX> 2) ? 6 : 4;
 919 }
 920 
 921 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 922   if( cbuf ) {
 923     emit_opcode(*cbuf, 0x8B );
 924     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 925 #ifndef PRODUCT
 926   } else if( !do_size ) {
 927     if( size != 0 ) st->print("\n\t");
 928     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 929 #endif
 930   }
 931   return size+2;
 932 }
 933 
 934 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 935                                  int offset, int size, outputStream* st ) {
 936   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 937     if( cbuf ) {
 938       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 939       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 940 #ifndef PRODUCT
 941     } else if( !do_size ) {
 942       if( size != 0 ) st->print("\n\t");
 943       st->print("FLD    %s",Matcher::regName[src_lo]);
 944 #endif
 945     }
 946     size += 2;
 947   }
 948 
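        // EBX_num/EDX_num below are used purely for their hardware encodings (3 and 2),
        // which impl_helper places in the ModRM reg field as the opcode extension:
        // /3 selects FSTP (store & pop) and /2 selects FST (store, no pop) for both
        // the 0xD9 (32-bit) and 0xDD (64-bit) opcodes.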
 949   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 950   const char *op_str;
 951   int op;
 952   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 953     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 954     op = 0xDD;
 955   } else {                   // 32-bit store
 956     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 957     op = 0xD9;
 958     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 959   }
 960 
 961   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 962 }
 963 
 964 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 965 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 966                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 967 
 968 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 969                             int stack_offset, int reg, uint ireg, outputStream* st);
 970 
 971 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 972                                      int dst_offset, uint ireg, outputStream* st) {
 973   int calc_size = 0;
 974   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 975   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 976   switch (ireg) {
 977   case Op_VecS:
 978     calc_size = 3+src_offset_size + 3+dst_offset_size;
 979     break;
 980   case Op_VecD: {
 981     calc_size = 3+src_offset_size + 3+dst_offset_size;
 982     int tmp_src_offset = src_offset + 4;
 983     int tmp_dst_offset = dst_offset + 4;
 984     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 985     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 986     calc_size += 3+src_offset_size + 3+dst_offset_size;
 987     break;
 988   }   
 989   case Op_VecX:
 990   case Op_VecY:
 991   case Op_VecZ:
 992     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 993     break;
 994   default:
 995     ShouldNotReachHere();
 996   }
 997   if (cbuf) {
 998     MacroAssembler _masm(cbuf);
 999     int offset = __ offset();
1000     switch (ireg) {
1001     case Op_VecS:
1002       __ pushl(Address(rsp, src_offset));
1003       __ popl (Address(rsp, dst_offset));
1004       break;
1005     case Op_VecD:
1006       __ pushl(Address(rsp, src_offset));
1007       __ popl (Address(rsp, dst_offset));
1008       __ pushl(Address(rsp, src_offset+4));
1009       __ popl (Address(rsp, dst_offset+4));
1010       break;
1011     case Op_VecX:
1012       __ movdqu(Address(rsp, -16), xmm0);
1013       __ movdqu(xmm0, Address(rsp, src_offset));
1014       __ movdqu(Address(rsp, dst_offset), xmm0);
1015       __ movdqu(xmm0, Address(rsp, -16));
1016       break;
1017     case Op_VecY:
1018       __ vmovdqu(Address(rsp, -32), xmm0);
1019       __ vmovdqu(xmm0, Address(rsp, src_offset));
1020       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1021       __ vmovdqu(xmm0, Address(rsp, -32));
1022       break;
1023     case Op_VecZ:
1024       __ evmovdquq(Address(rsp, -64), xmm0, 2);
1025       __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1026       __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1027       __ evmovdquq(xmm0, Address(rsp, -64), 2);
1028       break;
1029     default:
1030       ShouldNotReachHere();
1031     }
1032     int size = __ offset() - offset;
1033     assert(size == calc_size, "incorrect size calculation");
1034     return size;
1035 #ifndef PRODUCT
1036   } else if (!do_size) {
1037     switch (ireg) {
1038     case Op_VecS:
1039       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1040                 "popl    [rsp + #%d]",
1041                 src_offset, dst_offset);
1042       break;
1043     case Op_VecD:
1044       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
 1045                 "popl    [rsp + #%d]\n\t"
1046                 "pushl   [rsp + #%d]\n\t"
 1047                 "popl    [rsp + #%d]",
1048                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1049       break;
1050      case Op_VecX:
1051       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1052                 "movdqu  xmm0, [rsp + #%d]\n\t"
1053                 "movdqu  [rsp + #%d], xmm0\n\t"
1054                 "movdqu  xmm0, [rsp - #16]",
1055                 src_offset, dst_offset);
1056       break;
1057     case Op_VecY:
1058       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1059                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1060                 "vmovdqu [rsp + #%d], xmm0\n\t"
1061                 "vmovdqu xmm0, [rsp - #32]",
1062                 src_offset, dst_offset);
1063       break;
1064     case Op_VecZ:
1065       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1066                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1067                 "vmovdqu [rsp + #%d], xmm0\n\t"
1068                 "vmovdqu xmm0, [rsp - #64]",
1069                 src_offset, dst_offset);
1070       break;
1071     default:
1072       ShouldNotReachHere();
1073     }
1074 #endif
1075   }
1076   return calc_size;
1077 }
1078 
1079 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1080   // Get registers to move
1081   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1082   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1083   OptoReg::Name dst_second = ra_->get_reg_second(this );
1084   OptoReg::Name dst_first = ra_->get_reg_first(this );
1085 
1086   enum RC src_second_rc = rc_class(src_second);
1087   enum RC src_first_rc = rc_class(src_first);
1088   enum RC dst_second_rc = rc_class(dst_second);
1089   enum RC dst_first_rc = rc_class(dst_first);
1090 
1091   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1092 
1093   // Generate spill code!
1094   int size = 0;
1095 
1096   if( src_first == dst_first && src_second == dst_second )
1097     return size;            // Self copy, no move
1098 
1099   if (bottom_type()->isa_vect() != NULL) {
1100     uint ireg = ideal_reg();
1101     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1102     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1103     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1104     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1105       // mem -> mem
1106       int src_offset = ra_->reg2offset(src_first);
1107       int dst_offset = ra_->reg2offset(dst_first);
1108       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1109     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1110       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1111     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1112       int stack_offset = ra_->reg2offset(dst_first);
1113       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1114     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1115       int stack_offset = ra_->reg2offset(src_first);
1116       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1117     } else {
1118       ShouldNotReachHere();
1119     }
1120   }
1121 
1122   // --------------------------------------
1123   // Check for mem-mem move.  push/pop to move.
1124   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1125     if( src_second == dst_first ) { // overlapping stack copy ranges
1126       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1127       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1128       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1129       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1130     }
1131     // move low bits
1132     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1133     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1134     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1135       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1136       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1137     }
1138     return size;
1139   }
1140 
1141   // --------------------------------------
1142   // Check for integer reg-reg copy
1143   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1144     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1145 
1146   // Check for integer store
1147   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1148     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1149 
1150   // Check for integer load
1151   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1152     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1153 
1154   // Check for integer reg-xmm reg copy
1155   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1156     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1157             "no 64 bit integer-float reg moves" );
1158     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1159   }
1160   // --------------------------------------
1161   // Check for float reg-reg copy
1162   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1163     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1164             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1165     if( cbuf ) {
1166 
1167       // Note the mucking with the register encode to compensate for the 0/1
1168       // indexing issue mentioned in a comment in the reg_def sections
1169       // for FPR registers many lines above here.
1170 
1171       if( src_first != FPR1L_num ) {
1172         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1173         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1174         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1175         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1176      } else {
1177         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1178         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1179      }
1180 #ifndef PRODUCT
1181     } else if( !do_size ) {
1182       if( size != 0 ) st->print("\n\t");
1183       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1184       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1185 #endif
1186     }
1187     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1188   }
1189 
1190   // Check for float store
1191   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1192     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1193   }
1194 
1195   // Check for float load
1196   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1197     int offset = ra_->reg2offset(src_first);
1198     const char *op_str;
1199     int op;
1200     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1201       op_str = "FLD_D";
1202       op = 0xDD;
1203     } else {                   // 32-bit load
1204       op_str = "FLD_S";
1205       op = 0xD9;
1206       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1207     }
1208     if( cbuf ) {
1209       emit_opcode  (*cbuf, op );
1210       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1211       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1212       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1213 #ifndef PRODUCT
1214     } else if( !do_size ) {
1215       if( size != 0 ) st->print("\n\t");
1216       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1217 #endif
1218     }
1219     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1220     return size + 3+offset_size+2;
1221   }
1222 
1223   // Check for xmm reg-reg copy
1224   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1225     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1226             (src_first+1 == src_second && dst_first+1 == dst_second),
1227             "no non-adjacent float-moves" );
1228     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1229   }
1230 
1231   // Check for xmm reg-integer reg copy
1232   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1233     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1234             "no 64 bit float-integer reg moves" );
1235     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1236   }
1237 
1238   // Check for xmm store
1239   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1240     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1241   }
1242 
1243   // Check for float xmm load
1244   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1245     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1246   }
1247 
1248   // Copy from float reg to xmm reg
1249   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1250     // copy to the top of stack from floating point reg
1251     // and use LEA to preserve flags
1252     if( cbuf ) {
1253       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1254       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1255       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1256       emit_d8(*cbuf,0xF8);
1257 #ifndef PRODUCT
1258     } else if( !do_size ) {
1259       if( size != 0 ) st->print("\n\t");
1260       st->print("LEA    ESP,[ESP-8]");
1261 #endif
1262     }
1263     size += 4;
1264 
1265     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1266 
1267     // Copy from the temp memory to the xmm reg.
1268     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1269 
1270     if( cbuf ) {
1271       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1272       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1273       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1274       emit_d8(*cbuf,0x08);
1275 #ifndef PRODUCT
1276     } else if( !do_size ) {
1277       if( size != 0 ) st->print("\n\t");
1278       st->print("LEA    ESP,[ESP+8]");
1279 #endif
1280     }
1281     size += 4;
1282     return size;
1283   }
1284 
1285   assert( size > 0, "missed a case" );
1286 
1287   // --------------------------------------------------------------------
1288   // Check for second bits still needing moving.
1289   if( src_second == dst_second )
1290     return size;               // Self copy; no move
1291   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1292 
1293   // Check for second word int-int move
1294   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1295     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1296 
1297   // Check for second word integer store
1298   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1299     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1300 
1301   // Check for second word integer load
1302   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1303     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1304 
1305 
1306   Unimplemented();
1307   return 0; // Mute compiler
1308 }
1309 
1310 #ifndef PRODUCT
1311 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1312   implementation( NULL, ra_, false, st );
1313 }
1314 #endif
1315 
1316 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1317   implementation( &cbuf, ra_, false, NULL );
1318 }
1319 
1320 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1321   return implementation( NULL, ra_, true, NULL );
1322 }
1323 
1324 
1325 //=============================================================================
1326 #ifndef PRODUCT
1327 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1328   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1329   int reg = ra_->get_reg_first(this);
1330   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1331 }
1332 #endif
1333 
1334 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1335   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1336   int reg = ra_->get_encode(this);
1337   if( offset >= 128 ) {
1338     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1339     emit_rm(cbuf, 0x2, reg, 0x04);
1340     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1341     emit_d32(cbuf, offset);
1342   }
1343   else {
1344     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1345     emit_rm(cbuf, 0x1, reg, 0x04);
1346     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1347     emit_d8(cbuf, offset);
1348   }
1349 }
1350 
1351 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1352   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1353   if( offset >= 128 ) {
1354     return 7;
1355   }
1356   else {
1357     return 4;
1358   }
1359 }
1360 
1361 //=============================================================================
1362 #ifndef PRODUCT
1363 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1364   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1365   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1366   st->print_cr("\tNOP");
1367   st->print_cr("\tNOP");
1368   if( !OptoBreakpoint )
1369     st->print_cr("\tNOP");
1370 }
1371 #endif
1372 
1373 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1374   MacroAssembler masm(&cbuf);
1375 #ifdef ASSERT
1376   uint insts_size = cbuf.insts_size();
1377 #endif
1378   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1379   masm.jump_cc(Assembler::notEqual,
1380                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING: these NOPs are critical so that the verified entry point is
     properly aligned for patching by NativeJump::patch_verified_entry() */
1383   int nops_cnt = 2;
1384   if( !OptoBreakpoint ) // Leave space for int3
1385      nops_cnt += 1;
1386   masm.nop(nops_cnt);
1387 
1388   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1389 }
1390 
1391 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1392   return OptoBreakpoint ? 11 : 12;
1393 }
1394 
1395 
1396 //=============================================================================
1397 
1398 int Matcher::regnum_to_fpu_offset(int regnum) {
1399   return regnum - 32; // The FP registers are in the second chunk
1400 }
1401 
// Historically UltraSPARC-specific; true just means we have a fast l2f conversion.
1403 const bool Matcher::convL2FSupported(void) {
1404   return true;
1405 }
1406 
1407 // Is this branch offset short enough that a short branch can be used?
1408 //
1409 // NOTE: If the platform does not provide any short branch variants, then
1410 //       this method should return false for offset 0.
1411 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
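  // Illustrative example (numbers assumed, not from the source): a 2-byte
  // short jump whose target lies 100 bytes past the branch address arrives
  // here with offset 100; after subtracting br_size it becomes 98, which is
  // within [-128, 127], so the short form is usable.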
1415   offset -= br_size;
1416 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly less.
1419   if (rule == jmpConUCF2_rule)
1420     return (-126 <= offset && offset <= 125);
1421   return (-128 <= offset && offset <= 127);
1422 }
1423 
1424 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1426   return false;
1427 }
1428 
1429 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1430 const bool Matcher::init_array_count_is_in_bytes = false;
1431 
1432 // Needs 2 CMOV's for longs.
1433 const int Matcher::long_cmove_cost() { return 1; }
1434 
1435 // No CMOVF/CMOVD with SSE/SSE2
1436 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1437 
1438 // Does the CPU require late expand (see block.cpp for description of late expand)?
1439 const bool Matcher::require_postalloc_expand = false;
1440 
1441 // Do we need to mask the count passed to shift instructions or does
1442 // the cpu only look at the lower 5/6 bits anyway?
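// (On x86 the 32-bit shift instructions already mask a CL count to its low
// 5 bits, e.g. SHL EAX,CL with CL == 33 shifts by 1, so no extra masking is
// required here.)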
1443 const bool Matcher::need_masked_shift_count = false;
1444 
1445 bool Matcher::narrow_oop_use_complex_address() {
1446   ShouldNotCallThis();
1447   return true;
1448 }
1449 
1450 bool Matcher::narrow_klass_use_complex_address() {
1451   ShouldNotCallThis();
1452   return true;
1453 }
1454 
1455 bool Matcher::const_oop_prefer_decode() {
1456   ShouldNotCallThis();
1457   return true;
1458 }
1459 
1460 bool Matcher::const_klass_prefer_decode() {
1461   ShouldNotCallThis();
1462   return true;
1463 }
1464 
1465 // Is it better to copy float constants, or load them directly from memory?
1466 // Intel can load a float constant from a direct address, requiring no
1467 // extra registers.  Most RISCs will have to materialize an address into a
1468 // register first, so they would do better to copy the constant from stack.
1469 const bool Matcher::rematerialize_float_constants = true;
1470 
1471 // If CPU can load and store mis-aligned doubles directly then no fixup is
1472 // needed.  Else we split the double into 2 integer pieces and move it
1473 // piece-by-piece.  Only happens when passing doubles into C code as the
1474 // Java calling convention forces doubles to be aligned.
1475 const bool Matcher::misaligned_doubles_ok = true;
1476 
1477 
1478 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1479   // Get the memory operand from the node
1480   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1481   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1482   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1483   uint opcnt     = 1;                 // First operand
1484   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1485   while( idx >= skipped+num_edges ) {
1486     skipped += num_edges;
1487     opcnt++;                          // Bump operand count
1488     assert( opcnt < numopnds, "Accessing non-existent operand" );
1489     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1490   }
1491 
1492   MachOper *memory = node->_opnds[opcnt];
1493   MachOper *new_memory = NULL;
1494   switch (memory->opcode()) {
1495   case DIRECT:
1496   case INDOFFSET32X:
1497     // No transformation necessary.
1498     return;
1499   case INDIRECT:
1500     new_memory = new indirect_win95_safeOper( );
1501     break;
1502   case INDOFFSET8:
1503     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1504     break;
1505   case INDOFFSET32:
1506     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1507     break;
1508   case INDINDEXOFFSET:
1509     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1510     break;
1511   case INDINDEXSCALE:
1512     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1513     break;
1514   case INDINDEXSCALEOFFSET:
1515     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1516     break;
1517   case LOAD_LONG_INDIRECT:
1518   case LOAD_LONG_INDOFFSET32:
    // These do not use EBP as an address register; they use { EDX, EBX, EDI, ESI }, so no fixup is needed.
1520     return;
1521   default:
1522     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1523     return;
1524   }
1525   node->_opnds[opcnt] = new_memory;
1526 }
1527 
1528 // Advertise here if the CPU requires explicit rounding operations
1529 // to implement the UseStrictFP mode.
1530 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1531 
// Are floats converted to double when stored to the stack during deoptimization?
// On 32-bit x86 they are stored with conversion only when the FPU is used for floats.
1534 bool Matcher::float_in_double() { return (UseSSE == 0); }
1535 
1536 // Do ints take an entire long register or just half?
1537 const bool Matcher::int_in_long = false;
1538 
1539 // Return whether or not this register is ever used as an argument.  This
1540 // function is used on startup to build the trampoline stubs in generateOptoStub.
1541 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1543 bool Matcher::can_be_java_arg( int reg ) {
1544   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1545   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1546   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1547   return false;
1548 }
1549 
1550 bool Matcher::is_spillable_arg( int reg ) {
1551   return can_be_java_arg(reg);
1552 }
1553 
1554 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than the multiply-based code.
  // Only do so when the constant divisor fits into 32 bits
  // (min_jint is excluded because negating it does not
  // yield a valid positive 32-bit value).
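  // Illustrative values (assumed): a divisor of 10 passes both checks, so the
  // hardware IDIV path is used when has_fast_idiv() reports true, while a
  // divisor of 0x100000000L or min_jint fails the check and falls back to the
  // multiply-based expansion.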
1560   return VM_Version::has_fast_idiv() &&
1561          (divisor == (int)divisor && divisor != min_jint);
1562 }
1563 
1564 // Register for DIVI projection of divmodI
1565 RegMask Matcher::divI_proj_mask() {
1566   return EAX_REG_mask();
1567 }
1568 
1569 // Register for MODI projection of divmodI
1570 RegMask Matcher::modI_proj_mask() {
1571   return EDX_REG_mask();
1572 }
1573 
1574 // Register for DIVL projection of divmodL
1575 RegMask Matcher::divL_proj_mask() {
1576   ShouldNotReachHere();
1577   return RegMask();
1578 }
1579 
1580 // Register for MODL projection of divmodL
1581 RegMask Matcher::modL_proj_mask() {
1582   ShouldNotReachHere();
1583   return RegMask();
1584 }
1585 
1586 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1587   return NO_REG_mask();
1588 }
1589 
// Returns true if the high 32 bits of the value are known to be zero.
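// Illustrative examples (assumed ideal-graph shapes): (AndL x (ConL 0xFF)) and
// (ConL 42) both return true, while (ConL 0x100000000) or a plain (AddL x y)
// return false, since nothing constrains their upper halves.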
1591 bool is_operand_hi32_zero(Node* n) {
1592   int opc = n->Opcode();
1593   if (opc == Op_AndL) {
1594     Node* o2 = n->in(2);
1595     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1596       return true;
1597     }
1598   }
1599   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1600     return true;
1601   }
1602   return false;
1603 }
1604 
1605 %}
1606 
1607 //----------ENCODING BLOCK-----------------------------------------------------
1608 // This block specifies the encoding classes used by the compiler to output
1609 // byte streams.  Encoding classes generate functions which are called by
1610 // Machine Instruction Nodes in order to generate the bit encoding of the
1611 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, and COND_INTER.  REG_INTER causes an
1614 // operand to generate a function which returns its register number when
1615 // queried.   CONST_INTER causes an operand to generate a function which
1616 // returns the value of the constant when queried.  MEMORY_INTER causes an
1617 // operand to generate four functions which return the Base Register, the
1618 // Index Register, the Scale Value, and the Offset Value of the operand when
1619 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1621 // associated with each basic boolean condition for a conditional instruction.
1622 // Instructions specify two basic values for encoding.  They use the
1623 // ins_encode keyword to specify their encoding class (which must be one of
1624 // the class names specified in the encoding block), and they use the
1625 // opcode keyword to specify, in order, their primary, secondary, and
1626 // tertiary opcode.  Only the opcode sections which a particular instruction
1627 // needs for encoding need to be specified.
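//
// As an illustrative sketch only (hypothetical shape; effect, ins_cost and
// format sections omitted), an integer-add instruct could pair the OpcP and
// RegReg classes defined below:
//
//   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));
//     opcode(0x03);                        // ADD r32,r/m32
//     ins_encode( OpcP, RegReg(dst,src) ); // emit opcode, then ModRM byte
//   %}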
1628 encode %{
1629   // Build emit functions for each basic byte or larger field in the intel
1630   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1631   // code in the enc_class source block.  Emit functions will live in the
1632   // main source block for now.  In future, we can generalize this by
1633   // adding a syntax that specifies the sizes of fields in an order,
1634   // so that the adlc can build the emit functions automagically
1635 
1636   // Emit primary opcode
1637   enc_class OpcP %{
1638     emit_opcode(cbuf, $primary);
1639   %}
1640 
1641   // Emit secondary opcode
1642   enc_class OpcS %{
1643     emit_opcode(cbuf, $secondary);
1644   %}
1645 
1646   // Emit opcode directly
1647   enc_class Opcode(immI d8) %{
1648     emit_opcode(cbuf, $d8$$constant);
1649   %}
1650 
1651   enc_class SizePrefix %{
1652     emit_opcode(cbuf,0x66);
1653   %}
1654 
1655   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1656     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1657   %}
1658 
1659   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1660     emit_opcode(cbuf,$opcode$$constant);
1661     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1662   %}
1663 
1664   enc_class mov_r32_imm0( rRegI dst ) %{
1665     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1666     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1667   %}
1668 
1669   enc_class cdq_enc %{
1670     // Full implementation of Java idiv and irem; checks for
1671     // special case as described in JVM spec., p.243 & p.271.
1672     //
1673     //         normal case                           special case
1674     //
1675     // input : rax,: dividend                         min_int
1676     //         reg: divisor                          -1
1677     //
1678     // output: rax,: quotient  (= rax, idiv reg)       min_int
1679     //         rdx: remainder (= rax, irem reg)       0
1680     //
    //  Code sequence:
1682     //
1683     //  81 F8 00 00 00 80    cmp         rax,80000000h
1684     //  0F 85 0B 00 00 00    jne         normal_case
1685     //  33 D2                xor         rdx,edx
1686     //  83 F9 FF             cmp         rcx,0FFh
1687     //  0F 84 03 00 00 00    je          done
1688     //                  normal_case:
1689     //  99                   cdq
1690     //  F7 F9                idiv        rax,ecx
1691     //                  done:
1692     //
1693     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1694     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1695     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1696     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1697     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1698     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1699     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1700     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1701     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1702     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1703     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1704     // normal_case:
1705     emit_opcode(cbuf,0x99);                                         // cdq
1706     // idiv (note: must be emitted by the user of this rule)
1707     // normal:
1708   %}
1709 
1710   // Dense encoding for older common ops
1711   enc_class Opc_plus(immI opcode, rRegI reg) %{
1712     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1713   %}
1714 
1715 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
1717   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1718     // Check for 8-bit immediate, and set sign extend bit in opcode
1719     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1720       emit_opcode(cbuf, $primary | 0x02);
1721     }
1722     else {                          // If 32-bit immediate
1723       emit_opcode(cbuf, $primary);
1724     }
1725   %}
1726 
1727   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1728     // Emit primary opcode and set sign-extend bit
1729     // Check for 8-bit immediate, and set sign extend bit in opcode
1730     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
1732     else {                          // If 32-bit immediate
1733       emit_opcode(cbuf, $primary);
1734     }
1735     // Emit r/m byte with secondary opcode, after primary opcode.
1736     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1737   %}
1738 
1739   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1740     // Check for 8-bit immediate, and set sign extend bit in opcode
1741     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1742       $$$emit8$imm$$constant;
1743     }
1744     else {                          // If 32-bit immediate
1745       // Output immediate
1746       $$$emit32$imm$$constant;
1747     }
1748   %}
1749 
1750   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1751     // Emit primary opcode and set sign-extend bit
1752     // Check for 8-bit immediate, and set sign extend bit in opcode
1753     int con = (int)$imm$$constant; // Throw away top bits
1754     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1755     // Emit r/m byte with secondary opcode, after primary opcode.
1756     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1757     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1758     else                               emit_d32(cbuf,con);
1759   %}
1760 
1761   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1762     // Emit primary opcode and set sign-extend bit
1763     // Check for 8-bit immediate, and set sign extend bit in opcode
1764     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1765     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1766     // Emit r/m byte with tertiary opcode, after primary opcode.
1767     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1768     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1769     else                               emit_d32(cbuf,con);
1770   %}
1771 
1772   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1773     emit_cc(cbuf, $secondary, $dst$$reg );
1774   %}
1775 
1776   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1777     int destlo = $dst$$reg;
1778     int desthi = HIGH_FROM_LOW(destlo);
1779     // bswap lo
1780     emit_opcode(cbuf, 0x0F);
1781     emit_cc(cbuf, 0xC8, destlo);
1782     // bswap hi
1783     emit_opcode(cbuf, 0x0F);
1784     emit_cc(cbuf, 0xC8, desthi);
1785     // xchg lo and hi
1786     emit_opcode(cbuf, 0x87);
1787     emit_rm(cbuf, 0x3, destlo, desthi);
1788   %}
1789 
1790   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1791     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1792   %}
1793 
1794   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1795     $$$emit8$primary;
1796     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1797   %}
1798 
1799   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1800     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1801     emit_d8(cbuf, op >> 8 );
1802     emit_d8(cbuf, op & 255);
1803   %}
1804 
1805   // emulate a CMOV with a conditional branch around a MOV
1806   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1807     // Invert sense of branch from sense of CMOV
1808     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1809     emit_d8( cbuf, $brOffs$$constant );
1810   %}
1811 
1812   enc_class enc_PartialSubtypeCheck( ) %{
1813     Register Redi = as_Register(EDI_enc); // result register
1814     Register Reax = as_Register(EAX_enc); // super class
1815     Register Recx = as_Register(ECX_enc); // killed
1816     Register Resi = as_Register(ESI_enc); // sub class
1817     Label miss;
1818 
1819     MacroAssembler _masm(&cbuf);
1820     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1821                                      NULL, &miss,
1822                                      /*set_cond_codes:*/ true);
1823     if ($primary) {
1824       __ xorptr(Redi, Redi);
1825     }
1826     __ bind(miss);
1827   %}
1828 
1829   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1830     MacroAssembler masm(&cbuf);
1831     int start = masm.offset();
1832     if (UseSSE >= 2) {
1833       if (VerifyFPU) {
1834         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1835       }
1836     } else {
1837       // External c_calling_convention expects the FPU stack to be 'clean'.
1838       // Compiled code leaves it dirty.  Do cleanup now.
1839       masm.empty_FPU_stack();
1840     }
1841     if (sizeof_FFree_Float_Stack_All == -1) {
1842       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1843     } else {
1844       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1845     }
1846   %}
1847 
1848   enc_class Verify_FPU_For_Leaf %{
1849     if( VerifyFPU ) {
1850       MacroAssembler masm(&cbuf);
1851       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1852     }
1853   %}
1854 
1855   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1856     // This is the instruction starting address for relocation info.
1857     cbuf.set_insts_mark();
1858     $$$emit8$primary;
1859     // CALL directly to the runtime
1860     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1861                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1862 
1863     if (UseSSE >= 2) {
1864       MacroAssembler _masm(&cbuf);
1865       BasicType rt = tf()->return_type();
1866 
1867       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1868         // A C runtime call where the return value is unused.  In SSE2+
1869         // mode the result needs to be removed from the FPU stack.  It's
1870         // likely that this function call could be removed by the
1871         // optimizer if the C function is a pure function.
1872         __ ffree(0);
1873       } else if (rt == T_FLOAT) {
1874         __ lea(rsp, Address(rsp, -4));
1875         __ fstp_s(Address(rsp, 0));
1876         __ movflt(xmm0, Address(rsp, 0));
1877         __ lea(rsp, Address(rsp,  4));
1878       } else if (rt == T_DOUBLE) {
1879         __ lea(rsp, Address(rsp, -8));
1880         __ fstp_d(Address(rsp, 0));
1881         __ movdbl(xmm0, Address(rsp, 0));
1882         __ lea(rsp, Address(rsp,  8));
1883       }
1884     }
1885   %}
1886 
1887   enc_class pre_call_resets %{
1888     // If method sets FPU control word restore it here
1889     debug_only(int off0 = cbuf.insts_size());
1890     if (ra_->C->in_24_bit_fp_mode()) {
1891       MacroAssembler _masm(&cbuf);
1892       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1893     }
1894     // Clear upper bits of YMM registers when current compiled code uses
1895     // wide vectors to avoid AVX <-> SSE transition penalty during call.
1896     MacroAssembler _masm(&cbuf);
1897     __ vzeroupper();
1898     debug_only(int off1 = cbuf.insts_size());
1899     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1900   %}
1901 
1902   enc_class post_call_FPU %{
1903     // If method sets FPU control word do it here also
1904     if (Compile::current()->in_24_bit_fp_mode()) {
1905       MacroAssembler masm(&cbuf);
1906       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1907     }
1908   %}
1909 
1910   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1911     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1912     // who we intended to call.
1913     cbuf.set_insts_mark();
1914     $$$emit8$primary;
1915 
1916     if (!_method) {
1917       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1918                      runtime_call_Relocation::spec(),
1919                      RELOC_IMM32);
1920     } else {
1921       int method_index = resolved_method_index(cbuf);
1922       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1923                                                   : static_call_Relocation::spec(method_index);
1924       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1925                      rspec, RELOC_DISP32);
1926       // Emit stubs for static call.
1927       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1928       if (stub == NULL) {
1929         ciEnv::current()->record_failure("CodeCache is full");
1930         return;
1931       }
1932     }
1933   %}
1934 
1935   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1936     MacroAssembler _masm(&cbuf);
1937     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1938   %}
1939 
1940   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1941     int disp = in_bytes(Method::from_compiled_offset());
1942     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1943 
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1945     cbuf.set_insts_mark();
1946     $$$emit8$primary;
1947     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1948     emit_d8(cbuf, disp);             // Displacement
1949 
1950   %}
1951 
//   The following encoding is no longer used, but may be restored if the
//   calling convention changes significantly.
1954 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1955 //
1956 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1957 //     // int ic_reg     = Matcher::inline_cache_reg();
1958 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1959 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1960 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1961 //
1962 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1963 //     // // so we load it immediately before the call
1964 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1965 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1966 //
1967 //     // xor rbp,ebp
1968 //     emit_opcode(cbuf, 0x33);
1969 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1970 //
1971 //     // CALL to interpreter.
1972 //     cbuf.set_insts_mark();
1973 //     $$$emit8$primary;
1974 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1975 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1976 //   %}
1977 
1978   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1979     $$$emit8$primary;
1980     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1981     $$$emit8$shift$$constant;
1982   %}
1983 
1984   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1985     // Load immediate does not have a zero or sign extended version
1986     // for 8-bit immediates
1987     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1988     $$$emit32$src$$constant;
1989   %}
1990 
1991   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1992     // Load immediate does not have a zero or sign extended version
1993     // for 8-bit immediates
1994     emit_opcode(cbuf, $primary + $dst$$reg);
1995     $$$emit32$src$$constant;
1996   %}
1997 
1998   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1999     // Load immediate does not have a zero or sign extended version
2000     // for 8-bit immediates
2001     int dst_enc = $dst$$reg;
2002     int src_con = $src$$constant & 0x0FFFFFFFFL;
2003     if (src_con == 0) {
2004       // xor dst, dst
2005       emit_opcode(cbuf, 0x33);
2006       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2007     } else {
2008       emit_opcode(cbuf, $primary + dst_enc);
2009       emit_d32(cbuf, src_con);
2010     }
2011   %}
2012 
2013   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2014     // Load immediate does not have a zero or sign extended version
2015     // for 8-bit immediates
2016     int dst_enc = $dst$$reg + 2;
2017     int src_con = ((julong)($src$$constant)) >> 32;
2018     if (src_con == 0) {
2019       // xor dst, dst
2020       emit_opcode(cbuf, 0x33);
2021       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2022     } else {
2023       emit_opcode(cbuf, $primary + dst_enc);
2024       emit_d32(cbuf, src_con);
2025     }
2026   %}
2027 
2028 
2029   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2030   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2031     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2032   %}
2033 
2034   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2035     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2036   %}
2037 
2038   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2039     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2040   %}
2041 
2042   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2043     $$$emit8$primary;
2044     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2045   %}
2046 
2047   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2048     $$$emit8$secondary;
2049     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2050   %}
2051 
2052   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2053     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2054   %}
2055 
2056   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2057     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2058   %}
2059 
2060   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2061     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2062   %}
2063 
2064   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2065     // Output immediate
2066     $$$emit32$src$$constant;
2067   %}
2068 
2069   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2070     // Output Float immediate bits
2071     jfloat jf = $src$$constant;
2072     int    jf_as_bits = jint_cast( jf );
2073     emit_d32(cbuf, jf_as_bits);
2074   %}
2075 
2076   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2077     // Output Float immediate bits
2078     jfloat jf = $src$$constant;
2079     int    jf_as_bits = jint_cast( jf );
2080     emit_d32(cbuf, jf_as_bits);
2081   %}
2082 
2083   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2084     // Output immediate
2085     $$$emit16$src$$constant;
2086   %}
2087 
2088   enc_class Con_d32(immI src) %{
2089     emit_d32(cbuf,$src$$constant);
2090   %}
2091 
2092   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2093     // Output immediate memory reference
2094     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2095     emit_d32(cbuf, 0x00);
2096   %}
2097 
2098   enc_class lock_prefix( ) %{
2099     if( os::is_MP() )
2100       emit_opcode(cbuf,0xF0);         // [Lock]
2101   %}
2102 
2103   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high-order word of the new value to store but
  //       our register encoding uses rbx.
2108   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2109 
2110     // XCHG  rbx,ecx
2111     emit_opcode(cbuf,0x87);
2112     emit_opcode(cbuf,0xD9);
2113     // [Lock]
2114     if( os::is_MP() )
2115       emit_opcode(cbuf,0xF0);
2116     // CMPXCHG8 [Eptr]
2117     emit_opcode(cbuf,0x0F);
2118     emit_opcode(cbuf,0xC7);
2119     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2120     // XCHG  rbx,ecx
2121     emit_opcode(cbuf,0x87);
2122     emit_opcode(cbuf,0xD9);
2123   %}
2124 
2125   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2126     // [Lock]
2127     if( os::is_MP() )
2128       emit_opcode(cbuf,0xF0);
2129 
2130     // CMPXCHG [Eptr]
2131     emit_opcode(cbuf,0x0F);
2132     emit_opcode(cbuf,0xB1);
2133     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2134   %}
2135 
2136   enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
2137     // [Lock]
2138     if( os::is_MP() )
2139       emit_opcode(cbuf,0xF0);
2140 
2141     // CMPXCHGB [Eptr]
2142     emit_opcode(cbuf,0x0F);
2143     emit_opcode(cbuf,0xB0);
2144     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2145   %}
2146 
2147   enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
2148     // [Lock]
2149     if( os::is_MP() )
2150       emit_opcode(cbuf,0xF0);
2151 
2152     // 16-bit mode
2153     emit_opcode(cbuf, 0x66);
2154 
2155     // CMPXCHGW [Eptr]
2156     emit_opcode(cbuf,0x0F);
2157     emit_opcode(cbuf,0xB1);
2158     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2159   %}
2160 
2161   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2162     int res_encoding = $res$$reg;
2163 
2164     // MOV  res,0
2165     emit_opcode( cbuf, 0xB8 + res_encoding);
2166     emit_d32( cbuf, 0 );
2167     // JNE,s  fail
2168     emit_opcode(cbuf,0x75);
2169     emit_d8(cbuf, 5 );
2170     // MOV  res,1
2171     emit_opcode( cbuf, 0xB8 + res_encoding);
2172     emit_d32( cbuf, 1 );
2173     // fail:
2174   %}
2175 
2176   enc_class set_instruction_start( ) %{
2177     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2178   %}
2179 
2180   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2181     int reg_encoding = $ereg$$reg;
2182     int base  = $mem$$base;
2183     int index = $mem$$index;
2184     int scale = $mem$$scale;
2185     int displace = $mem$$disp;
2186     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2187     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2188   %}
2189 
2190   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2191     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2192     int base  = $mem$$base;
2193     int index = $mem$$index;
2194     int scale = $mem$$scale;
2195     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2196     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2197     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2198   %}
2199 
2200   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2201     int r1, r2;
2202     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2203     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2204     emit_opcode(cbuf,0x0F);
2205     emit_opcode(cbuf,$tertiary);
2206     emit_rm(cbuf, 0x3, r1, r2);
2207     emit_d8(cbuf,$cnt$$constant);
2208     emit_d8(cbuf,$primary);
2209     emit_rm(cbuf, 0x3, $secondary, r1);
2210     emit_d8(cbuf,$cnt$$constant);
2211   %}
2212 
2213   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2214     emit_opcode( cbuf, 0x8B ); // Move
2215     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2216     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2217       emit_d8(cbuf,$primary);
2218       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2219       emit_d8(cbuf,$cnt$$constant-32);
2220     }
2221     emit_d8(cbuf,$primary);
2222     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2223     emit_d8(cbuf,31);
2224   %}
2225 
2226   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2227     int r1, r2;
2228     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2229     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2230 
2231     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2232     emit_rm(cbuf, 0x3, r1, r2);
2233     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2234       emit_opcode(cbuf,$primary);
2235       emit_rm(cbuf, 0x3, $secondary, r1);
2236       emit_d8(cbuf,$cnt$$constant-32);
2237     }
2238     emit_opcode(cbuf,0x33);  // XOR r2,r2
2239     emit_rm(cbuf, 0x3, r2, r2);
2240   %}
2241 
2242   // Clone of RegMem but accepts an extra parameter to access each
2243   // half of a double in memory; it never needs relocation info.
2244   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2245     emit_opcode(cbuf,$opcode$$constant);
2246     int reg_encoding = $rm_reg$$reg;
2247     int base     = $mem$$base;
2248     int index    = $mem$$index;
2249     int scale    = $mem$$scale;
2250     int displace = $mem$$disp + $disp_for_half$$constant;
2251     relocInfo::relocType disp_reloc = relocInfo::none;
2252     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2253   %}
2254 
2255   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2256   //
2257   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2258   // and it never needs relocation information.
2259   // Frequently used to move data between FPU's Stack Top and memory.
2260   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2261     int rm_byte_opcode = $rm_opcode$$constant;
2262     int base     = $mem$$base;
2263     int index    = $mem$$index;
2264     int scale    = $mem$$scale;
2265     int displace = $mem$$disp;
2266     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2267     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2268   %}
2269 
2270   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2271     int rm_byte_opcode = $rm_opcode$$constant;
2272     int base     = $mem$$base;
2273     int index    = $mem$$index;
2274     int scale    = $mem$$scale;
2275     int displace = $mem$$disp;
2276     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2277     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2278   %}
2279 
2280   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2281     int reg_encoding = $dst$$reg;
2282     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2283     int index        = 0x04;            // 0x04 indicates no index
2284     int scale        = 0x00;            // 0x00 indicates no scale
2285     int displace     = $src1$$constant; // 0x00 indicates no displacement
2286     relocInfo::relocType disp_reloc = relocInfo::none;
2287     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2288   %}
2289 
2290   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2291     // Compare dst,src
2292     emit_opcode(cbuf,0x3B);
2293     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2294     // jmp dst < src around move
2295     emit_opcode(cbuf,0x7C);
2296     emit_d8(cbuf,2);
2297     // move dst,src
2298     emit_opcode(cbuf,0x8B);
2299     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2300   %}
2301 
2302   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2303     // Compare dst,src
2304     emit_opcode(cbuf,0x3B);
2305     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2306     // jmp dst > src around move
2307     emit_opcode(cbuf,0x7F);
2308     emit_d8(cbuf,2);
2309     // move dst,src
2310     emit_opcode(cbuf,0x8B);
2311     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2312   %}
2313 
2314   enc_class enc_FPR_store(memory mem, regDPR src) %{
2315     // If src is FPR1, we can just FST to store it.
2316     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2317     int reg_encoding = 0x2; // Just store
2318     int base  = $mem$$base;
2319     int index = $mem$$index;
2320     int scale = $mem$$scale;
2321     int displace = $mem$$disp;
2322     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2323     if( $src$$reg != FPR1L_enc ) {
2324       reg_encoding = 0x3;  // Store & pop
2325       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2326       emit_d8( cbuf, 0xC0-1+$src$$reg );
2327     }
2328     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2329     emit_opcode(cbuf,$primary);
2330     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2331   %}
2332 
2333   enc_class neg_reg(rRegI dst) %{
2334     // NEG $dst
2335     emit_opcode(cbuf,0xF7);
2336     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2337   %}
2338 
2339   enc_class setLT_reg(eCXRegI dst) %{
2340     // SETLT $dst
2341     emit_opcode(cbuf,0x0F);
2342     emit_opcode(cbuf,0x9C);
2343     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2344   %}
2345 
2346   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2347     int tmpReg = $tmp$$reg;
2348 
2349     // SUB $p,$q
2350     emit_opcode(cbuf,0x2B);
2351     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2352     // SBB $tmp,$tmp
2353     emit_opcode(cbuf,0x1B);
2354     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2355     // AND $tmp,$y
2356     emit_opcode(cbuf,0x23);
2357     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2358     // ADD $p,$tmp
2359     emit_opcode(cbuf,0x03);
2360     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2361   %}
2362 
2363   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2364     // TEST shift,32
2365     emit_opcode(cbuf,0xF7);
2366     emit_rm(cbuf, 0x3, 0, ECX_enc);
2367     emit_d32(cbuf,0x20);
2368     // JEQ,s small
2369     emit_opcode(cbuf, 0x74);
2370     emit_d8(cbuf, 0x04);
2371     // MOV    $dst.hi,$dst.lo
2372     emit_opcode( cbuf, 0x8B );
2373     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2374     // CLR    $dst.lo
2375     emit_opcode(cbuf, 0x33);
2376     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2377 // small:
2378     // SHLD   $dst.hi,$dst.lo,$shift
2379     emit_opcode(cbuf,0x0F);
2380     emit_opcode(cbuf,0xA5);
2381     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL    $dst.lo,$shift
2383     emit_opcode(cbuf,0xD3);
2384     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2385   %}
2386 
2387   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2388     // TEST shift,32
2389     emit_opcode(cbuf,0xF7);
2390     emit_rm(cbuf, 0x3, 0, ECX_enc);
2391     emit_d32(cbuf,0x20);
2392     // JEQ,s small
2393     emit_opcode(cbuf, 0x74);
2394     emit_d8(cbuf, 0x04);
2395     // MOV    $dst.lo,$dst.hi
2396     emit_opcode( cbuf, 0x8B );
2397     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2398     // CLR    $dst.hi
2399     emit_opcode(cbuf, 0x33);
2400     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2401 // small:
2402     // SHRD   $dst.lo,$dst.hi,$shift
2403     emit_opcode(cbuf,0x0F);
2404     emit_opcode(cbuf,0xAD);
2405     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
2407     emit_opcode(cbuf,0xD3);
2408     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2409   %}
2410 
2411   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2412     // TEST shift,32
2413     emit_opcode(cbuf,0xF7);
2414     emit_rm(cbuf, 0x3, 0, ECX_enc);
2415     emit_d32(cbuf,0x20);
2416     // JEQ,s small
2417     emit_opcode(cbuf, 0x74);
2418     emit_d8(cbuf, 0x05);
2419     // MOV    $dst.lo,$dst.hi
2420     emit_opcode( cbuf, 0x8B );
2421     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2422     // SAR    $dst.hi,31
2423     emit_opcode(cbuf, 0xC1);
2424     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2425     emit_d8(cbuf, 0x1F );
2426 // small:
2427     // SHRD   $dst.lo,$dst.hi,$shift
2428     emit_opcode(cbuf,0x0F);
2429     emit_opcode(cbuf,0xAD);
2430     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
2432     emit_opcode(cbuf,0xD3);
2433     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2434   %}
2435 
2436 
2437   // ----------------- Encodings for floating point unit -----------------
2438   // May leave result in FPU-TOS or FPU reg depending on opcodes
2439   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2440     $$$emit8$primary;
2441     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2442   %}
2443 
2444   // Pop argument in FPR0 with FSTP ST(0)
2445   enc_class PopFPU() %{
2446     emit_opcode( cbuf, 0xDD );
2447     emit_d8( cbuf, 0xD8 );
2448   %}
2449 
2450   // !!!!! equivalent to Pop_Reg_F
2451   enc_class Pop_Reg_DPR( regDPR dst ) %{
2452     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2453     emit_d8( cbuf, 0xD8+$dst$$reg );
2454   %}
2455 
2456   enc_class Push_Reg_DPR( regDPR dst ) %{
2457     emit_opcode( cbuf, 0xD9 );
2458     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2459   %}
2460 
2461   enc_class strictfp_bias1( regDPR dst ) %{
2462     emit_opcode( cbuf, 0xDB );           // FLD m80real
2463     emit_opcode( cbuf, 0x2D );
2464     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2465     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2466     emit_opcode( cbuf, 0xC8+$dst$$reg );
2467   %}
2468 
2469   enc_class strictfp_bias2( regDPR dst ) %{
2470     emit_opcode( cbuf, 0xDB );           // FLD m80real
2471     emit_opcode( cbuf, 0x2D );
2472     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2473     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2474     emit_opcode( cbuf, 0xC8+$dst$$reg );
2475   %}
2476 
2477   // Special case for moving an integer register to a stack slot.
2478   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2479     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2480   %}
2481 
2482   // Special case for moving a register to a stack slot.
2483   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2484     // Opcode already emitted
2485     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2486     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2487     emit_d32(cbuf, $dst$$disp);   // Displacement
2488   %}
2489 
2490   // Push the integer in stackSlot 'src' onto FP-stack
2491   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2492     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2493   %}
2494 
2495   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2496   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2497     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2498   %}
2499 
2500   // Same as Pop_Mem_F except for opcode
2501   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2502   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2503     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2504   %}
2505 
2506   enc_class Pop_Reg_FPR( regFPR dst ) %{
2507     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2508     emit_d8( cbuf, 0xD8+$dst$$reg );
2509   %}
2510 
2511   enc_class Push_Reg_FPR( regFPR dst ) %{
2512     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2513     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2514   %}
2515 
2516   // Push FPU's float to a stack-slot, and pop FPU-stack
2517   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2518     int pop = 0x02;
2519     if ($src$$reg != FPR1L_enc) {
2520       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2521       emit_d8( cbuf, 0xC0-1+$src$$reg );
2522       pop = 0x03;
2523     }
2524     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2525   %}
2526 
2527   // Push FPU's double to a stack-slot, and pop FPU-stack
2528   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2529     int pop = 0x02;
2530     if ($src$$reg != FPR1L_enc) {
2531       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2532       emit_d8( cbuf, 0xC0-1+$src$$reg );
2533       pop = 0x03;
2534     }
2535     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2536   %}
2537 
2538   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2539   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2540     int pop = 0xD0 - 1; // -1 since we skip FLD
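    // (With opcode 0xDD, a ModRM byte of 0xD0+i encodes FST ST(i) and 0xD8+i
    //  encodes FSTP ST(i); the -1 compensates for the 1-origin FPR encoding
    //  when the FLD below is skipped and dst sits one slot nearer the top.)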
2541     if ($src$$reg != FPR1L_enc) {
2542       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2543       emit_d8( cbuf, 0xC0-1+$src$$reg );
2544       pop = 0xD8;
2545     }
2546     emit_opcode( cbuf, 0xDD );
2547     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2548   %}
2549 
2550 
2551   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2552     // load dst in FPR0
2553     emit_opcode( cbuf, 0xD9 );
2554     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2555     if ($src$$reg != FPR1L_enc) {
2556       // fincstp
2557       emit_opcode (cbuf, 0xD9);
2558       emit_opcode (cbuf, 0xF7);
2559       // swap src with FPR1:
2560       // FXCH FPR1 with src
2561       emit_opcode(cbuf, 0xD9);
2562       emit_d8(cbuf, 0xC8-1+$src$$reg );
2563       // fdecstp
2564       emit_opcode (cbuf, 0xD9);
2565       emit_opcode (cbuf, 0xF6);
2566     }
2567   %}
2568 
2569   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2570     MacroAssembler _masm(&cbuf);
2571     __ subptr(rsp, 8);
2572     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2573     __ fld_d(Address(rsp, 0));
2574     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2575     __ fld_d(Address(rsp, 0));
2576   %}
2577 
2578   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2579     MacroAssembler _masm(&cbuf);
2580     __ subptr(rsp, 4);
2581     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2582     __ fld_s(Address(rsp, 0));
2583     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2584     __ fld_s(Address(rsp, 0));
2585   %}
2586 
2587   enc_class Push_ResultD(regD dst) %{
2588     MacroAssembler _masm(&cbuf);
2589     __ fstp_d(Address(rsp, 0));
2590     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2591     __ addptr(rsp, 8);
2592   %}
2593 
2594   enc_class Push_ResultF(regF dst, immI d8) %{
2595     MacroAssembler _masm(&cbuf);
2596     __ fstp_s(Address(rsp, 0));
2597     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2598     __ addptr(rsp, $d8$$constant);
2599   %}
2600 
2601   enc_class Push_SrcD(regD src) %{
2602     MacroAssembler _masm(&cbuf);
2603     __ subptr(rsp, 8);
2604     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2605     __ fld_d(Address(rsp, 0));
2606   %}
2607 
2608   enc_class push_stack_temp_qword() %{
2609     MacroAssembler _masm(&cbuf);
2610     __ subptr(rsp, 8);
2611   %}
2612 
2613   enc_class pop_stack_temp_qword() %{
2614     MacroAssembler _masm(&cbuf);
2615     __ addptr(rsp, 8);
2616   %}
2617 
2618   enc_class push_xmm_to_fpr1(regD src) %{
2619     MacroAssembler _masm(&cbuf);
2620     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2621     __ fld_d(Address(rsp, 0));
2622   %}
2623 
2624   enc_class Push_Result_Mod_DPR( regDPR src) %{
2625     if ($src$$reg != FPR1L_enc) {
2626       // fincstp
2627       emit_opcode (cbuf, 0xD9);
2628       emit_opcode (cbuf, 0xF7);
2629       // FXCH FPR1 with src
2630       emit_opcode(cbuf, 0xD9);
2631       emit_d8(cbuf, 0xC8-1+$src$$reg );
2632       // fdecstp
2633       emit_opcode (cbuf, 0xD9);
2634       emit_opcode (cbuf, 0xF6);
2635     }
2636     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2637     // // FSTP   FPR$dst$$reg
2638     // emit_opcode( cbuf, 0xDD );
2639     // emit_d8( cbuf, 0xD8+$dst$$reg );
2640   %}
2641 
2642   enc_class fnstsw_sahf_skip_parity() %{
2643     // fnstsw ax
2644     emit_opcode( cbuf, 0xDF );
2645     emit_opcode( cbuf, 0xE0 );
2646     // sahf
2647     emit_opcode( cbuf, 0x9E );
2648     // jnp  ::skip
2649     emit_opcode( cbuf, 0x7B );
2650     emit_opcode( cbuf, 0x05 );
2651   %}
2652 
2653   enc_class emitModDPR() %{
2654     // fprem must be iterative
2655     // :: loop
2656     // fprem
2657     emit_opcode( cbuf, 0xD9 );
2658     emit_opcode( cbuf, 0xF8 );
2659     // wait
2660     emit_opcode( cbuf, 0x9b );
2661     // fnstsw ax
2662     emit_opcode( cbuf, 0xDF );
2663     emit_opcode( cbuf, 0xE0 );
2664     // sahf
2665     emit_opcode( cbuf, 0x9E );
2666     // jp  ::loop
2667     emit_opcode( cbuf, 0x0F );
2668     emit_opcode( cbuf, 0x8A );
2669     emit_opcode( cbuf, 0xF4 );
2670     emit_opcode( cbuf, 0xFF );
2671     emit_opcode( cbuf, 0xFF );
2672     emit_opcode( cbuf, 0xFF );
2673   %}
2674 
2675   enc_class fpu_flags() %{
2676     // fnstsw_ax
2677     emit_opcode( cbuf, 0xDF);
2678     emit_opcode( cbuf, 0xE0);
2679     // test ax,0x0400
2680     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2681     emit_opcode( cbuf, 0xA9 );
2682     emit_d16   ( cbuf, 0x0400 );
2683     // // // This sequence works, but stalls for 12-16 cycles on PPro
2684     // // test rax,0x0400
2685     // emit_opcode( cbuf, 0xA9 );
2686     // emit_d32   ( cbuf, 0x00000400 );
2687     //
2688     // jz exit (no unordered comparison)
2689     emit_opcode( cbuf, 0x74 );
2690     emit_d8    ( cbuf, 0x02 );
2691     // mov ah,1 - treat as LT case (set carry flag)
2692     emit_opcode( cbuf, 0xB4 );
2693     emit_d8    ( cbuf, 0x01 );
2694     // sahf
2695     emit_opcode( cbuf, 0x9E);
2696   %}
2697 
2698   enc_class cmpF_P6_fixup() %{
2699     // Fixup the integer flags in case comparison involved a NaN
2700     //
2701     // JNP exit (no unordered comparison, P-flag is set by NaN)
2702     emit_opcode( cbuf, 0x7B );
2703     emit_d8    ( cbuf, 0x03 );
2704     // MOV AH,1 - treat as LT case (set carry flag)
2705     emit_opcode( cbuf, 0xB4 );
2706     emit_d8    ( cbuf, 0x01 );
2707     // SAHF
2708     emit_opcode( cbuf, 0x9E);
2709     // NOP     // target for branch to avoid branch to branch
2710     emit_opcode( cbuf, 0x90);
2711   %}
2712 
2713 //     fnstsw_ax();
2714 //     sahf();
2715 //     movl(dst, nan_result);
2716 //     jcc(Assembler::parity, exit);
2717 //     movl(dst, less_result);
2718 //     jcc(Assembler::below, exit);
2719 //     movl(dst, equal_result);
2720 //     jcc(Assembler::equal, exit);
2721 //     movl(dst, greater_result);
2722 
2723 // less_result     =  1;
2724 // greater_result  = -1;
2725 // equal_result    = 0;
2726 // nan_result      = -1;
2727 
2728   enc_class CmpF_Result(rRegI dst) %{
2729     // fnstsw_ax();
2730     emit_opcode( cbuf, 0xDF);
2731     emit_opcode( cbuf, 0xE0);
2732     // sahf
2733     emit_opcode( cbuf, 0x9E);
2734     // movl(dst, nan_result);
2735     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2736     emit_d32( cbuf, -1 );
2737     // jcc(Assembler::parity, exit);
2738     emit_opcode( cbuf, 0x7A );
2739     emit_d8    ( cbuf, 0x13 );
2740     // movl(dst, less_result);
2741     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2742     emit_d32( cbuf, -1 );
2743     // jcc(Assembler::below, exit);
2744     emit_opcode( cbuf, 0x72 );
2745     emit_d8    ( cbuf, 0x0C );
2746     // movl(dst, equal_result);
2747     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2748     emit_d32( cbuf, 0 );
2749     // jcc(Assembler::equal, exit);
2750     emit_opcode( cbuf, 0x74 );
2751     emit_d8    ( cbuf, 0x05 );
2752     // movl(dst, greater_result);
2753     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2754     emit_d32( cbuf, 1 );
2755   %}
2756 
2757 
2758   // Compare the longs and set flags
2759   // BROKEN!  Do Not use as-is
2760   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2761     // CMP    $src1.hi,$src2.hi
2762     emit_opcode( cbuf, 0x3B );
2763     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2764     // JNE,s  done
2765     emit_opcode(cbuf,0x75);
2766     emit_d8(cbuf, 2 );
2767     // CMP    $src1.lo,$src2.lo
2768     emit_opcode( cbuf, 0x3B );
2769     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2770 // done:
2771   %}
2772 
2773   enc_class convert_int_long( regL dst, rRegI src ) %{
2774     // mov $dst.lo,$src
2775     int dst_encoding = $dst$$reg;
2776     int src_encoding = $src$$reg;
2777     encode_Copy( cbuf, dst_encoding  , src_encoding );
2778     // mov $dst.hi,$src
2779     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2780     // sar $dst.hi,31
2781     emit_opcode( cbuf, 0xC1 );
2782     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2783     emit_d8(cbuf, 0x1F );
2784   %}
2785 
2786   enc_class convert_long_double( eRegL src ) %{
2787     // push $src.hi
2788     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2789     // push $src.lo
2790     emit_opcode(cbuf, 0x50+$src$$reg  );
2791     // fild 64-bits at [SP]
2792     emit_opcode(cbuf,0xdf);
2793     emit_d8(cbuf, 0x6C);
2794     emit_d8(cbuf, 0x24);
2795     emit_d8(cbuf, 0x00);
2796     // pop stack
2797     emit_opcode(cbuf, 0x83); // add  SP, #8
2798     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2799     emit_d8(cbuf, 0x8);
2800   %}
2801 
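       // High-half multiply with shift: the one-operand IMUL leaves the upper
       // 32 bits of the 64-bit product in EDX, so an arithmetic right shift
       // by a count in [32,63] reduces to SAR EDX,(cnt-32), and to no shift
       // at all when the count is exactly 32.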
2802   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2803     // IMUL   EDX:EAX,$src1
2804     emit_opcode( cbuf, 0xF7 );
2805     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2806     // SAR    EDX,$cnt-32
2807     int shift_count = ((int)$cnt$$constant) - 32;
2808     if (shift_count > 0) {
2809       emit_opcode(cbuf, 0xC1);
2810       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2811       emit_d8(cbuf, shift_count);
2812     }
2813   %}
2814 
2815   // This version omits the trailing ADD ESP,8 stack cleanup
2816   enc_class convert_long_double2( eRegL src ) %{
2817     // push $src.hi
2818     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2819     // push $src.lo
2820     emit_opcode(cbuf, 0x50+$src$$reg  );
2821     // fild 64-bits at [SP]
2822     emit_opcode(cbuf,0xdf);
2823     emit_d8(cbuf, 0x6C);
2824     emit_d8(cbuf, 0x24);
2825     emit_d8(cbuf, 0x00);
2826   %}
2827 
2828   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2829     // Basic idea: long = (long)int * (long)int
2830     // IMUL EDX:EAX, src
2831     emit_opcode( cbuf, 0xF7 );
2832     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2833   %}
2834 
2835   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2836     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2837     // MUL EDX:EAX, src
2838     emit_opcode( cbuf, 0xF7 );
2839     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2840   %}
2841 
2842   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2843     // Basic idea: lo(result) = lo(x_lo * y_lo)
2844     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
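         //   (x_hi*2^32 + x_lo) * (y_hi*2^32 + y_lo) mod 2^64
         //     = x_lo*y_lo + (x_hi*y_lo + x_lo*y_hi)*2^32   (mod 2^64)
         // The x_hi*y_hi term only affects bits 64 and up, so it is dropped;
         // the sequence below accumulates the two cross terms into EDX.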
2845     // MOV    $tmp,$src.lo
2846     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2847     // IMUL   $tmp,EDX
2848     emit_opcode( cbuf, 0x0F );
2849     emit_opcode( cbuf, 0xAF );
2850     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2851     // MOV    EDX,$src.hi
2852     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2853     // IMUL   EDX,EAX
2854     emit_opcode( cbuf, 0x0F );
2855     emit_opcode( cbuf, 0xAF );
2856     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2857     // ADD    $tmp,EDX
2858     emit_opcode( cbuf, 0x03 );
2859     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2860     // MUL   EDX:EAX,$src.lo
2861     emit_opcode( cbuf, 0xF7 );
2862     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2863     // ADD    EDX,$tmp
2864     emit_opcode( cbuf, 0x03 );
2865     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2866   %}
2867 
2868   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2869     // Basic idea: lo(result) = lo(src * y_lo)
2870     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2871     // IMUL   $tmp,EDX,$src
2872     emit_opcode( cbuf, 0x6B );
2873     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2874     emit_d8( cbuf, (int)$src$$constant );
2875     // MOV    EDX,$src
2876     emit_opcode(cbuf, 0xB8 + EDX_enc);
2877     emit_d32( cbuf, (int)$src$$constant );
2878     // MUL   EDX:EAX,EDX
2879     emit_opcode( cbuf, 0xF7 );
2880     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2881     // ADD    EDX,$tmp
2882     emit_opcode( cbuf, 0x03 );
2883     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2884   %}
2885 
2886   enc_class long_div( eRegL src1, eRegL src2 ) %{
2887     // PUSH src1.hi
2888     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2889     // PUSH src1.lo
2890     emit_opcode(cbuf,               0x50+$src1$$reg  );
2891     // PUSH src2.hi
2892     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2893     // PUSH src2.lo
2894     emit_opcode(cbuf,               0x50+$src2$$reg  );
2895     // CALL directly to the runtime
2896     cbuf.set_insts_mark();
2897     emit_opcode(cbuf,0xE8);       // Call into runtime
2898     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2899     // Restore stack
2900     emit_opcode(cbuf, 0x83); // add  SP,#16  (pop the four pushed words)
2901     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2902     emit_d8(cbuf, 4*4);
2903   %}
2904 
2905   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2906     // PUSH src1.hi
2907     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2908     // PUSH src1.lo
2909     emit_opcode(cbuf,               0x50+$src1$$reg  );
2910     // PUSH src2.hi
2911     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2912     // PUSH src2.lo
2913     emit_opcode(cbuf,               0x50+$src2$$reg  );
2914     // CALL directly to the runtime
2915     cbuf.set_insts_mark();
2916     emit_opcode(cbuf,0xE8);       // Call into runtime
2917     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2918     // Restore stack
2919     emit_opcode(cbuf, 0x83); // add  SP,#16  (pop the four pushed words)
2920     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2921     emit_d8(cbuf, 4*4);
2922   %}
2923 
2924   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2925     // MOV   $tmp,$src.lo
2926     emit_opcode(cbuf, 0x8B);
2927     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2928     // OR    $tmp,$src.hi
2929     emit_opcode(cbuf, 0x0B);
2930     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2931   %}
2932 
2933   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2934     // CMP    $src1.lo,$src2.lo
2935     emit_opcode( cbuf, 0x3B );
2936     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2937     // JNE,s  skip
2938     emit_cc(cbuf, 0x70, 0x5);
2939     emit_d8(cbuf,2);
2940     // CMP    $src1.hi,$src2.hi
2941     emit_opcode( cbuf, 0x3B );
2942     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2943   %}
2944 
2945   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2946     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2947     emit_opcode( cbuf, 0x3B );
2948     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2949     // MOV    $tmp,$src1.hi
2950     emit_opcode( cbuf, 0x8B );
2951     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2952     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2953     emit_opcode( cbuf, 0x1B );
2954     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2955   %}
2956 
2957   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2958     // XOR    $tmp,$tmp
2959     emit_opcode(cbuf,0x33);  // XOR
2960     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2961     // CMP    $tmp,$src.lo
2962     emit_opcode( cbuf, 0x3B );
2963     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2964     // SBB    $tmp,$src.hi
2965     emit_opcode( cbuf, 0x1B );
2966     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2967   %}
2968 
2969  // Sniff, sniff... smells like Gnu Superoptimizer
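       // Negate a 64-bit value: the low word is simply negated, while the
       // high word becomes -hi minus a borrow that is 1 whenever lo was
       // non-zero.  NEG lo sets CF in exactly that case, so the trailing
       // SBB hi,0 applies the borrow.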
2970   enc_class neg_long( eRegL dst ) %{
2971     emit_opcode(cbuf,0xF7);    // NEG hi
2972     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2973     emit_opcode(cbuf,0xF7);    // NEG lo
2974     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2975     emit_opcode(cbuf,0x83);    // SBB hi,0
2976     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2977     emit_d8    (cbuf,0 );
2978   %}
2979 
2980   enc_class enc_pop_rdx() %{
2981     emit_opcode(cbuf,0x5A);
2982   %}
2983 
2984   enc_class enc_rethrow() %{
2985     cbuf.set_insts_mark();
2986     emit_opcode(cbuf, 0xE9);        // jmp    entry
2987     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2988                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2989   %}
2990 
2991 
2992   // Convert a double to an int.  Java semantics require we do complex
2993   // manglelations in the corner cases.  So we set the rounding mode to
2994   // 'zero', store the darned double down as an int, and reset the
2995   // rounding mode to 'nearest'.  The hardware throws an exception which
2996   // patches up the correct value directly to the stack.
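       // The Java corner cases: (int)NaN is 0 and out-of-range values
       // saturate to Integer.MIN_VALUE / Integer.MAX_VALUE, whereas FISTP
       // stores the "integer indefinite" 0x80000000 for all of them.  That is
       // why the result is compared against 0x80000000 below and, on a match,
       // the conversion is redone in the d2i_wrapper stub.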
2997   enc_class DPR2I_encoding( regDPR src ) %{
2998     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2999     // exceptions here, so that a NaN or other corner-case value will
3000     // throw an exception (but normal values get converted at full speed).
3001     // However, I2C adapters and other float-stack manglers leave pending
3002     // invalid-op exceptions hanging.  We would have to clear them before
3003     // enabling them and that is more expensive than just testing for the
3004     // invalid value Intel stores down in the corner cases.
3005     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3006     emit_opcode(cbuf,0x2D);
3007     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3008     // Allocate a word
3009     emit_opcode(cbuf,0x83);            // SUB ESP,4
3010     emit_opcode(cbuf,0xEC);
3011     emit_d8(cbuf,0x04);
3012     // Encoding assumes a double has been pushed into FPR0.
3013     // Store down the double as an int, popping the FPU stack
3014     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
3015     emit_opcode(cbuf,0x1C);
3016     emit_d8(cbuf,0x24);
3017     // Restore the rounding mode; mask the exception
3018     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3019     emit_opcode(cbuf,0x2D);
3020     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3021         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3022         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3023 
3024     // Load the converted int; adjust CPU stack
3025     emit_opcode(cbuf,0x58);       // POP EAX
3026     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3027     emit_d32   (cbuf,0x80000000); //         0x80000000
3028     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3029     emit_d8    (cbuf,0x07);       // Size of slow_call
3030     // Push src onto stack slow-path
3031     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3032     emit_d8    (cbuf,0xC0-1+$src$$reg );
3033     // CALL directly to the runtime
3034     cbuf.set_insts_mark();
3035     emit_opcode(cbuf,0xE8);       // Call into runtime
3036     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3037     // Carry on here...
3038   %}
3039 
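       // Convert a double to a long.  Same scheme as DPR2I_encoding above,
       // except that the 64-bit integer indefinite is 0x8000000000000000, so
       // the slow path is taken only when EDX == 0x80000000 and EAX == 0.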
3040   enc_class DPR2L_encoding( regDPR src ) %{
3041     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3042     emit_opcode(cbuf,0x2D);
3043     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3044     // Allocate two words
3045     emit_opcode(cbuf,0x83);            // SUB ESP,8
3046     emit_opcode(cbuf,0xEC);
3047     emit_d8(cbuf,0x08);
3048     // Encoding assumes a double has been pushed into FPR0.
3049     // Store down the double as a long, popping the FPU stack
3050     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3051     emit_opcode(cbuf,0x3C);
3052     emit_d8(cbuf,0x24);
3053     // Restore the rounding mode; mask the exception
3054     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3055     emit_opcode(cbuf,0x2D);
3056     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3057         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3058         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3059 
3060     // Load the converted long; adjust CPU stack
3061     emit_opcode(cbuf,0x58);       // POP EAX
3062     emit_opcode(cbuf,0x5A);       // POP EDX
3063     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3064     emit_d8    (cbuf,0xFA);       // rdx
3065     emit_d32   (cbuf,0x80000000); //         0x80000000
3066     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3067     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3068     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3069     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3070     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3071     emit_d8    (cbuf,0x07);       // Size of slow_call
3072     // Push src onto stack slow-path
3073     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3074     emit_d8    (cbuf,0xC0-1+$src$$reg );
3075     // CALL directly to the runtime
3076     cbuf.set_insts_mark();
3077     emit_opcode(cbuf,0xE8);       // Call into runtime
3078     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3079     // Carry on here...
3080   %}
3081 
3082   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3083     // Operand was loaded from memory into fp ST (stack top)
3084     // FMUL   ST,$src  /* D8 C8+i */
3085     emit_opcode(cbuf, 0xD8);
3086     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3087   %}
3088 
3089   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3090     // FADD   ST,src2  /* D8 C0+i */
3091     emit_opcode(cbuf, 0xD8);
3092     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3093     // could use FADDP  src2,fpST  /* DE C0+i */
3094   %}
3095 
3096   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3097     // FADDP  src2,ST  /* DE C0+i */
3098     emit_opcode(cbuf, 0xDE);
3099     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3100   %}
3101 
3102   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3103     // Operand has been loaded into fp ST (stack top)
3104       // FSUB   ST,$src1
3105       emit_opcode(cbuf, 0xD8);
3106       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3107 
3108       // FDIV
3109       emit_opcode(cbuf, 0xD8);
3110       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3111   %}
3112 
3113   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3114     // Operand was loaded from memory into fp ST (stack top)
3115     // FADD   ST,$src  /* D8 C0+i */
3116     emit_opcode(cbuf, 0xD8);
3117     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3118 
3119     // FMUL  ST,src2  /* D8 C8+i */
3120     emit_opcode(cbuf, 0xD8);
3121     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3122   %}
3123 
3124 
3125   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3126     // Operand was loaded from memory into fp ST (stack top)
3127     // FADD   ST,$src  /* D8 C0+i */
3128     emit_opcode(cbuf, 0xD8);
3129     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3130 
3131     // FMULP  src2,ST  /* DE C8+i */
3132     emit_opcode(cbuf, 0xDE);
3133     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3134   %}
3135 
3136   // Atomically load the volatile long
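       // FILD/FISTP move all 64 bits through the FPU in a single instruction
       // each; the 64-bit significand of the extended-precision format
       // represents every long exactly, so the bit pattern round-trips
       // unchanged.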
3137   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3138     emit_opcode(cbuf,0xDF);
3139     int rm_byte_opcode = 0x05;
3140     int base     = $mem$$base;
3141     int index    = $mem$$index;
3142     int scale    = $mem$$scale;
3143     int displace = $mem$$disp;
3144     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3145     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3146     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3147   %}
3148 
3149   // Volatile Store Long.  Must be atomic, so move it into
3150   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3151   // target address before the store (for null-ptr checks)
3152   // so the memory operand is used twice in the encoding.
3153   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3154     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3155     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3156     emit_opcode(cbuf,0xDF);
3157     int rm_byte_opcode = 0x07;
3158     int base     = $mem$$base;
3159     int index    = $mem$$index;
3160     int scale    = $mem$$scale;
3161     int displace = $mem$$disp;
3162     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3163     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3164   %}
3165 
3166   // Safepoint Poll.  This polls the safepoint page, and causes an
3167   // exception if it is not readable. Unfortunately, it kills the condition code
3168 // in the process.
3169 // We currently use TESTL [spp],EDI.
3170   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3171 
3172   enc_class Safepoint_Poll() %{
3173     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3174     emit_opcode(cbuf,0x85);
3175     emit_rm (cbuf, 0x0, 0x7, 0x5);
3176     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3177   %}
3178 %}
3179 
3180 
3181 //----------FRAME--------------------------------------------------------------
3182 // Definition of frame structure and management information.
3183 //
3184 //  S T A C K   L A Y O U T    Allocators stack-slot number
3185 //                             |   (to get allocators register number
3186 //  G  Owned by    |        |  v    add OptoReg::stack0())
3187 //  r   CALLER     |        |
3188 //  o     |        +--------+      pad to even-align allocators stack-slot
3189 //  w     V        |  pad0  |        numbers; owned by CALLER
3190 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3191 //  h     ^        |   in   |  5
3192 //        |        |  args  |  4   Holes in incoming args owned by SELF
3193 //  |     |        |        |  3
3194 //  |     |        +--------+
3195 //  V     |        | old out|      Empty on Intel, window on Sparc
3196 //        |    old |preserve|      Must be even aligned.
3197 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3198 //        |        |   in   |  3   area for Intel ret address
3199 //     Owned by    |preserve|      Empty on Sparc.
3200 //       SELF      +--------+
3201 //        |        |  pad2  |  2   pad to align old SP
3202 //        |        +--------+  1
3203 //        |        | locks  |  0
3204 //        |        +--------+----> OptoReg::stack0(), even aligned
3205 //        |        |  pad1  | 11   pad to align new SP
3206 //        |        +--------+
3207 //        |        |        | 10
3208 //        |        | spills |  9   spills
3209 //        V        |        |  8   (pad0 slot for callee)
3210 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3211 //        ^        |  out   |  7
3212 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3213 //     Owned by    +--------+
3214 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3215 //        |    new |preserve|      Must be even-aligned.
3216 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3217 //        |        |        |
3218 //
3219 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3220 //         known from SELF's arguments and the Java calling convention.
3221 //         Region 6-7 is determined per call site.
3222 // Note 2: If the calling convention leaves holes in the incoming argument
3223 //         area, those holes are owned by SELF.  Holes in the outgoing area
3224 //         are owned by the CALLEE.  Holes should not be necessary in the
3225 //         incoming area, as the Java calling convention is completely under
3226 //         the control of the AD file.  Doubles can be sorted and packed to
3227 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3228 //         varargs C calling conventions.
3229 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3230 //         even aligned with pad0 as needed.
3231 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3232 //         region 6-11 is even aligned; it may be padded out more so that
3233 //         the region from SP to FP meets the minimum stack alignment.
3234 
3235 frame %{
3236   // What direction does stack grow in (assumed to be same for C & Java)
3237   stack_direction(TOWARDS_LOW);
3238 
3239   // These three registers define part of the calling convention
3240   // between compiled code and the interpreter.
3241   inline_cache_reg(EAX);                // Inline Cache Register
3242   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3243 
3244   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3245   cisc_spilling_operand_name(indOffset32);
3246 
3247   // Number of stack slots consumed by locking an object
3248   sync_stack_slots(1);
3249 
3250   // Compiled code's Frame Pointer
3251   frame_pointer(ESP);
3252   // Interpreter stores its frame pointer in a register which is
3253   // stored to the stack by I2CAdaptors.
3254   // I2CAdaptors convert from interpreted Java to compiled Java.
3255   interpreter_frame_pointer(EBP);
3256 
3257   // Stack alignment requirement
3258   // Alignment size in bytes (128-bit -> 16 bytes)
3259   stack_alignment(StackAlignmentInBytes);
3260 
3261   // Number of stack slots between incoming argument block and the start of
3262   // a new frame.  The PROLOG must add this many slots to the stack.  The
3263   // EPILOG must remove this many slots.  Intel needs one slot for
3264   // return address and one for rbp (must save rbp).
3265   in_preserve_stack_slots(2+VerifyStackAtCalls);
3266 
3267   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3268   // for calls to C.  Supports the var-args backing area for register parms.
3269   varargs_C_out_slots_killed(0);
3270 
3271   // The after-PROLOG location of the return address.  Location of
3272   // return address specifies a type (REG or STACK) and a number
3273   // representing the register number (i.e. - use a register name) or
3274   // stack slot.
3275   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3276   // Otherwise, it is above the locks and verification slot and alignment word
3277   return_addr(STACK - 1 +
3278               align_up((Compile::current()->in_preserve_stack_slots() +
3279                         Compile::current()->fixed_slots()),
3280                        stack_alignment_in_slots()));
3281 
3282   // Body of function which returns an integer array locating
3283   // arguments either in registers or in stack slots.  Passed an array
3284   // of ideal registers called "sig" and a "length" count.  Stack-slot
3285   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3286   // arguments for a CALLEE.  Incoming stack arguments are
3287   // automatically biased by the preserve_stack_slots field above.
3288   calling_convention %{
3289     // No difference between incoming/outgoing, just pass false
3290     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3291   %}
3292 
3293 
3294   // Body of function which returns an integer array locating
3295   // arguments either in registers or in stack slots.  Passed an array
3296   // of ideal registers called "sig" and a "length" count.  Stack-slot
3297   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3298   // arguments for a CALLEE.  Incoming stack arguments are
3299   // automatically biased by the preserve_stack_slots field above.
3300   c_calling_convention %{
3301     // This is obviously always outgoing
3302     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3303   %}
3304 
3305   // Location of C & interpreter return values
3306   c_return_value %{
3307     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3308     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3309     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3310 
3311     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3312     // that C functions return float and double results in XMM0.
3313     if( ideal_reg == Op_RegD && UseSSE>=2 )
3314       return OptoRegPair(XMM0b_num,XMM0_num);
3315     if( ideal_reg == Op_RegF && UseSSE>=2 )
3316       return OptoRegPair(OptoReg::Bad,XMM0_num);
3317 
3318     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3319   %}
3320 
3321   // Location of return values
3322   return_value %{
3323     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3324     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3325     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3326     if( ideal_reg == Op_RegD && UseSSE>=2 )
3327       return OptoRegPair(XMM0b_num,XMM0_num);
3328     if( ideal_reg == Op_RegF && UseSSE>=1 )
3329       return OptoRegPair(OptoReg::Bad,XMM0_num);
3330     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3331   %}
3332 
3333 %}
3334 
3335 //----------ATTRIBUTES---------------------------------------------------------
3336 //----------Operand Attributes-------------------------------------------------
3337 op_attrib op_cost(0);        // Required cost attribute
3338 
3339 //----------Instruction Attributes---------------------------------------------
3340 ins_attrib ins_cost(100);       // Required cost attribute
3341 ins_attrib ins_size(8);         // Required size attribute (in bits)
3342 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3343                                 // non-matching short branch variant of some
3344                                 // long branch?
3345 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3346                                 // specifies the alignment that some part of the instruction (not
3347                                 // necessarily the start) requires.  If > 1, a compute_padding()
3348                                 // function must be provided for the instruction
3349 
3350 //----------OPERANDS-----------------------------------------------------------
3351 // Operand definitions must precede instruction definitions for correct parsing
3352 // in the ADLC because operands constitute user defined types which are used in
3353 // instruction definitions.
3354 
3355 //----------Simple Operands----------------------------------------------------
3356 // Immediate Operands
3357 // Integer Immediate
3358 operand immI() %{
3359   match(ConI);
3360 
3361   op_cost(10);
3362   format %{ %}
3363   interface(CONST_INTER);
3364 %}
3365 
3366 // Constant for test vs zero
3367 operand immI0() %{
3368   predicate(n->get_int() == 0);
3369   match(ConI);
3370 
3371   op_cost(0);
3372   format %{ %}
3373   interface(CONST_INTER);
3374 %}
3375 
3376 // Constant for increment
3377 operand immI1() %{
3378   predicate(n->get_int() == 1);
3379   match(ConI);
3380 
3381   op_cost(0);
3382   format %{ %}
3383   interface(CONST_INTER);
3384 %}
3385 
3386 // Constant for decrement
3387 operand immI_M1() %{
3388   predicate(n->get_int() == -1);
3389   match(ConI);
3390 
3391   op_cost(0);
3392   format %{ %}
3393   interface(CONST_INTER);
3394 %}
3395 
3396 // Valid scale values for addressing modes
3397 operand immI2() %{
3398   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3399   match(ConI);
3400 
3401   format %{ %}
3402   interface(CONST_INTER);
3403 %}
3404 
3405 operand immI8() %{
3406   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3407   match(ConI);
3408 
3409   op_cost(5);
3410   format %{ %}
3411   interface(CONST_INTER);
3412 %}
3413 
3414 operand immI16() %{
3415   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3416   match(ConI);
3417 
3418   op_cost(10);
3419   format %{ %}
3420   interface(CONST_INTER);
3421 %}
3422 
3423 // Int Immediate non-negative
3424 operand immU31()
3425 %{
3426   predicate(n->get_int() >= 0);
3427   match(ConI);
3428 
3429   op_cost(0);
3430   format %{ %}
3431   interface(CONST_INTER);
3432 %}
3433 
3434 // Constant for long shifts
3435 operand immI_32() %{
3436   predicate( n->get_int() == 32 );
3437   match(ConI);
3438 
3439   op_cost(0);
3440   format %{ %}
3441   interface(CONST_INTER);
3442 %}
3443 
3444 operand immI_1_31() %{
3445   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3446   match(ConI);
3447 
3448   op_cost(0);
3449   format %{ %}
3450   interface(CONST_INTER);
3451 %}
3452 
3453 operand immI_32_63() %{
3454   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3455   match(ConI);
3456   op_cost(0);
3457 
3458   format %{ %}
3459   interface(CONST_INTER);
3460 %}
3461 
3462 operand immI_1() %{
3463   predicate( n->get_int() == 1 );
3464   match(ConI);
3465 
3466   op_cost(0);
3467   format %{ %}
3468   interface(CONST_INTER);
3469 %}
3470 
3471 operand immI_2() %{
3472   predicate( n->get_int() == 2 );
3473   match(ConI);
3474 
3475   op_cost(0);
3476   format %{ %}
3477   interface(CONST_INTER);
3478 %}
3479 
3480 operand immI_3() %{
3481   predicate( n->get_int() == 3 );
3482   match(ConI);
3483 
3484   op_cost(0);
3485   format %{ %}
3486   interface(CONST_INTER);
3487 %}
3488 
3489 // Pointer Immediate
3490 operand immP() %{
3491   match(ConP);
3492 
3493   op_cost(10);
3494   format %{ %}
3495   interface(CONST_INTER);
3496 %}
3497 
3498 // NULL Pointer Immediate
3499 operand immP0() %{
3500   predicate( n->get_ptr() == 0 );
3501   match(ConP);
3502   op_cost(0);
3503 
3504   format %{ %}
3505   interface(CONST_INTER);
3506 %}
3507 
3508 // Long Immediate
3509 operand immL() %{
3510   match(ConL);
3511 
3512   op_cost(20);
3513   format %{ %}
3514   interface(CONST_INTER);
3515 %}
3516 
3517 // Long Immediate zero
3518 operand immL0() %{
3519   predicate( n->get_long() == 0L );
3520   match(ConL);
3521   op_cost(0);
3522 
3523   format %{ %}
3524   interface(CONST_INTER);
3525 %}
3526 
3527 // Long Immediate minus-one
3528 operand immL_M1() %{
3529   predicate( n->get_long() == -1L );
3530   match(ConL);
3531   op_cost(0);
3532 
3533   format %{ %}
3534   interface(CONST_INTER);
3535 %}
3536 
3537 // Long immediate from 0 to 127.
3538 // Used for a shorter form of long mul by 10.
3539 operand immL_127() %{
3540   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3541   match(ConL);
3542   op_cost(0);
3543 
3544   format %{ %}
3545   interface(CONST_INTER);
3546 %}
3547 
3548 // Long Immediate: low 32-bit mask
3549 operand immL_32bits() %{
3550   predicate(n->get_long() == 0xFFFFFFFFL);
3551   match(ConL);
3552   op_cost(0);
3553 
3554   format %{ %}
3555   interface(CONST_INTER);
3556 %}
3557 
3558 // Long Immediate: value fits in a signed 32-bit int
3559 operand immL32() %{
3560   predicate(n->get_long() == (int)(n->get_long()));
3561   match(ConL);
3562   op_cost(20);
3563 
3564   format %{ %}
3565   interface(CONST_INTER);
3566 %}
3567 
3568 // Double Immediate zero
3569 operand immDPR0() %{
3570   // Do additional (and counter-intuitive) test against NaN to work around VC++
3571   // bug that generates code such that NaNs compare equal to 0.0
3572   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3573   match(ConD);
3574 
3575   op_cost(5);
3576   format %{ %}
3577   interface(CONST_INTER);
3578 %}
3579 
3580 // Double Immediate one
3581 operand immDPR1() %{
3582   predicate( UseSSE<=1 && n->getd() == 1.0 );
3583   match(ConD);
3584 
3585   op_cost(5);
3586   format %{ %}
3587   interface(CONST_INTER);
3588 %}
3589 
3590 // Double Immediate
3591 operand immDPR() %{
3592   predicate(UseSSE<=1);
3593   match(ConD);
3594 
3595   op_cost(5);
3596   format %{ %}
3597   interface(CONST_INTER);
3598 %}
3599 
3600 operand immD() %{
3601   predicate(UseSSE>=2);
3602   match(ConD);
3603 
3604   op_cost(5);
3605   format %{ %}
3606   interface(CONST_INTER);
3607 %}
3608 
3609 // Double Immediate zero
3610 operand immD0() %{
3611   // Do additional (and counter-intuitive) test against NaN to work around VC++
3612   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3613   // compare equal to -0.0.
3614   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3615   match(ConD);
3616 
3617   format %{ %}
3618   interface(CONST_INTER);
3619 %}
3620 
3621 // Float Immediate zero
3622 operand immFPR0() %{
3623   predicate(UseSSE == 0 && n->getf() == 0.0F);
3624   match(ConF);
3625 
3626   op_cost(5);
3627   format %{ %}
3628   interface(CONST_INTER);
3629 %}
3630 
3631 // Float Immediate one
3632 operand immFPR1() %{
3633   predicate(UseSSE == 0 && n->getf() == 1.0F);
3634   match(ConF);
3635 
3636   op_cost(5);
3637   format %{ %}
3638   interface(CONST_INTER);
3639 %}
3640 
3641 // Float Immediate
3642 operand immFPR() %{
3643   predicate( UseSSE == 0 );
3644   match(ConF);
3645 
3646   op_cost(5);
3647   format %{ %}
3648   interface(CONST_INTER);
3649 %}
3650 
3651 // Float Immediate
3652 operand immF() %{
3653   predicate(UseSSE >= 1);
3654   match(ConF);
3655 
3656   op_cost(5);
3657   format %{ %}
3658   interface(CONST_INTER);
3659 %}
3660 
3661 // Float Immediate zero.  Zero and not -0.0
3662 operand immF0() %{
3663   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3664   match(ConF);
3665 
3666   op_cost(5);
3667   format %{ %}
3668   interface(CONST_INTER);
3669 %}
3670 
3671 // Immediates for special shifts (sign extend)
3672 
3673 // Constants for increment
3674 operand immI_16() %{
3675   predicate( n->get_int() == 16 );
3676   match(ConI);
3677 
3678   format %{ %}
3679   interface(CONST_INTER);
3680 %}
3681 
3682 operand immI_24() %{
3683   predicate( n->get_int() == 24 );
3684   match(ConI);
3685 
3686   format %{ %}
3687   interface(CONST_INTER);
3688 %}
3689 
3690 // Constant for byte-wide masking
3691 operand immI_255() %{
3692   predicate( n->get_int() == 255 );
3693   match(ConI);
3694 
3695   format %{ %}
3696   interface(CONST_INTER);
3697 %}
3698 
3699 // Constant for short-wide masking
3700 operand immI_65535() %{
3701   predicate(n->get_int() == 65535);
3702   match(ConI);
3703 
3704   format %{ %}
3705   interface(CONST_INTER);
3706 %}
3707 
3708 // Register Operands
3709 // Integer Register
3710 operand rRegI() %{
3711   constraint(ALLOC_IN_RC(int_reg));
3712   match(RegI);
3713   match(xRegI);
3714   match(eAXRegI);
3715   match(eBXRegI);
3716   match(eCXRegI);
3717   match(eDXRegI);
3718   match(eDIRegI);
3719   match(eSIRegI);
3720 
3721   format %{ %}
3722   interface(REG_INTER);
3723 %}
3724 
3725 // Subset of Integer Register
3726 operand xRegI(rRegI reg) %{
3727   constraint(ALLOC_IN_RC(int_x_reg));
3728   match(reg);
3729   match(eAXRegI);
3730   match(eBXRegI);
3731   match(eCXRegI);
3732   match(eDXRegI);
3733 
3734   format %{ %}
3735   interface(REG_INTER);
3736 %}
3737 
3738 // Special Registers
3739 operand eAXRegI(xRegI reg) %{
3740   constraint(ALLOC_IN_RC(eax_reg));
3741   match(reg);
3742   match(rRegI);
3743 
3744   format %{ "EAX" %}
3745   interface(REG_INTER);
3746 %}
3747 
3748 // Special Registers
3749 operand eBXRegI(xRegI reg) %{
3750   constraint(ALLOC_IN_RC(ebx_reg));
3751   match(reg);
3752   match(rRegI);
3753 
3754   format %{ "EBX" %}
3755   interface(REG_INTER);
3756 %}
3757 
3758 operand eCXRegI(xRegI reg) %{
3759   constraint(ALLOC_IN_RC(ecx_reg));
3760   match(reg);
3761   match(rRegI);
3762 
3763   format %{ "ECX" %}
3764   interface(REG_INTER);
3765 %}
3766 
3767 operand eDXRegI(xRegI reg) %{
3768   constraint(ALLOC_IN_RC(edx_reg));
3769   match(reg);
3770   match(rRegI);
3771 
3772   format %{ "EDX" %}
3773   interface(REG_INTER);
3774 %}
3775 
3776 operand eDIRegI(xRegI reg) %{
3777   constraint(ALLOC_IN_RC(edi_reg));
3778   match(reg);
3779   match(rRegI);
3780 
3781   format %{ "EDI" %}
3782   interface(REG_INTER);
3783 %}
3784 
3785 operand naxRegI() %{
3786   constraint(ALLOC_IN_RC(nax_reg));
3787   match(RegI);
3788   match(eCXRegI);
3789   match(eDXRegI);
3790   match(eSIRegI);
3791   match(eDIRegI);
3792 
3793   format %{ %}
3794   interface(REG_INTER);
3795 %}
3796 
3797 operand nadxRegI() %{
3798   constraint(ALLOC_IN_RC(nadx_reg));
3799   match(RegI);
3800   match(eBXRegI);
3801   match(eCXRegI);
3802   match(eSIRegI);
3803   match(eDIRegI);
3804 
3805   format %{ %}
3806   interface(REG_INTER);
3807 %}
3808 
3809 operand ncxRegI() %{
3810   constraint(ALLOC_IN_RC(ncx_reg));
3811   match(RegI);
3812   match(eAXRegI);
3813   match(eDXRegI);
3814   match(eSIRegI);
3815   match(eDIRegI);
3816 
3817   format %{ %}
3818   interface(REG_INTER);
3819 %}
3820 
3821 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3822 // //
3823 operand eSIRegI(xRegI reg) %{
3824    constraint(ALLOC_IN_RC(esi_reg));
3825    match(reg);
3826    match(rRegI);
3827 
3828    format %{ "ESI" %}
3829    interface(REG_INTER);
3830 %}
3831 
3832 // Pointer Register
3833 operand anyRegP() %{
3834   constraint(ALLOC_IN_RC(any_reg));
3835   match(RegP);
3836   match(eAXRegP);
3837   match(eBXRegP);
3838   match(eCXRegP);
3839   match(eDIRegP);
3840   match(eRegP);
3841 
3842   format %{ %}
3843   interface(REG_INTER);
3844 %}
3845 
3846 operand eRegP() %{
3847   constraint(ALLOC_IN_RC(int_reg));
3848   match(RegP);
3849   match(eAXRegP);
3850   match(eBXRegP);
3851   match(eCXRegP);
3852   match(eDIRegP);
3853 
3854   format %{ %}
3855   interface(REG_INTER);
3856 %}
3857 
3858 // On Windows 95, EBP is not safe to use for implicit null tests.
3859 operand eRegP_no_EBP() %{
3860   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3861   match(RegP);
3862   match(eAXRegP);
3863   match(eBXRegP);
3864   match(eCXRegP);
3865   match(eDIRegP);
3866 
3867   op_cost(100);
3868   format %{ %}
3869   interface(REG_INTER);
3870 %}
3871 
3872 operand naxRegP() %{
3873   constraint(ALLOC_IN_RC(nax_reg));
3874   match(RegP);
3875   match(eBXRegP);
3876   match(eDXRegP);
3877   match(eCXRegP);
3878   match(eSIRegP);
3879   match(eDIRegP);
3880 
3881   format %{ %}
3882   interface(REG_INTER);
3883 %}
3884 
3885 operand nabxRegP() %{
3886   constraint(ALLOC_IN_RC(nabx_reg));
3887   match(RegP);
3888   match(eCXRegP);
3889   match(eDXRegP);
3890   match(eSIRegP);
3891   match(eDIRegP);
3892 
3893   format %{ %}
3894   interface(REG_INTER);
3895 %}
3896 
3897 operand pRegP() %{
3898   constraint(ALLOC_IN_RC(p_reg));
3899   match(RegP);
3900   match(eBXRegP);
3901   match(eDXRegP);
3902   match(eSIRegP);
3903   match(eDIRegP);
3904 
3905   format %{ %}
3906   interface(REG_INTER);
3907 %}
3908 
3909 // Special Registers
3910 // Return a pointer value
3911 operand eAXRegP(eRegP reg) %{
3912   constraint(ALLOC_IN_RC(eax_reg));
3913   match(reg);
3914   format %{ "EAX" %}
3915   interface(REG_INTER);
3916 %}
3917 
3918 // Used in AtomicAdd
3919 operand eBXRegP(eRegP reg) %{
3920   constraint(ALLOC_IN_RC(ebx_reg));
3921   match(reg);
3922   format %{ "EBX" %}
3923   interface(REG_INTER);
3924 %}
3925 
3926 // Tail-call (interprocedural jump) to interpreter
3927 operand eCXRegP(eRegP reg) %{
3928   constraint(ALLOC_IN_RC(ecx_reg));
3929   match(reg);
3930   format %{ "ECX" %}
3931   interface(REG_INTER);
3932 %}
3933 
3934 operand eSIRegP(eRegP reg) %{
3935   constraint(ALLOC_IN_RC(esi_reg));
3936   match(reg);
3937   format %{ "ESI" %}
3938   interface(REG_INTER);
3939 %}
3940 
3941 // Used in rep stosw
3942 operand eDIRegP(eRegP reg) %{
3943   constraint(ALLOC_IN_RC(edi_reg));
3944   match(reg);
3945   format %{ "EDI" %}
3946   interface(REG_INTER);
3947 %}
3948 
3949 operand eRegL() %{
3950   constraint(ALLOC_IN_RC(long_reg));
3951   match(RegL);
3952   match(eADXRegL);
3953 
3954   format %{ %}
3955   interface(REG_INTER);
3956 %}
3957 
3958 operand eADXRegL( eRegL reg ) %{
3959   constraint(ALLOC_IN_RC(eadx_reg));
3960   match(reg);
3961 
3962   format %{ "EDX:EAX" %}
3963   interface(REG_INTER);
3964 %}
3965 
3966 operand eBCXRegL( eRegL reg ) %{
3967   constraint(ALLOC_IN_RC(ebcx_reg));
3968   match(reg);
3969 
3970   format %{ "EBX:ECX" %}
3971   interface(REG_INTER);
3972 %}
3973 
3974 // Special case for integer high multiply
3975 operand eADXRegL_low_only() %{
3976   constraint(ALLOC_IN_RC(eadx_reg));
3977   match(RegL);
3978 
3979   format %{ "EAX" %}
3980   interface(REG_INTER);
3981 %}
3982 
3983 // Flags register, used as output of compare instructions
3984 operand eFlagsReg() %{
3985   constraint(ALLOC_IN_RC(int_flags));
3986   match(RegFlags);
3987 
3988   format %{ "EFLAGS" %}
3989   interface(REG_INTER);
3990 %}
3991 
3992 // Flags register, used as output of FLOATING POINT compare instructions
3993 operand eFlagsRegU() %{
3994   constraint(ALLOC_IN_RC(int_flags));
3995   match(RegFlags);
3996 
3997   format %{ "EFLAGS_U" %}
3998   interface(REG_INTER);
3999 %}
4000 
4001 operand eFlagsRegUCF() %{
4002   constraint(ALLOC_IN_RC(int_flags));
4003   match(RegFlags);
4004   predicate(false);
4005 
4006   format %{ "EFLAGS_U_CF" %}
4007   interface(REG_INTER);
4008 %}
4009 
4010 // Condition Code Register used by long compare
4011 operand flagsReg_long_LTGE() %{
4012   constraint(ALLOC_IN_RC(int_flags));
4013   match(RegFlags);
4014   format %{ "FLAGS_LTGE" %}
4015   interface(REG_INTER);
4016 %}
4017 operand flagsReg_long_EQNE() %{
4018   constraint(ALLOC_IN_RC(int_flags));
4019   match(RegFlags);
4020   format %{ "FLAGS_EQNE" %}
4021   interface(REG_INTER);
4022 %}
4023 operand flagsReg_long_LEGT() %{
4024   constraint(ALLOC_IN_RC(int_flags));
4025   match(RegFlags);
4026   format %{ "FLAGS_LEGT" %}
4027   interface(REG_INTER);
4028 %}
4029 
4030 // Condition Code Register used by unsigned long compare
4031 operand flagsReg_ulong_LTGE() %{
4032   constraint(ALLOC_IN_RC(int_flags));
4033   match(RegFlags);
4034   format %{ "FLAGS_U_LTGE" %}
4035   interface(REG_INTER);
4036 %}
4037 operand flagsReg_ulong_EQNE() %{
4038   constraint(ALLOC_IN_RC(int_flags));
4039   match(RegFlags);
4040   format %{ "FLAGS_U_EQNE" %}
4041   interface(REG_INTER);
4042 %}
4043 operand flagsReg_ulong_LEGT() %{
4044   constraint(ALLOC_IN_RC(int_flags));
4045   match(RegFlags);
4046   format %{ "FLAGS_U_LEGT" %}
4047   interface(REG_INTER);
4048 %}
4049 
4050 // FPU Double register operands
4051 operand regDPR() %{
4052   predicate( UseSSE < 2 );
4053   constraint(ALLOC_IN_RC(fp_dbl_reg));
4054   match(RegD);
4055   match(regDPR1);
4056   match(regDPR2);
4057   format %{ %}
4058   interface(REG_INTER);
4059 %}
4060 
4061 operand regDPR1(regDPR reg) %{
4062   predicate( UseSSE < 2 );
4063   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4064   match(reg);
4065   format %{ "FPR1" %}
4066   interface(REG_INTER);
4067 %}
4068 
4069 operand regDPR2(regDPR reg) %{
4070   predicate( UseSSE < 2 );
4071   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4072   match(reg);
4073   format %{ "FPR2" %}
4074   interface(REG_INTER);
4075 %}
4076 
4077 operand regnotDPR1(regDPR reg) %{
4078   predicate( UseSSE < 2 );
4079   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4080   match(reg);
4081   format %{ %}
4082   interface(REG_INTER);
4083 %}
4084 
4085 // Float register operands
4086 operand regFPR() %{
4087   predicate( UseSSE < 2 );
4088   constraint(ALLOC_IN_RC(fp_flt_reg));
4089   match(RegF);
4090   match(regFPR1);
4091   format %{ %}
4092   interface(REG_INTER);
4093 %}
4094 
4095 // Float register operands
4096 operand regFPR1(regFPR reg) %{
4097   predicate( UseSSE < 2 );
4098   constraint(ALLOC_IN_RC(fp_flt_reg0));
4099   match(reg);
4100   format %{ "FPR1" %}
4101   interface(REG_INTER);
4102 %}
4103 
4104 // XMM Float register operands
4105 operand regF() %{
4106   predicate( UseSSE>=1 );
4107   constraint(ALLOC_IN_RC(float_reg_legacy));
4108   match(RegF);
4109   format %{ %}
4110   interface(REG_INTER);
4111 %}
4112 
4113 // XMM Double register operands
4114 operand regD() %{
4115   predicate( UseSSE>=2 );
4116   constraint(ALLOC_IN_RC(double_reg_legacy));
4117   match(RegD);
4118   format %{ %}
4119   interface(REG_INTER);
4120 %}
4121 
4122 // Vectors: note that we use legacy registers to avoid extra (unneeded in the
4123 // 32-bit VM) runtime code generation via reg_class_dynamic.
4124 operand vecS() %{
4125   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4126   match(VecS);
4127 
4128   format %{ %}
4129   interface(REG_INTER);
4130 %}
4131 
4132 operand vecD() %{
4133   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4134   match(VecD);
4135 
4136   format %{ %}
4137   interface(REG_INTER);
4138 %}
4139 
4140 operand vecX() %{
4141   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4142   match(VecX);
4143 
4144   format %{ %}
4145   interface(REG_INTER);
4146 %}
4147 
4148 operand vecY() %{
4149   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4150   match(VecY);
4151 
4152   format %{ %}
4153   interface(REG_INTER);
4154 %}
4155 
4156 //----------Memory Operands----------------------------------------------------
4157 // Direct Memory Operand
4158 operand direct(immP addr) %{
4159   match(addr);
4160 
4161   format %{ "[$addr]" %}
4162   interface(MEMORY_INTER) %{
4163     base(0xFFFFFFFF);
4164     index(0x4);
4165     scale(0x0);
4166     disp($addr);
4167   %}
4168 %}
4169 
4170 // Indirect Memory Operand
4171 operand indirect(eRegP reg) %{
4172   constraint(ALLOC_IN_RC(int_reg));
4173   match(reg);
4174 
4175   format %{ "[$reg]" %}
4176   interface(MEMORY_INTER) %{
4177     base($reg);
4178     index(0x4);
4179     scale(0x0);
4180     disp(0x0);
4181   %}
4182 %}
4183 
4184 // Indirect Memory Plus Short Offset Operand
4185 operand indOffset8(eRegP reg, immI8 off) %{
4186   match(AddP reg off);
4187 
4188   format %{ "[$reg + $off]" %}
4189   interface(MEMORY_INTER) %{
4190     base($reg);
4191     index(0x4);
4192     scale(0x0);
4193     disp($off);
4194   %}
4195 %}
4196 
4197 // Indirect Memory Plus Long Offset Operand
4198 operand indOffset32(eRegP reg, immI off) %{
4199   match(AddP reg off);
4200 
4201   format %{ "[$reg + $off]" %}
4202   interface(MEMORY_INTER) %{
4203     base($reg);
4204     index(0x4);
4205     scale(0x0);
4206     disp($off);
4207   %}
4208 %}
4209 
4210 // Indirect Memory Plus Long Offset Operand
4211 operand indOffset32X(rRegI reg, immP off) %{
4212   match(AddP off reg);
4213 
4214   format %{ "[$reg + $off]" %}
4215   interface(MEMORY_INTER) %{
4216     base($reg);
4217     index(0x4);
4218     scale(0x0);
4219     disp($off);
4220   %}
4221 %}
4222 
4223 // Indirect Memory Plus Index Register Plus Offset Operand
4224 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4225   match(AddP (AddP reg ireg) off);
4226 
4227   op_cost(10);
4228   format %{"[$reg + $off + $ireg]" %}
4229   interface(MEMORY_INTER) %{
4230     base($reg);
4231     index($ireg);
4232     scale(0x0);
4233     disp($off);
4234   %}
4235 %}
4236 
4237 // Indirect Memory Plus Index Register Plus Offset Operand
4238 operand indIndex(eRegP reg, rRegI ireg) %{
4239   match(AddP reg ireg);
4240 
4241   op_cost(10);
4242   format %{"[$reg + $ireg]" %}
4243   interface(MEMORY_INTER) %{
4244     base($reg);
4245     index($ireg);
4246     scale(0x0);
4247     disp(0x0);
4248   %}
4249 %}
4250 
4251 // // -------------------------------------------------------------------------
4252 // // 486 architecture doesn't support "scale * index + offset" without a base
4253 // // -------------------------------------------------------------------------
4254 // // Scaled Memory Operands
4255 // // Indirect Memory Times Scale Plus Offset Operand
4256 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4257 //   match(AddP off (LShiftI ireg scale));
4258 //
4259 //   op_cost(10);
4260 //   format %{"[$off + $ireg << $scale]" %}
4261 //   interface(MEMORY_INTER) %{
4262 //     base(0x4);
4263 //     index($ireg);
4264 //     scale($scale);
4265 //     disp($off);
4266 //   %}
4267 // %}
4268 
4269 // Indirect Memory Times Scale Plus Index Register
4270 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4271   match(AddP reg (LShiftI ireg scale));
4272 
4273   op_cost(10);
4274   format %{"[$reg + $ireg << $scale]" %}
4275   interface(MEMORY_INTER) %{
4276     base($reg);
4277     index($ireg);
4278     scale($scale);
4279     disp(0x0);
4280   %}
4281 %}
4282 
4283 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4284 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4285   match(AddP (AddP reg (LShiftI ireg scale)) off);
4286 
4287   op_cost(10);
4288   format %{"[$reg + $off + $ireg << $scale]" %}
4289   interface(MEMORY_INTER) %{
4290     base($reg);
4291     index($ireg);
4292     scale($scale);
4293     disp($off);
4294   %}
4295 %}
4296 
4297 //----------Load Long Memory Operands------------------------------------------
4298 // The load-long idiom will use its address expression again after loading
4299 // the first word of the long.  If the load-long destination overlaps with
4300 // registers used in the addressing expression, the 2nd half will be loaded
4301 // from a clobbered address.  Fix this by requiring that load-long use
4302 // address registers that do not overlap with the load-long target.
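     // For example, if the destination pair were EDX:EAX and the address were
     // [EAX+disp], loading the low word into EAX would clobber the base
     // before the high word could be fetched; pinning the address to ESI (a
     // register the matching load-long rules keep out of the destination)
     // avoids the overlap.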
4303 
4304 // load-long support
4305 operand load_long_RegP() %{
4306   constraint(ALLOC_IN_RC(esi_reg));
4307   match(RegP);
4308   match(eSIRegP);
4309   op_cost(100);
4310   format %{  %}
4311   interface(REG_INTER);
4312 %}
4313 
4314 // Indirect Memory Operand Long
4315 operand load_long_indirect(load_long_RegP reg) %{
4316   constraint(ALLOC_IN_RC(esi_reg));
4317   match(reg);
4318 
4319   format %{ "[$reg]" %}
4320   interface(MEMORY_INTER) %{
4321     base($reg);
4322     index(0x4);
4323     scale(0x0);
4324     disp(0x0);
4325   %}
4326 %}
4327 
4328 // Indirect Memory Plus Long Offset Operand
4329 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4330   match(AddP reg off);
4331 
4332   format %{ "[$reg + $off]" %}
4333   interface(MEMORY_INTER) %{
4334     base($reg);
4335     index(0x4);
4336     scale(0x0);
4337     disp($off);
4338   %}
4339 %}
4340 
4341 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4342 
4343 
4344 //----------Special Memory Operands--------------------------------------------
4345 // Stack Slot Operand - This operand is used for loading and storing temporary
4346 //                      values on the stack where a match requires a value to
4347 //                      flow through memory.
4348 operand stackSlotP(sRegP reg) %{
4349   constraint(ALLOC_IN_RC(stack_slots));
4350   // No match rule because this operand is only generated in matching
4351   format %{ "[$reg]" %}
4352   interface(MEMORY_INTER) %{
4353     base(0x4);   // ESP
4354     index(0x4);  // No Index
4355     scale(0x0);  // No Scale
4356     disp($reg);  // Stack Offset
4357   %}
4358 %}
4359 
4360 operand stackSlotI(sRegI reg) %{
4361   constraint(ALLOC_IN_RC(stack_slots));
4362   // No match rule because this operand is only generated in matching
4363   format %{ "[$reg]" %}
4364   interface(MEMORY_INTER) %{
4365     base(0x4);   // ESP
4366     index(0x4);  // No Index
4367     scale(0x0);  // No Scale
4368     disp($reg);  // Stack Offset
4369   %}
4370 %}
4371 
4372 operand stackSlotF(sRegF reg) %{
4373   constraint(ALLOC_IN_RC(stack_slots));
4374   // No match rule because this operand is only generated in matching
4375   format %{ "[$reg]" %}
4376   interface(MEMORY_INTER) %{
4377     base(0x4);   // ESP
4378     index(0x4);  // No Index
4379     scale(0x0);  // No Scale
4380     disp($reg);  // Stack Offset
4381   %}
4382 %}
4383 
4384 operand stackSlotD(sRegD reg) %{
4385   constraint(ALLOC_IN_RC(stack_slots));
4386   // No match rule because this operand is only generated in matching
4387   format %{ "[$reg]" %}
4388   interface(MEMORY_INTER) %{
4389     base(0x4);   // ESP
4390     index(0x4);  // No Index
4391     scale(0x0);  // No Scale
4392     disp($reg);  // Stack Offset
4393   %}
4394 %}
4395 
4396 operand stackSlotL(sRegL reg) %{
4397   constraint(ALLOC_IN_RC(stack_slots));
4398   // No match rule because this operand is only generated in matching
4399   format %{ "[$reg]" %}
4400   interface(MEMORY_INTER) %{
4401     base(0x4);   // ESP
4402     index(0x4);  // No Index
4403     scale(0x0);  // No Scale
4404     disp($reg);  // Stack Offset
4405   %}
4406 %}
4407 
4408 //----------Memory Operands - Win95 Implicit Null Variants----------------
4409 // Indirect Memory Operand
4410 operand indirect_win95_safe(eRegP_no_EBP reg)
4411 %{
4412   constraint(ALLOC_IN_RC(int_reg));
4413   match(reg);
4414 
4415   op_cost(100);
4416   format %{ "[$reg]" %}
4417   interface(MEMORY_INTER) %{
4418     base($reg);
4419     index(0x4);
4420     scale(0x0);
4421     disp(0x0);
4422   %}
4423 %}
4424 
4425 // Indirect Memory Plus Short Offset Operand
4426 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4427 %{
4428   match(AddP reg off);
4429 
4430   op_cost(100);
4431   format %{ "[$reg + $off]" %}
4432   interface(MEMORY_INTER) %{
4433     base($reg);
4434     index(0x4);
4435     scale(0x0);
4436     disp($off);
4437   %}
4438 %}
4439 
4440 // Indirect Memory Plus Long Offset Operand
4441 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4442 %{
4443   match(AddP reg off);
4444 
4445   op_cost(100);
4446   format %{ "[$reg + $off]" %}
4447   interface(MEMORY_INTER) %{
4448     base($reg);
4449     index(0x4);
4450     scale(0x0);
4451     disp($off);
4452   %}
4453 %}
4454 
4455 // Indirect Memory Plus Index Register Plus Offset Operand
4456 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4457 %{
4458   match(AddP (AddP reg ireg) off);
4459 
4460   op_cost(100);
4461   format %{"[$reg + $off + $ireg]" %}
4462   interface(MEMORY_INTER) %{
4463     base($reg);
4464     index($ireg);
4465     scale(0x0);
4466     disp($off);
4467   %}
4468 %}
4469 
4470 // Indirect Memory Times Scale Plus Index Register
4471 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4472 %{
4473   match(AddP reg (LShiftI ireg scale));
4474 
4475   op_cost(100);
4476   format %{"[$reg + $ireg << $scale]" %}
4477   interface(MEMORY_INTER) %{
4478     base($reg);
4479     index($ireg);
4480     scale($scale);
4481     disp(0x0);
4482   %}
4483 %}
4484 
4485 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4486 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4487 %{
4488   match(AddP (AddP reg (LShiftI ireg scale)) off);
4489 
4490   op_cost(100);
4491   format %{"[$reg + $off + $ireg << $scale]" %}
4492   interface(MEMORY_INTER) %{
4493     base($reg);
4494     index($ireg);
4495     scale($scale);
4496     disp($off);
4497   %}
4498 %}
4499 
4500 //----------Conditional Branch Operands----------------------------------------
4501 // Comparison Op  - This is the operation of the comparison, and is limited to
4502 //                  the following set of codes:
4503 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4504 //
4505 // Other attributes of the comparison, such as unsignedness, are specified
4506 // by the comparison instruction that sets a condition code flags register.
4507 // That result is represented by a flags operand whose subtype is appropriate
4508 // to the unsignedness (etc.) of the comparison.
4509 //
4510 // Later, the instruction which matches both the Comparison Op (a Bool) and
4511 // the flags (produced by the Cmp) specifies the coding of the comparison op
4512 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4513 
4514 // Comparison Code
4515 operand cmpOp() %{
4516   match(Bool);
4517 
4518   format %{ "" %}
4519   interface(COND_INTER) %{
4520     equal(0x4, "e");
4521     not_equal(0x5, "ne");
4522     less(0xC, "l");
4523     greater_equal(0xD, "ge");
4524     less_equal(0xE, "le");
4525     greater(0xF, "g");
4526     overflow(0x0, "o");
4527     no_overflow(0x1, "no");
4528   %}
4529 %}
4530 
4531 // Comparison Code, unsigned compare.  Used by FP also, with
4532 // C2 (unordered) turned into GT or LT already.  The other bits
4533 // C0 and C3 are turned into Carry & Zero flags.
4534 operand cmpOpU() %{
4535   match(Bool);
4536 
4537   format %{ "" %}
4538   interface(COND_INTER) %{
4539     equal(0x4, "e");
4540     not_equal(0x5, "ne");
4541     less(0x2, "b");
4542     greater_equal(0x3, "nb");
4543     less_equal(0x6, "be");
4544     greater(0x7, "nbe");
4545     overflow(0x0, "o");
4546     no_overflow(0x1, "no");
4547   %}
4548 %}
4549 
4550 // Floating comparisons that don't require any fixup for the unordered case
4551 operand cmpOpUCF() %{
4552   match(Bool);
4553   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4554             n->as_Bool()->_test._test == BoolTest::ge ||
4555             n->as_Bool()->_test._test == BoolTest::le ||
4556             n->as_Bool()->_test._test == BoolTest::gt);
4557   format %{ "" %}
4558   interface(COND_INTER) %{
4559     equal(0x4, "e");
4560     not_equal(0x5, "ne");
4561     less(0x2, "b");
4562     greater_equal(0x3, "nb");
4563     less_equal(0x6, "be");
4564     greater(0x7, "nbe");
4565     overflow(0x0, "o");
4566     no_overflow(0x1, "no");
4567   %}
4568 %}
4569 
4570 
4571 // Floating comparisons that can be fixed up with extra conditional jumps
4572 operand cmpOpUCF2() %{
4573   match(Bool);
4574   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4575             n->as_Bool()->_test._test == BoolTest::eq);
4576   format %{ "" %}
4577   interface(COND_INTER) %{
4578     equal(0x4, "e");
4579     not_equal(0x5, "ne");
4580     less(0x2, "b");
4581     greater_equal(0x3, "nb");
4582     less_equal(0x6, "be");
4583     greater(0x7, "nbe");
4584     overflow(0x0, "o");
4585     no_overflow(0x1, "no");
4586   %}
4587 %}
4588 
4589 // Comparison Code for FP conditional move
4590 operand cmpOp_fcmov() %{
4591   match(Bool);
4592 
4593   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4594             n->as_Bool()->_test._test != BoolTest::no_overflow);
4595   format %{ "" %}
4596   interface(COND_INTER) %{
4597     equal        (0x0C8);
4598     not_equal    (0x1C8);
4599     less         (0x0C0);
4600     greater_equal(0x1C0);
4601     less_equal   (0x0D0);
4602     greater      (0x1D0);
4603     overflow(0x0, "o"); // not really supported by the instruction
4604     no_overflow(0x1, "no"); // not really supported by the instruction
4605   %}
4606 %}
4607 
4608 // Comparison Code used in long compares
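     // The condition encodings are those of the reversed test (l<->g, le<->ge),
     // for use when the flags were set by a compare whose operands were swapped.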
4609 operand cmpOp_commute() %{
4610   match(Bool);
4611 
4612   format %{ "" %}
4613   interface(COND_INTER) %{
4614     equal(0x4, "e");
4615     not_equal(0x5, "ne");
4616     less(0xF, "g");
4617     greater_equal(0xE, "le");
4618     less_equal(0xD, "ge");
4619     greater(0xC, "l");
4620     overflow(0x0, "o");
4621     no_overflow(0x1, "no");
4622   %}
4623 %}
4624 
4625 // Comparison Code used in unsigned long compares
4626 operand cmpOpU_commute() %{
4627   match(Bool);
4628 
4629   format %{ "" %}
4630   interface(COND_INTER) %{
4631     equal(0x4, "e");
4632     not_equal(0x5, "ne");
4633     less(0x7, "nbe");
4634     greater_equal(0x6, "be");
4635     less_equal(0x3, "nb");
4636     greater(0x2, "b");
4637     overflow(0x0, "o");
4638     no_overflow(0x1, "no");
4639   %}
4640 %}
4641 
4642 //----------OPERAND CLASSES----------------------------------------------------
4643 // Operand Classes are groups of operands that are used to simplify
4644 // instruction definitions by not requiring the AD writer to specify separate
4645 // instructions for every form of operand when the instruction accepts
4646 // multiple operand types with the same basic encoding and format.  The classic
4647 // case of this is memory operands.
4648 
4649 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4650                indIndex, indIndexScale, indIndexScaleOffset);
4651 
4652 // Long memory operations are encoded in 2 instructions and a +4 offset.
4653 // This means some kind of offset is always required and you cannot use
4654 // an oop as the offset (as is done when working on static globals).
4655 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4656                     indIndex, indIndexScale, indIndexScaleOffset);
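     // For example, loadL and storeL below each expand to two MOVs that
     // address the halves of the long at $mem and $mem+4.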
4657 
4658 
4659 //----------PIPELINE-----------------------------------------------------------
4660 // Rules which define the behavior of the target architecture's pipeline.
4661 pipeline %{
4662 
4663 //----------ATTRIBUTES---------------------------------------------------------
4664 attributes %{
4665   variable_size_instructions;        // Variable-sized instructions
4666   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4667   instruction_unit_size = 1;         // An instruction is 1 byte long
4668   instruction_fetch_unit_size = 16;  // The processor fetches one line
4669   instruction_fetch_units = 1;       // of 16 bytes
4670 
4671   // List of nop instructions
4672   nops( MachNop );
4673 %}
4674 
4675 //----------RESOURCES----------------------------------------------------------
4676 // Resources are the functional units available to the machine
4677 
4678 // Generic P2/P3 pipeline
4679 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4680 // 3 instructions decoded per cycle.
4681 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4682 // 2 ALU ops, only ALU0 handles mul/div instructions.
4683 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4684            MS0, MS1, MEM = MS0 | MS1,
4685            BR, FPU,
4686            ALU0, ALU1, ALU = ALU0 | ALU1 );
4687 
4688 //----------PIPELINE DESCRIPTION-----------------------------------------------
4689 // Pipeline Description specifies the stages in the machine's pipeline
4690 
4691 // Generic P2/P3 pipeline
4692 pipe_desc(S0, S1, S2, S3, S4, S5);
4693 
4694 //----------PIPELINE CLASSES---------------------------------------------------
4695 // Pipeline Classes describe the stages in which input and output are
4696 // referenced by the hardware pipeline.
4697 
4698 // Naming convention: ialu or fpu
4699 // Then: _reg
4700 // Then: _reg if there is a 2nd register
4701 // Then: _long if it's a pair of instructions implementing a long
4702 // Then: _fat if it requires the big decoder
4703 //   Or: _mem if it requires the big decoder and a memory unit.
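     //
     // Reading the entries (rough guide): in ialu_reg below, "dst : S4(write)"
     // says the destination is written in stage S4, "dst : S3(read)" that it
     // is read in stage S3, and "DECODE : S0" / "ALU : S3" claim one decode
     // slot in stage S0 and one ALU in stage S3.  A count in parentheses,
     // e.g. "DECODE : S0(2)", requests that many units of the resource.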
4704 
4705 // Integer ALU reg operation
4706 pipe_class ialu_reg(rRegI dst) %{
4707     single_instruction;
4708     dst    : S4(write);
4709     dst    : S3(read);
4710     DECODE : S0;        // any decoder
4711     ALU    : S3;        // any alu
4712 %}
4713 
4714 // Long ALU reg operation
4715 pipe_class ialu_reg_long(eRegL dst) %{
4716     instruction_count(2);
4717     dst    : S4(write);
4718     dst    : S3(read);
4719     DECODE : S0(2);     // any 2 decoders
4720     ALU    : S3(2);     // both alus
4721 %}
4722 
4723 // Integer ALU reg operation using big decoder
4724 pipe_class ialu_reg_fat(rRegI dst) %{
4725     single_instruction;
4726     dst    : S4(write);
4727     dst    : S3(read);
4728     D0     : S0;        // big decoder only
4729     ALU    : S3;        // any alu
4730 %}
4731 
4732 // Long ALU reg operation using big decoder
4733 pipe_class ialu_reg_long_fat(eRegL dst) %{
4734     instruction_count(2);
4735     dst    : S4(write);
4736     dst    : S3(read);
4737     D0     : S0(2);     // big decoder only; twice
4738     ALU    : S3(2);     // any 2 alus
4739 %}
4740 
4741 // Integer ALU reg-reg operation
4742 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4743     single_instruction;
4744     dst    : S4(write);
4745     src    : S3(read);
4746     DECODE : S0;        // any decoder
4747     ALU    : S3;        // any alu
4748 %}
4749 
4750 // Long ALU reg-reg operation
4751 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4752     instruction_count(2);
4753     dst    : S4(write);
4754     src    : S3(read);
4755     DECODE : S0(2);     // any 2 decoders
4756     ALU    : S3(2);     // both alus
4757 %}
4758 
4759 // Integer ALU reg-reg operation using big decoder
4760 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4761     single_instruction;
4762     dst    : S4(write);
4763     src    : S3(read);
4764     D0     : S0;        // big decoder only
4765     ALU    : S3;        // any alu
4766 %}
4767 
4768 // Long ALU reg-reg operation using big decoder
4769 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4770     instruction_count(2);
4771     dst    : S4(write);
4772     src    : S3(read);
4773     D0     : S0(2);     // big decoder only; twice
4774     ALU    : S3(2);     // both alus
4775 %}
4776 
4777 // Integer ALU reg-mem operation
4778 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4779     single_instruction;
4780     dst    : S5(write);
4781     mem    : S3(read);
4782     D0     : S0;        // big decoder only
4783     ALU    : S4;        // any alu
4784     MEM    : S3;        // any mem
4785 %}
4786 
4787 // Long ALU reg-mem operation
4788 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4789     instruction_count(2);
4790     dst    : S5(write);
4791     mem    : S3(read);
4792     D0     : S0(2);     // big decoder only; twice
4793     ALU    : S4(2);     // any 2 alus
4794     MEM    : S3(2);     // both mems
4795 %}
4796 
4797 // Integer mem operation (prefetch)
4798 pipe_class ialu_mem(memory mem)
4799 %{
4800     single_instruction;
4801     mem    : S3(read);
4802     D0     : S0;        // big decoder only
4803     MEM    : S3;        // any mem
4804 %}
4805 
4806 // Integer Store to Memory
4807 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4808     single_instruction;
4809     mem    : S3(read);
4810     src    : S5(read);
4811     D0     : S0;        // big decoder only
4812     ALU    : S4;        // any alu
4813     MEM    : S3;
4814 %}
4815 
4816 // Long Store to Memory
4817 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4818     instruction_count(2);
4819     mem    : S3(read);
4820     src    : S5(read);
4821     D0     : S0(2);     // big decoder only; twice
4822     ALU    : S4(2);     // any 2 alus
4823     MEM    : S3(2);     // Both mems
4824 %}
4825 
4826 // Integer Store immediate to Memory
4827 pipe_class ialu_mem_imm(memory mem) %{
4828     single_instruction;
4829     mem    : S3(read);
4830     D0     : S0;        // big decoder only
4831     ALU    : S4;        // any alu
4832     MEM    : S3;
4833 %}
4834 
4835 // Integer ALU0 reg-reg operation
4836 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4837     single_instruction;
4838     dst    : S4(write);
4839     src    : S3(read);
4840     D0     : S0;        // Big decoder only
4841     ALU0   : S3;        // only alu0
4842 %}
4843 
4844 // Integer ALU0 reg-mem operation
4845 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4846     single_instruction;
4847     dst    : S5(write);
4848     mem    : S3(read);
4849     D0     : S0;        // big decoder only
4850     ALU0   : S4;        // ALU0 only
4851     MEM    : S3;        // any mem
4852 %}
4853 
4854 // Integer ALU reg-reg operation
4855 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4856     single_instruction;
4857     cr     : S4(write);
4858     src1   : S3(read);
4859     src2   : S3(read);
4860     DECODE : S0;        // any decoder
4861     ALU    : S3;        // any alu
4862 %}
4863 
4864 // Integer ALU reg-imm operation
4865 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4866     single_instruction;
4867     cr     : S4(write);
4868     src1   : S3(read);
4869     DECODE : S0;        // any decoder
4870     ALU    : S3;        // any alu
4871 %}
4872 
4873 // Integer ALU reg-mem operation
4874 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4875     single_instruction;
4876     cr     : S4(write);
4877     src1   : S3(read);
4878     src2   : S3(read);
4879     D0     : S0;        // big decoder only
4880     ALU    : S4;        // any alu
4881     MEM    : S3;
4882 %}
4883 
4884 // Conditional move reg-reg
4885 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4886     instruction_count(4);
4887     y      : S4(read);
4888     q      : S3(read);
4889     p      : S3(read);
4890     DECODE : S0(4);     // any decoder
4891 %}
4892 
4893 // Conditional move reg-reg
4894 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4895     single_instruction;
4896     dst    : S4(write);
4897     src    : S3(read);
4898     cr     : S3(read);
4899     DECODE : S0;        // any decoder
4900 %}
4901 
4902 // Conditional move reg-mem
4903 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4904     single_instruction;
4905     dst    : S4(write);
4906     src    : S3(read);
4907     cr     : S3(read);
4908     DECODE : S0;        // any decoder
4909     MEM    : S3;
4910 %}
4911 
4912 // Conditional move reg-reg long
4913 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4914     single_instruction;
4915     dst    : S4(write);
4916     src    : S3(read);
4917     cr     : S3(read);
4918     DECODE : S0(2);     // any 2 decoders
4919 %}
4920 
4921 // Conditional move double reg-reg
4922 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4923     single_instruction;
4924     dst    : S4(write);
4925     src    : S3(read);
4926     cr     : S3(read);
4927     DECODE : S0;        // any decoder
4928 %}
4929 
4930 // Float reg operation
4931 pipe_class fpu_reg(regDPR dst) %{
4932     instruction_count(2);
4933     dst    : S3(read);
4934     DECODE : S0(2);     // any 2 decoders
4935     FPU    : S3;
4936 %}
4937 
4938 // Float reg-reg operation
4939 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4940     instruction_count(2);
4941     dst    : S4(write);
4942     src    : S3(read);
4943     DECODE : S0(2);     // any 2 decoders
4944     FPU    : S3;
4945 %}
4946 
4947 // Float reg-reg operation
4948 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4949     instruction_count(3);
4950     dst    : S4(write);
4951     src1   : S3(read);
4952     src2   : S3(read);
4953     DECODE : S0(3);     // any 3 decoders
4954     FPU    : S3(2);
4955 %}
4956 
4957 // Float reg-reg operation
4958 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4959     instruction_count(4);
4960     dst    : S4(write);
4961     src1   : S3(read);
4962     src2   : S3(read);
4963     src3   : S3(read);
4964     DECODE : S0(4);     // any 4 decode slots
4965     FPU    : S3(2);
4966 %}
4967 
4968 // Float reg-reg operation
4969 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4970     instruction_count(4);
4971     dst    : S4(write);
4972     src1   : S3(read);
4973     src2   : S3(read);
4974     src3   : S3(read);
4975     DECODE : S1(3);     // any 3 decoders
4976     D0     : S0;        // Big decoder only
4977     FPU    : S3(2);
4978     MEM    : S3;
4979 %}
4980 
4981 // Float reg-mem operation
4982 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4983     instruction_count(2);
4984     dst    : S5(write);
4985     mem    : S3(read);
4986     D0     : S0;        // big decoder only
4987     DECODE : S1;        // any decoder for FPU POP
4988     FPU    : S4;
4989     MEM    : S3;        // any mem
4990 %}
4991 
4992 // Float reg-mem operation
4993 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4994     instruction_count(3);
4995     dst    : S5(write);
4996     src1   : S3(read);
4997     mem    : S3(read);
4998     D0     : S0;        // big decoder only
4999     DECODE : S1(2);     // any decoder for FPU POP
5000     FPU    : S4;
5001     MEM    : S3;        // any mem
5002 %}
5003 
5004 // Float mem-reg operation
5005 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
5006     instruction_count(2);
5007     src    : S5(read);
5008     mem    : S3(read);
5009     DECODE : S0;        // any decoder for FPU PUSH
5010     D0     : S1;        // big decoder only
5011     FPU    : S4;
5012     MEM    : S3;        // any mem
5013 %}
5014 
5015 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5016     instruction_count(3);
5017     src1   : S3(read);
5018     src2   : S3(read);
5019     mem    : S3(read);
5020     DECODE : S0(2);     // any decoder for FPU PUSH
5021     D0     : S1;        // big decoder only
5022     FPU    : S4;
5023     MEM    : S3;        // any mem
5024 %}
5025 
5026 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5027     instruction_count(3);
5028     src1   : S3(read);
5029     src2   : S3(read);
5030     mem    : S4(read);
5031     DECODE : S0;        // any decoder for FPU PUSH
5032     D0     : S0(2);     // big decoder only
5033     FPU    : S4;
5034     MEM    : S3(2);     // any mem
5035 %}
5036 
5037 pipe_class fpu_mem_mem(memory dst, memory src1) %{
5038     instruction_count(2);
5039     src1   : S3(read);
5040     dst    : S4(read);
5041     D0     : S0(2);     // big decoder only
5042     MEM    : S3(2);     // any mem
5043 %}
5044 
5045 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5046     instruction_count(3);
5047     src1   : S3(read);
5048     src2   : S3(read);
5049     dst    : S4(read);
5050     D0     : S0(3);     // big decoder only
5051     FPU    : S4;
5052     MEM    : S3(3);     // any mem
5053 %}
5054 
5055 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5056     instruction_count(3);
5057     src1   : S4(read);
5058     mem    : S4(read);
5059     DECODE : S0;        // any decoder for FPU PUSH
5060     D0     : S0(2);     // big decoder only
5061     FPU    : S4;
5062     MEM    : S3(2);     // any mem
5063 %}
5064 
5065 // Float load constant
5066 pipe_class fpu_reg_con(regDPR dst) %{
5067     instruction_count(2);
5068     dst    : S5(write);
5069     D0     : S0;        // big decoder only for the load
5070     DECODE : S1;        // any decoder for FPU POP
5071     FPU    : S4;
5072     MEM    : S3;        // any mem
5073 %}
5074 
5075 // Float load constant
5076 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5077     instruction_count(3);
5078     dst    : S5(write);
5079     src    : S3(read);
5080     D0     : S0;        // big decoder only for the load
5081     DECODE : S1(2);     // any decoder for FPU POP
5082     FPU    : S4;
5083     MEM    : S3;        // any mem
5084 %}
5085 
5086 // Unconditional branch
5087 pipe_class pipe_jmp( label labl ) %{
5088     single_instruction;
5089     BR   : S3;
5090 %}
5091 
5092 // Conditional branch
5093 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5094     single_instruction;
5095     cr    : S1(read);
5096     BR    : S3;
5097 %}
5098 
5099 // Allocation idiom
5100 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5101     instruction_count(1); force_serialization;
5102     fixed_latency(6);
5103     heap_ptr : S3(read);
5104     DECODE   : S0(3);
5105     D0       : S2;
5106     MEM      : S3;
5107     ALU      : S3(2);
5108     dst      : S5(write);
5109     BR       : S5;
5110 %}
5111 
5112 // Generic big/slow expanded idiom
5113 pipe_class pipe_slow(  ) %{
5114     instruction_count(10); multiple_bundles; force_serialization;
5115     fixed_latency(100);
5116     D0  : S0(2);
5117     MEM : S3(2);
5118 %}
5119 
5120 // The real do-nothing guy
5121 pipe_class empty( ) %{
5122     instruction_count(0);
5123 %}
5124 
5125 // Define the class for the Nop node
5126 define %{
5127    MachNop = empty;
5128 %}
5129 
5130 %}
5131 
5132 //----------INSTRUCTIONS-------------------------------------------------------
5133 //
5134 // match      -- States which machine-independent subtree may be replaced
5135 //               by this instruction.
5136 // ins_cost   -- The estimated cost of this instruction is used by instruction
5137 //               selection to identify a minimum cost tree of machine
5138 //               instructions that matches a tree of machine-independent
5139 //               instructions.
5140 // format     -- A string providing the disassembly for this instruction.
5141 //               The value of an instruction's operand may be inserted
5142 //               by referring to it with a '$' prefix.
5143 // opcode     -- Three instruction opcodes may be provided.  These are referred
5144 //               to within an encode class as $primary, $secondary, and $tertiary
5145 //               respectively.  The primary opcode is commonly used to
5146 //               indicate the type of machine instruction, while secondary
5147 //               and tertiary are often used for prefix options or addressing
5148 //               modes.
5149 // ins_encode -- A list of encode classes with parameters. The encode class
5150 //               name must have been defined in an 'enc_class' specification
5151 //               in the encode section of the architecture description.
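     //
     // For example, in bytes_reverse_int below, opcode(0x0F, 0xC8) supplies
     // $primary = 0x0F and $secondary = 0xC8, which the OpcP and OpcSReg
     // encode classes then emit.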
5152 
5153 //----------BSWAP-Instruction--------------------------------------------------
5154 instruct bytes_reverse_int(rRegI dst) %{
5155   match(Set dst (ReverseBytesI dst));
5156 
5157   format %{ "BSWAP  $dst" %}
5158   opcode(0x0F, 0xC8);
5159   ins_encode( OpcP, OpcSReg(dst) );
5160   ins_pipe( ialu_reg );
5161 %}
5162 
5163 instruct bytes_reverse_long(eRegL dst) %{
5164   match(Set dst (ReverseBytesL dst));
5165 
5166   format %{ "BSWAP  $dst.lo\n\t"
5167             "BSWAP  $dst.hi\n\t"
5168             "XCHG   $dst.lo $dst.hi" %}
5169 
5170   ins_cost(125);
5171   ins_encode( bswap_long_bytes(dst) );
5172   ins_pipe( ialu_reg_reg);
5173 %}
5174 
5175 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5176   match(Set dst (ReverseBytesUS dst));
5177   effect(KILL cr);
5178 
5179   format %{ "BSWAP  $dst\n\t"
5180             "SHR    $dst,16\n\t" %}
5181   ins_encode %{
5182     __ bswapl($dst$$Register);
5183     __ shrl($dst$$Register, 16);
5184   %}
5185   ins_pipe( ialu_reg );
5186 %}
5187 
5188 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5189   match(Set dst (ReverseBytesS dst));
5190   effect(KILL cr);
5191 
5192   format %{ "BSWAP  $dst\n\t"
5193             "SAR    $dst,16\n\t" %}
5194   ins_encode %{
5195     __ bswapl($dst$$Register);
5196     __ sarl($dst$$Register, 16);
5197   %}
5198   ins_pipe( ialu_reg );
5199 %}
5200 
5201 
5202 //---------- Zeros Count Instructions ------------------------------------------
5203 
5204 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5205   predicate(UseCountLeadingZerosInstruction);
5206   match(Set dst (CountLeadingZerosI src));
5207   effect(KILL cr);
5208 
5209   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5210   ins_encode %{
5211     __ lzcntl($dst$$Register, $src$$Register);
5212   %}
5213   ins_pipe(ialu_reg);
5214 %}
5215 
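     // Fallback when LZCNT is unavailable: BSR returns the index of the highest
     // set bit, so the leading-zero count is 31 - index (computed as NEG then
     // ADD 31).  A zero input sets ZF and leaves $dst undefined; forcing $dst
     // to -1 makes the final result 32.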
5216 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5217   predicate(!UseCountLeadingZerosInstruction);
5218   match(Set dst (CountLeadingZerosI src));
5219   effect(KILL cr);
5220 
5221   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5222             "JNZ    skip\n\t"
5223             "MOV    $dst, -1\n"
5224       "skip:\n\t"
5225             "NEG    $dst\n\t"
5226             "ADD    $dst, 31" %}
5227   ins_encode %{
5228     Register Rdst = $dst$$Register;
5229     Register Rsrc = $src$$Register;
5230     Label skip;
5231     __ bsrl(Rdst, Rsrc);
5232     __ jccb(Assembler::notZero, skip);
5233     __ movl(Rdst, -1);
5234     __ bind(skip);
5235     __ negl(Rdst);
5236     __ addl(Rdst, BitsPerInt - 1);
5237   %}
5238   ins_pipe(ialu_reg);
5239 %}
5240 
5241 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5242   predicate(UseCountLeadingZerosInstruction);
5243   match(Set dst (CountLeadingZerosL src));
5244   effect(TEMP dst, KILL cr);
5245 
5246   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5247             "JNC    done\n\t"
5248             "LZCNT  $dst, $src.lo\n\t"
5249             "ADD    $dst, 32\n"
5250       "done:" %}
5251   ins_encode %{
5252     Register Rdst = $dst$$Register;
5253     Register Rsrc = $src$$Register;
5254     Label done;
5255     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5256     __ jccb(Assembler::carryClear, done);
5257     __ lzcntl(Rdst, Rsrc);
5258     __ addl(Rdst, BitsPerInt);
5259     __ bind(done);
5260   %}
5261   ins_pipe(ialu_reg);
5262 %}
5263 
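     // Fallback for the long case: the same BSR identity, applied to whichever
     // 32-bit half holds the highest set bit; the result is 63 - bit index
     // (formed with NEG/ADD), or 64 when both halves are zero.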
5264 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5265   predicate(!UseCountLeadingZerosInstruction);
5266   match(Set dst (CountLeadingZerosL src));
5267   effect(TEMP dst, KILL cr);
5268 
5269   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5270             "JZ     msw_is_zero\n\t"
5271             "ADD    $dst, 32\n\t"
5272             "JMP    not_zero\n"
5273       "msw_is_zero:\n\t"
5274             "BSR    $dst, $src.lo\n\t"
5275             "JNZ    not_zero\n\t"
5276             "MOV    $dst, -1\n"
5277       "not_zero:\n\t"
5278             "NEG    $dst\n\t"
5279             "ADD    $dst, 63\n" %}
5280   ins_encode %{
5281     Register Rdst = $dst$$Register;
5282     Register Rsrc = $src$$Register;
5283     Label msw_is_zero;
5284     Label not_zero;
5285     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5286     __ jccb(Assembler::zero, msw_is_zero);
5287     __ addl(Rdst, BitsPerInt);
5288     __ jmpb(not_zero);
5289     __ bind(msw_is_zero);
5290     __ bsrl(Rdst, Rsrc);
5291     __ jccb(Assembler::notZero, not_zero);
5292     __ movl(Rdst, -1);
5293     __ bind(not_zero);
5294     __ negl(Rdst);
5295     __ addl(Rdst, BitsPerLong - 1);
5296   %}
5297   ins_pipe(ialu_reg);
5298 %}
5299 
5300 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5301   predicate(UseCountTrailingZerosInstruction);
5302   match(Set dst (CountTrailingZerosI src));
5303   effect(KILL cr);
5304 
5305   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5306   ins_encode %{
5307     __ tzcntl($dst$$Register, $src$$Register);
5308   %}
5309   ins_pipe(ialu_reg);
5310 %}
5311 
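     // Fallback when TZCNT is unavailable: for a non-zero input, BSF already
     // yields the trailing-zero count (the index of the lowest set bit); only
     // the zero case needs the explicit MOV of 32.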
5312 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5313   predicate(!UseCountTrailingZerosInstruction);
5314   match(Set dst (CountTrailingZerosI src));
5315   effect(KILL cr);
5316 
5317   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5318             "JNZ    done\n\t"
5319             "MOV    $dst, 32\n"
5320       "done:" %}
5321   ins_encode %{
5322     Register Rdst = $dst$$Register;
5323     Label done;
5324     __ bsfl(Rdst, $src$$Register);
5325     __ jccb(Assembler::notZero, done);
5326     __ movl(Rdst, BitsPerInt);
5327     __ bind(done);
5328   %}
5329   ins_pipe(ialu_reg);
5330 %}
5331 
5332 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5333   predicate(UseCountTrailingZerosInstruction);
5334   match(Set dst (CountTrailingZerosL src));
5335   effect(TEMP dst, KILL cr);
5336 
5337   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5338             "JNC    done\n\t"
5339             "TZCNT  $dst, $src.hi\n\t"
5340             "ADD    $dst, 32\n"
5341       "done:" %}
5342   ins_encode %{
5343     Register Rdst = $dst$$Register;
5344     Register Rsrc = $src$$Register;
5345     Label done;
5346     __ tzcntl(Rdst, Rsrc);
5347     __ jccb(Assembler::carryClear, done);
5348     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5349     __ addl(Rdst, BitsPerInt);
5350     __ bind(done);
5351   %}
5352   ins_pipe(ialu_reg);
5353 %}
5354 
5355 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5356   predicate(!UseCountTrailingZerosInstruction);
5357   match(Set dst (CountTrailingZerosL src));
5358   effect(TEMP dst, KILL cr);
5359 
5360   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5361             "JNZ    done\n\t"
5362             "BSF    $dst, $src.hi\n\t"
5363             "JNZ    msw_not_zero\n\t"
5364             "MOV    $dst, 32\n"
5365       "msw_not_zero:\n\t"
5366             "ADD    $dst, 32\n"
5367       "done:" %}
5368   ins_encode %{
5369     Register Rdst = $dst$$Register;
5370     Register Rsrc = $src$$Register;
5371     Label msw_not_zero;
5372     Label done;
5373     __ bsfl(Rdst, Rsrc);
5374     __ jccb(Assembler::notZero, done);
5375     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5376     __ jccb(Assembler::notZero, msw_not_zero);
5377     __ movl(Rdst, BitsPerInt);
5378     __ bind(msw_not_zero);
5379     __ addl(Rdst, BitsPerInt);
5380     __ bind(done);
5381   %}
5382   ins_pipe(ialu_reg);
5383 %}
5384 
5385 
5386 //---------- Population Count Instructions -------------------------------------
5387 
5388 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5389   predicate(UsePopCountInstruction);
5390   match(Set dst (PopCountI src));
5391   effect(KILL cr);
5392 
5393   format %{ "POPCNT $dst, $src" %}
5394   ins_encode %{
5395     __ popcntl($dst$$Register, $src$$Register);
5396   %}
5397   ins_pipe(ialu_reg);
5398 %}
5399 
5400 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5401   predicate(UsePopCountInstruction);
5402   match(Set dst (PopCountI (LoadI mem)));
5403   effect(KILL cr);
5404 
5405   format %{ "POPCNT $dst, $mem" %}
5406   ins_encode %{
5407     __ popcntl($dst$$Register, $mem$$Address);
5408   %}
5409   ins_pipe(ialu_reg);
5410 %}
5411 
5412 // Note: Long.bitCount(long) returns an int.
5413 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5414   predicate(UsePopCountInstruction);
5415   match(Set dst (PopCountL src));
5416   effect(KILL cr, TEMP tmp, TEMP dst);
5417 
5418   format %{ "POPCNT $dst, $src.lo\n\t"
5419             "POPCNT $tmp, $src.hi\n\t"
5420             "ADD    $dst, $tmp" %}
5421   ins_encode %{
5422     __ popcntl($dst$$Register, $src$$Register);
5423     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5424     __ addl($dst$$Register, $tmp$$Register);
5425   %}
5426   ins_pipe(ialu_reg);
5427 %}
5428 
5429 // Note: Long.bitCount(long) returns an int.
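     // The two POPCNTs read the low and high 32-bit halves of the long at
     // $mem and $mem+4, and the results are summed.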
5430 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5431   predicate(UsePopCountInstruction);
5432   match(Set dst (PopCountL (LoadL mem)));
5433   effect(KILL cr, TEMP tmp, TEMP dst);
5434 
5435   format %{ "POPCNT $dst, $mem\n\t"
5436             "POPCNT $tmp, $mem+4\n\t"
5437             "ADD    $dst, $tmp" %}
5438   ins_encode %{
5439     //__ popcntl($dst$$Register, $mem$$Address$$first);
5440     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5441     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5442     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5443     __ addl($dst$$Register, $tmp$$Register);
5444   %}
5445   ins_pipe(ialu_reg);
5446 %}
5447 
5448 
5449 //----------Load/Store/Move Instructions---------------------------------------
5450 //----------Load Instructions--------------------------------------------------
5451 // Load Byte (8bit signed)
5452 instruct loadB(xRegI dst, memory mem) %{
5453   match(Set dst (LoadB mem));
5454 
5455   ins_cost(125);
5456   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5457 
5458   ins_encode %{
5459     __ movsbl($dst$$Register, $mem$$Address);
5460   %}
5461 
5462   ins_pipe(ialu_reg_mem);
5463 %}
5464 
5465 // Load Byte (8bit signed) into Long Register
5466 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5467   match(Set dst (ConvI2L (LoadB mem)));
5468   effect(KILL cr);
5469 
5470   ins_cost(375);
5471   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5472             "MOV    $dst.hi,$dst.lo\n\t"
5473             "SAR    $dst.hi,7" %}
5474 
5475   ins_encode %{
5476     __ movsbl($dst$$Register, $mem$$Address);
5477     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5478     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
5479   %}
5480 
5481   ins_pipe(ialu_reg_mem);
5482 %}
5483 
5484 // Load Unsigned Byte (8bit UNsigned)
5485 instruct loadUB(xRegI dst, memory mem) %{
5486   match(Set dst (LoadUB mem));
5487 
5488   ins_cost(125);
5489   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5490 
5491   ins_encode %{
5492     __ movzbl($dst$$Register, $mem$$Address);
5493   %}
5494 
5495   ins_pipe(ialu_reg_mem);
5496 %}
5497 
5498 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5499 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5500   match(Set dst (ConvI2L (LoadUB mem)));
5501   effect(KILL cr);
5502 
5503   ins_cost(250);
5504   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5505             "XOR    $dst.hi,$dst.hi" %}
5506 
5507   ins_encode %{
5508     Register Rdst = $dst$$Register;
5509     __ movzbl(Rdst, $mem$$Address);
5510     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5511   %}
5512 
5513   ins_pipe(ialu_reg_mem);
5514 %}
5515 
5516 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5517 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5518   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5519   effect(KILL cr);
5520 
5521   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5522             "XOR    $dst.hi,$dst.hi\n\t"
5523             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5524   ins_encode %{
5525     Register Rdst = $dst$$Register;
5526     __ movzbl(Rdst, $mem$$Address);
5527     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5528     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5529   %}
5530   ins_pipe(ialu_reg_mem);
5531 %}
5532 
5533 // Load Short (16bit signed)
5534 instruct loadS(rRegI dst, memory mem) %{
5535   match(Set dst (LoadS mem));
5536 
5537   ins_cost(125);
5538   format %{ "MOVSX  $dst,$mem\t# short" %}
5539 
5540   ins_encode %{
5541     __ movswl($dst$$Register, $mem$$Address);
5542   %}
5543 
5544   ins_pipe(ialu_reg_mem);
5545 %}
5546 
5547 // Load Short (16 bit signed) to Byte (8 bit signed)
5548 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5549   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5550 
5551   ins_cost(125);
5552   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5553   ins_encode %{
5554     __ movsbl($dst$$Register, $mem$$Address);
5555   %}
5556   ins_pipe(ialu_reg_mem);
5557 %}
5558 
5559 // Load Short (16bit signed) into Long Register
5560 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5561   match(Set dst (ConvI2L (LoadS mem)));
5562   effect(KILL cr);
5563 
5564   ins_cost(375);
5565   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5566             "MOV    $dst.hi,$dst.lo\n\t"
5567             "SAR    $dst.hi,15" %}
5568 
5569   ins_encode %{
5570     __ movswl($dst$$Register, $mem$$Address);
5571     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5572     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5573   %}
5574 
5575   ins_pipe(ialu_reg_mem);
5576 %}
5577 
5578 // Load Unsigned Short/Char (16bit unsigned)
5579 instruct loadUS(rRegI dst, memory mem) %{
5580   match(Set dst (LoadUS mem));
5581 
5582   ins_cost(125);
5583   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5584 
5585   ins_encode %{
5586     __ movzwl($dst$$Register, $mem$$Address);
5587   %}
5588 
5589   ins_pipe(ialu_reg_mem);
5590 %}
5591 
5592 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5593 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5594   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5595 
5596   ins_cost(125);
5597   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5598   ins_encode %{
5599     __ movsbl($dst$$Register, $mem$$Address);
5600   %}
5601   ins_pipe(ialu_reg_mem);
5602 %}
5603 
5604 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5605 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5606   match(Set dst (ConvI2L (LoadUS mem)));
5607   effect(KILL cr);
5608 
5609   ins_cost(250);
5610   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5611             "XOR    $dst.hi,$dst.hi" %}
5612 
5613   ins_encode %{
5614     __ movzwl($dst$$Register, $mem$$Address);
5615     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5616   %}
5617 
5618   ins_pipe(ialu_reg_mem);
5619 %}
5620 
5621 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5622 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5623   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5624   effect(KILL cr);
5625 
5626   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5627             "XOR    $dst.hi,$dst.hi" %}
5628   ins_encode %{
5629     Register Rdst = $dst$$Register;
5630     __ movzbl(Rdst, $mem$$Address);
5631     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5632   %}
5633   ins_pipe(ialu_reg_mem);
5634 %}
5635 
5636 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5637 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5638   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5639   effect(KILL cr);
5640 
5641   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5642             "XOR    $dst.hi,$dst.hi\n\t"
5643             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5644   ins_encode %{
5645     Register Rdst = $dst$$Register;
5646     __ movzwl(Rdst, $mem$$Address);
5647     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5648     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5649   %}
5650   ins_pipe(ialu_reg_mem);
5651 %}
5652 
5653 // Load Integer
5654 instruct loadI(rRegI dst, memory mem) %{
5655   match(Set dst (LoadI mem));
5656 
5657   ins_cost(125);
5658   format %{ "MOV    $dst,$mem\t# int" %}
5659 
5660   ins_encode %{
5661     __ movl($dst$$Register, $mem$$Address);
5662   %}
5663 
5664   ins_pipe(ialu_reg_mem);
5665 %}
5666 
5667 // Load Integer (32 bit signed) to Byte (8 bit signed)
5668 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5669   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5670 
5671   ins_cost(125);
5672   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5673   ins_encode %{
5674     __ movsbl($dst$$Register, $mem$$Address);
5675   %}
5676   ins_pipe(ialu_reg_mem);
5677 %}
5678 
5679 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5680 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5681   match(Set dst (AndI (LoadI mem) mask));
5682 
5683   ins_cost(125);
5684   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5685   ins_encode %{
5686     __ movzbl($dst$$Register, $mem$$Address);
5687   %}
5688   ins_pipe(ialu_reg_mem);
5689 %}
5690 
5691 // Load Integer (32 bit signed) to Short (16 bit signed)
5692 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5693   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5694 
5695   ins_cost(125);
5696   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5697   ins_encode %{
5698     __ movswl($dst$$Register, $mem$$Address);
5699   %}
5700   ins_pipe(ialu_reg_mem);
5701 %}
5702 
5703 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5704 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5705   match(Set dst (AndI (LoadI mem) mask));
5706 
5707   ins_cost(125);
5708   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5709   ins_encode %{
5710     __ movzwl($dst$$Register, $mem$$Address);
5711   %}
5712   ins_pipe(ialu_reg_mem);
5713 %}
5714 
5715 // Load Integer into Long Register
5716 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5717   match(Set dst (ConvI2L (LoadI mem)));
5718   effect(KILL cr);
5719 
5720   ins_cost(375);
5721   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5722             "MOV    $dst.hi,$dst.lo\n\t"
5723             "SAR    $dst.hi,31" %}
5724 
5725   ins_encode %{
5726     __ movl($dst$$Register, $mem$$Address);
5727     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5728     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5729   %}
5730 
5731   ins_pipe(ialu_reg_mem);
5732 %}
5733 
5734 // Load Integer with mask 0xFF into Long Register
5735 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5736   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5737   effect(KILL cr);
5738 
5739   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5740             "XOR    $dst.hi,$dst.hi" %}
5741   ins_encode %{
5742     Register Rdst = $dst$$Register;
5743     __ movzbl(Rdst, $mem$$Address);
5744     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5745   %}
5746   ins_pipe(ialu_reg_mem);
5747 %}
5748 
5749 // Load Integer with mask 0xFFFF into Long Register
5750 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5751   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5752   effect(KILL cr);
5753 
5754   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5755             "XOR    $dst.hi,$dst.hi" %}
5756   ins_encode %{
5757     Register Rdst = $dst$$Register;
5758     __ movzwl(Rdst, $mem$$Address);
5759     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5760   %}
5761   ins_pipe(ialu_reg_mem);
5762 %}
5763 
5764 // Load Integer with 31-bit mask into Long Register
5765 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5766   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5767   effect(KILL cr);
5768 
5769   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5770             "XOR    $dst.hi,$dst.hi\n\t"
5771             "AND    $dst.lo,$mask" %}
5772   ins_encode %{
5773     Register Rdst = $dst$$Register;
5774     __ movl(Rdst, $mem$$Address);
5775     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5776     __ andl(Rdst, $mask$$constant);
5777   %}
5778   ins_pipe(ialu_reg_mem);
5779 %}
5780 
5781 // Load Unsigned Integer into Long Register
5782 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5783   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5784   effect(KILL cr);
5785 
5786   ins_cost(250);
5787   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5788             "XOR    $dst.hi,$dst.hi" %}
5789 
5790   ins_encode %{
5791     __ movl($dst$$Register, $mem$$Address);
5792     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5793   %}
5794 
5795   ins_pipe(ialu_reg_mem);
5796 %}
5797 
5798 // Load Long.  Cannot clobber address while loading, so restrict address
5799 // register to ESI
5800 instruct loadL(eRegL dst, load_long_memory mem) %{
5801   predicate(!((LoadLNode*)n)->require_atomic_access());
5802   match(Set dst (LoadL mem));
5803 
5804   ins_cost(250);
5805   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5806             "MOV    $dst.hi,$mem+4" %}
5807 
5808   ins_encode %{
5809     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5810     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5811     __ movl($dst$$Register, Amemlo);
5812     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5813   %}
5814 
5815   ins_pipe(ialu_reg_long_mem);
5816 %}
5817 
5818 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5819 // then store it down to the stack and reload on the int
5820 // side.
5821 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5822   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5823   match(Set dst (LoadL mem));
5824 
5825   ins_cost(200);
5826   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5827             "FISTp  $dst" %}
5828   ins_encode(enc_loadL_volatile(mem,dst));
5829   ins_pipe( fpu_reg_mem );
5830 %}
5831 
5832 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5833   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5834   match(Set dst (LoadL mem));
5835   effect(TEMP tmp);
5836   ins_cost(180);
5837   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5838             "MOVSD  $dst,$tmp" %}
5839   ins_encode %{
5840     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5841     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5842   %}
5843   ins_pipe( pipe_slow );
5844 %}
5845 
5846 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5847   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5848   match(Set dst (LoadL mem));
5849   effect(TEMP tmp);
5850   ins_cost(160);
5851   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5852             "MOVD   $dst.lo,$tmp\n\t"
5853             "PSRLQ  $tmp,32\n\t"
5854             "MOVD   $dst.hi,$tmp" %}
5855   ins_encode %{
5856     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5857     __ movdl($dst$$Register, $tmp$$XMMRegister);
5858     __ psrlq($tmp$$XMMRegister, 32);
5859     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5860   %}
5861   ins_pipe( pipe_slow );
5862 %}
5863 
5864 // Load Range
5865 instruct loadRange(rRegI dst, memory mem) %{
5866   match(Set dst (LoadRange mem));
5867 
5868   ins_cost(125);
5869   format %{ "MOV    $dst,$mem" %}
5870   opcode(0x8B);
5871   ins_encode( OpcP, RegMem(dst,mem));
5872   ins_pipe( ialu_reg_mem );
5873 %}
5874 
5875 
5876 // Load Pointer
5877 instruct loadP(eRegP dst, memory mem) %{
5878   match(Set dst (LoadP mem));
5879 
5880   ins_cost(125);
5881   format %{ "MOV    $dst,$mem" %}
5882   opcode(0x8B);
5883   ins_encode( OpcP, RegMem(dst,mem));
5884   ins_pipe( ialu_reg_mem );
5885 %}
5886 
5887 // Load Klass Pointer
5888 instruct loadKlass(eRegP dst, memory mem) %{
5889   match(Set dst (LoadKlass mem));
5890 
5891   ins_cost(125);
5892   format %{ "MOV    $dst,$mem" %}
5893   opcode(0x8B);
5894   ins_encode( OpcP, RegMem(dst,mem));
5895   ins_pipe( ialu_reg_mem );
5896 %}
5897 
5898 // Load Double
5899 instruct loadDPR(regDPR dst, memory mem) %{
5900   predicate(UseSSE<=1);
5901   match(Set dst (LoadD mem));
5902 
5903   ins_cost(150);
5904   format %{ "FLD_D  ST,$mem\n\t"
5905             "FSTP   $dst" %}
5906   opcode(0xDD);               /* DD /0 */
5907   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5908               Pop_Reg_DPR(dst) );
5909   ins_pipe( fpu_reg_mem );
5910 %}
5911 
5912 // Load Double to XMM
5913 instruct loadD(regD dst, memory mem) %{
5914   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5915   match(Set dst (LoadD mem));
5916   ins_cost(145);
5917   format %{ "MOVSD  $dst,$mem" %}
5918   ins_encode %{
5919     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5920   %}
5921   ins_pipe( pipe_slow );
5922 %}
5923 
5924 instruct loadD_partial(regD dst, memory mem) %{
5925   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5926   match(Set dst (LoadD mem));
5927   ins_cost(145);
5928   format %{ "MOVLPD $dst,$mem" %}
5929   ins_encode %{
5930     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5931   %}
5932   ins_pipe( pipe_slow );
5933 %}
5934 
5935 // Load to XMM register (single-precision floating point)
5936 // MOVSS instruction
5937 instruct loadF(regF dst, memory mem) %{
5938   predicate(UseSSE>=1);
5939   match(Set dst (LoadF mem));
5940   ins_cost(145);
5941   format %{ "MOVSS  $dst,$mem" %}
5942   ins_encode %{
5943     __ movflt ($dst$$XMMRegister, $mem$$Address);
5944   %}
5945   ins_pipe( pipe_slow );
5946 %}
5947 
5948 // Load Float
5949 instruct loadFPR(regFPR dst, memory mem) %{
5950   predicate(UseSSE==0);
5951   match(Set dst (LoadF mem));
5952 
5953   ins_cost(150);
5954   format %{ "FLD_S  ST,$mem\n\t"
5955             "FSTP   $dst" %}
5956   opcode(0xD9);               /* D9 /0 */
5957   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5958               Pop_Reg_FPR(dst) );
5959   ins_pipe( fpu_reg_mem );
5960 %}
5961 
5962 // Load Effective Address
5963 instruct leaP8(eRegP dst, indOffset8 mem) %{
5964   match(Set dst mem);
5965 
5966   ins_cost(110);
5967   format %{ "LEA    $dst,$mem" %}
5968   opcode(0x8D);
5969   ins_encode( OpcP, RegMem(dst,mem));
5970   ins_pipe( ialu_reg_reg_fat );
5971 %}
5972 
5973 instruct leaP32(eRegP dst, indOffset32 mem) %{
5974   match(Set dst mem);
5975 
5976   ins_cost(110);
5977   format %{ "LEA    $dst,$mem" %}
5978   opcode(0x8D);
5979   ins_encode( OpcP, RegMem(dst,mem));
5980   ins_pipe( ialu_reg_reg_fat );
5981 %}
5982 
5983 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5984   match(Set dst mem);
5985 
5986   ins_cost(110);
5987   format %{ "LEA    $dst,$mem" %}
5988   opcode(0x8D);
5989   ins_encode( OpcP, RegMem(dst,mem));
5990   ins_pipe( ialu_reg_reg_fat );
5991 %}
5992 
5993 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5994   match(Set dst mem);
5995 
5996   ins_cost(110);
5997   format %{ "LEA    $dst,$mem" %}
5998   opcode(0x8D);
5999   ins_encode( OpcP, RegMem(dst,mem));
6000   ins_pipe( ialu_reg_reg_fat );
6001 %}
6002 
6003 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
6004   match(Set dst mem);
6005 
6006   ins_cost(110);
6007   format %{ "LEA    $dst,$mem" %}
6008   opcode(0x8D);
6009   ins_encode( OpcP, RegMem(dst,mem));
6010   ins_pipe( ialu_reg_reg_fat );
6011 %}
6012 
6013 // Load Constant
6014 instruct loadConI(rRegI dst, immI src) %{
6015   match(Set dst src);
6016 
6017   format %{ "MOV    $dst,$src" %}
6018   ins_encode( LdImmI(dst, src) );
6019   ins_pipe( ialu_reg_fat );
6020 %}
6021 
6022 // Load Constant zero
6023 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
6024   match(Set dst src);
6025   effect(KILL cr);
6026 
6027   ins_cost(50);
6028   format %{ "XOR    $dst,$dst" %}
6029   opcode(0x33);  /* + rd */
6030   ins_encode( OpcP, RegReg( dst, dst ) );
6031   ins_pipe( ialu_reg );
6032 %}
6033 
6034 instruct loadConP(eRegP dst, immP src) %{
6035   match(Set dst src);
6036 
6037   format %{ "MOV    $dst,$src" %}
6038   opcode(0xB8);  /* + rd */
6039   ins_encode( LdImmP(dst, src) );
6040   ins_pipe( ialu_reg_fat );
6041 %}
6042 
6043 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
6044   match(Set dst src);
6045   effect(KILL cr);
6046   ins_cost(200);
6047   format %{ "MOV    $dst.lo,$src.lo\n\t"
6048             "MOV    $dst.hi,$src.hi" %}
6049   opcode(0xB8);
6050   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6051   ins_pipe( ialu_reg_long_fat );
6052 %}
6053 
6054 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6055   match(Set dst src);
6056   effect(KILL cr);
6057   ins_cost(150);
6058   format %{ "XOR    $dst.lo,$dst.lo\n\t"
6059             "XOR    $dst.hi,$dst.hi" %}
6060   opcode(0x33,0x33);
6061   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6062   ins_pipe( ialu_reg_long );
6063 %}
6064 
6065 // The instruction usage is guarded by the predicate in operand immFPR().
6066 instruct loadConFPR(regFPR dst, immFPR con) %{
6067   match(Set dst con);
6068   ins_cost(125);
6069   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6070             "FSTP   $dst" %}
6071   ins_encode %{
6072     __ fld_s($constantaddress($con));
6073     __ fstp_d($dst$$reg);
6074   %}
6075   ins_pipe(fpu_reg_con);
6076 %}
6077 
6078 // The instruction usage is guarded by the predicate in operand immFPR0().
6079 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6080   match(Set dst con);
6081   ins_cost(125);
6082   format %{ "FLDZ   ST\n\t"
6083             "FSTP   $dst" %}
6084   ins_encode %{
6085     __ fldz();
6086     __ fstp_d($dst$$reg);
6087   %}
6088   ins_pipe(fpu_reg_con);
6089 %}
6090 
6091 // The instruction usage is guarded by the predicate in operand immFPR1().
6092 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6093   match(Set dst con);
6094   ins_cost(125);
6095   format %{ "FLD1   ST\n\t"
6096             "FSTP   $dst" %}
6097   ins_encode %{
6098     __ fld1();
6099     __ fstp_d($dst$$reg);
6100   %}
6101   ins_pipe(fpu_reg_con);
6102 %}
6103 
6104 // The instruction usage is guarded by the predicate in operand immF().
6105 instruct loadConF(regF dst, immF con) %{
6106   match(Set dst con);
6107   ins_cost(125);
6108   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6109   ins_encode %{
6110     __ movflt($dst$$XMMRegister, $constantaddress($con));
6111   %}
6112   ins_pipe(pipe_slow);
6113 %}
6114 
6115 // The instruction usage is guarded by the predicate in operand immF0().
6116 instruct loadConF0(regF dst, immF0 src) %{
6117   match(Set dst src);
6118   ins_cost(100);
6119   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6120   ins_encode %{
6121     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6122   %}
6123   ins_pipe(pipe_slow);
6124 %}
6125 
6126 // The instruction usage is guarded by the predicate in operand immDPR().
6127 instruct loadConDPR(regDPR dst, immDPR con) %{
6128   match(Set dst con);
6129   ins_cost(125);
6130 
6131   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6132             "FSTP   $dst" %}
6133   ins_encode %{
6134     __ fld_d($constantaddress($con));
6135     __ fstp_d($dst$$reg);
6136   %}
6137   ins_pipe(fpu_reg_con);
6138 %}
6139 
6140 // The instruction usage is guarded by the predicate in operand immDPR0().
6141 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6142   match(Set dst con);
6143   ins_cost(125);
6144 
6145   format %{ "FLDZ   ST\n\t"
6146             "FSTP   $dst" %}
6147   ins_encode %{
6148     __ fldz();
6149     __ fstp_d($dst$$reg);
6150   %}
6151   ins_pipe(fpu_reg_con);
6152 %}
6153 
6154 // The instruction usage is guarded by the predicate in operand immDPR1().
6155 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6156   match(Set dst con);
6157   ins_cost(125);
6158 
6159   format %{ "FLD1   ST\n\t"
6160             "FSTP   $dst" %}
6161   ins_encode %{
6162     __ fld1();
6163     __ fstp_d($dst$$reg);
6164   %}
6165   ins_pipe(fpu_reg_con);
6166 %}
6167 
6168 // The instruction usage is guarded by the predicate in operand immD().
6169 instruct loadConD(regD dst, immD con) %{
6170   match(Set dst con);
6171   ins_cost(125);
6172   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6173   ins_encode %{
6174     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6175   %}
6176   ins_pipe(pipe_slow);
6177 %}
6178 
6179 // The instruction usage is guarded by the predicate in operand immD0().
6180 instruct loadConD0(regD dst, immD0 src) %{
6181   match(Set dst src);
6182   ins_cost(100);
6183   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6184   ins_encode %{
6185     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6186   %}
6187   ins_pipe( pipe_slow );
6188 %}
6189 
6190 // Load Stack Slot
6191 instruct loadSSI(rRegI dst, stackSlotI src) %{
6192   match(Set dst src);
6193   ins_cost(125);
6194 
6195   format %{ "MOV    $dst,$src" %}
6196   opcode(0x8B);
6197   ins_encode( OpcP, RegMem(dst,src));
6198   ins_pipe( ialu_reg_mem );
6199 %}
6200 
6201 instruct loadSSL(eRegL dst, stackSlotL src) %{
6202   match(Set dst src);
6203 
6204   ins_cost(200);
6205   format %{ "MOV    $dst,$src.lo\n\t"
6206             "MOV    $dst+4,$src.hi" %}
6207   opcode(0x8B, 0x8B);
6208   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6209   ins_pipe( ialu_mem_long_reg );
6210 %}
6211 
6212 // Load Stack Slot
6213 instruct loadSSP(eRegP dst, stackSlotP src) %{
6214   match(Set dst src);
6215   ins_cost(125);
6216 
6217   format %{ "MOV    $dst,$src" %}
6218   opcode(0x8B);
6219   ins_encode( OpcP, RegMem(dst,src));
6220   ins_pipe( ialu_reg_mem );
6221 %}
6222 
6223 // Load Stack Slot
6224 instruct loadSSF(regFPR dst, stackSlotF src) %{
6225   match(Set dst src);
6226   ins_cost(125);
6227 
6228   format %{ "FLD_S  $src\n\t"
6229             "FSTP   $dst" %}
6230   opcode(0xD9);               /* D9 /0, FLD m32real */
6231   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6232               Pop_Reg_FPR(dst) );
6233   ins_pipe( fpu_reg_mem );
6234 %}
6235 
6236 // Load Stack Slot
6237 instruct loadSSD(regDPR dst, stackSlotD src) %{
6238   match(Set dst src);
6239   ins_cost(125);
6240 
6241   format %{ "FLD_D  $src\n\t"
6242             "FSTP   $dst" %}
6243   opcode(0xDD);               /* DD /0, FLD m64real */
6244   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6245               Pop_Reg_DPR(dst) );
6246   ins_pipe( fpu_reg_mem );
6247 %}
6248 
6249 // Prefetch instructions for allocation.
6250 // Must be safe to execute with invalid address (cannot fault).
6251 
6252 instruct prefetchAlloc0( memory mem ) %{
6253   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6254   match(PrefetchAllocation mem);
6255   ins_cost(0);
6256   size(0);
6257   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6258   ins_encode();
6259   ins_pipe(empty);
6260 %}
6261 
6262 instruct prefetchAlloc( memory mem ) %{
6263   predicate(AllocatePrefetchInstr==3);
6264   match( PrefetchAllocation mem );
6265   ins_cost(100);
6266 
6267   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6268   ins_encode %{
6269     __ prefetchw($mem$$Address);
6270   %}
6271   ins_pipe(ialu_mem);
6272 %}
6273 
6274 instruct prefetchAllocNTA( memory mem ) %{
6275   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6276   match(PrefetchAllocation mem);
6277   ins_cost(100);
6278 
6279   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6280   ins_encode %{
6281     __ prefetchnta($mem$$Address);
6282   %}
6283   ins_pipe(ialu_mem);
6284 %}
6285 
6286 instruct prefetchAllocT0( memory mem ) %{
6287   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6288   match(PrefetchAllocation mem);
6289   ins_cost(100);
6290 
6291   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6292   ins_encode %{
6293     __ prefetcht0($mem$$Address);
6294   %}
6295   ins_pipe(ialu_mem);
6296 %}
6297 
6298 instruct prefetchAllocT2( memory mem ) %{
6299   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6300   match(PrefetchAllocation mem);
6301   ins_cost(100);
6302 
6303   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6304   ins_encode %{
6305     __ prefetcht2($mem$$Address);
6306   %}
6307   ins_pipe(ialu_mem);
6308 %}
6309 
6310 //----------Store Instructions-------------------------------------------------
6311 
6312 // Store Byte
6313 instruct storeB(memory mem, xRegI src) %{
6314   match(Set mem (StoreB mem src));
6315 
6316   ins_cost(125);
6317   format %{ "MOV8   $mem,$src" %}
6318   opcode(0x88);
6319   ins_encode( OpcP, RegMem( src, mem ) );
6320   ins_pipe( ialu_mem_reg );
6321 %}
6322 
6323 // Store Char/Short
6324 instruct storeC(memory mem, rRegI src) %{
6325   match(Set mem (StoreC mem src));
6326 
6327   ins_cost(125);
6328   format %{ "MOV16  $mem,$src" %}
6329   opcode(0x89, 0x66);
6330   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6331   ins_pipe( ialu_mem_reg );
6332 %}
6333 
6334 // Store Integer
6335 instruct storeI(memory mem, rRegI src) %{
6336   match(Set mem (StoreI mem src));
6337 
6338   ins_cost(125);
6339   format %{ "MOV    $mem,$src" %}
6340   opcode(0x89);
6341   ins_encode( OpcP, RegMem( src, mem ) );
6342   ins_pipe( ialu_mem_reg );
6343 %}
6344 
6345 // Store Long
6346 instruct storeL(long_memory mem, eRegL src) %{
6347   predicate(!((StoreLNode*)n)->require_atomic_access());
6348   match(Set mem (StoreL mem src));
6349 
6350   ins_cost(200);
6351   format %{ "MOV    $mem,$src.lo\n\t"
6352             "MOV    $mem+4,$src.hi" %}
6353   opcode(0x89, 0x89);
6354   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6355   ins_pipe( ialu_mem_long_reg );
6356 %}
6357 
6358 // Store Long to Integer
6359 instruct storeL2I(memory mem, eRegL src) %{
6360   match(Set mem (StoreI mem (ConvL2I src)));
6361 
6362   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6363   ins_encode %{
6364     __ movl($mem$$Address, $src$$Register);
6365   %}
6366   ins_pipe(ialu_mem_reg);
6367 %}
6368 
6369 // Volatile Store Long.  Must be atomic, so move it into
6370 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6371 // target address before the store (for null-ptr checks)
6372 // so the memory operand is used twice in the encoding.
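     // A sketch of what the encodings below emit, read off their format
     // strings: CMP EAX,$mem probes the address so an implicit null check can
     // fire before anything is written; FILD then loads all 64 bits of the
     // stack slot onto the x87 stack and FISTP writes them back to $mem in a
     // single memory access, which is what makes the store atomic.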
6373 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6374   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6375   match(Set mem (StoreL mem src));
6376   effect( KILL cr );
6377   ins_cost(400);
6378   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6379             "FILD   $src\n\t"
6380             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6381   opcode(0x3B);
6382   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6383   ins_pipe( fpu_reg_mem );
6384 %}
6385 
6386 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6387   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6388   match(Set mem (StoreL mem src));
6389   effect( TEMP tmp, KILL cr );
6390   ins_cost(380);
6391   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6392             "MOVSD  $tmp,$src\n\t"
6393             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6394   ins_encode %{
6395     __ cmpl(rax, $mem$$Address);
6396     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6397     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6398   %}
6399   ins_pipe( pipe_slow );
6400 %}
6401 
6402 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6403   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6404   match(Set mem (StoreL mem src));
6405   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6406   ins_cost(360);
6407   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6408             "MOVD   $tmp,$src.lo\n\t"
6409             "MOVD   $tmp2,$src.hi\n\t"
6410             "PUNPCKLDQ $tmp,$tmp2\n\t"
6411             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6412   ins_encode %{
6413     __ cmpl(rax, $mem$$Address);
6414     __ movdl($tmp$$XMMRegister, $src$$Register);
6415     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6416     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6417     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6418   %}
6419   ins_pipe( pipe_slow );
6420 %}
6421 
6422 // Store Pointer; for storing unknown oops and raw pointers
6423 instruct storeP(memory mem, anyRegP src) %{
6424   match(Set mem (StoreP mem src));
6425 
6426   ins_cost(125);
6427   format %{ "MOV    $mem,$src" %}
6428   opcode(0x89);
6429   ins_encode( OpcP, RegMem( src, mem ) );
6430   ins_pipe( ialu_mem_reg );
6431 %}
6432 
6433 // Store Integer Immediate
6434 instruct storeImmI(memory mem, immI src) %{
6435   match(Set mem (StoreI mem src));
6436 
6437   ins_cost(150);
6438   format %{ "MOV    $mem,$src" %}
6439   opcode(0xC7);               /* C7 /0 */
6440   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6441   ins_pipe( ialu_mem_imm );
6442 %}
6443 
6444 // Store Short/Char Immediate
6445 instruct storeImmI16(memory mem, immI16 src) %{
6446   predicate(UseStoreImmI16);
6447   match(Set mem (StoreC mem src));
6448 
6449   ins_cost(150);
6450   format %{ "MOV16  $mem,$src" %}
6451   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6452   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6453   ins_pipe( ialu_mem_imm );
6454 %}
6455 
6456 // Store Pointer Immediate; null pointers or constant oops that do not
6457 // need card-mark barriers.
6458 instruct storeImmP(memory mem, immP src) %{
6459   match(Set mem (StoreP mem src));
6460 
6461   ins_cost(150);
6462   format %{ "MOV    $mem,$src" %}
6463   opcode(0xC7);               /* C7 /0 */
6464   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6465   ins_pipe( ialu_mem_imm );
6466 %}
6467 
6468 // Store Byte Immediate
6469 instruct storeImmB(memory mem, immI8 src) %{
6470   match(Set mem (StoreB mem src));
6471 
6472   ins_cost(150);
6473   format %{ "MOV8   $mem,$src" %}
6474   opcode(0xC6);               /* C6 /0 */
6475   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6476   ins_pipe( ialu_mem_imm );
6477 %}
6478 
6479 // Store CMS card-mark Immediate
6480 instruct storeImmCM(memory mem, immI8 src) %{
6481   match(Set mem (StoreCM mem src));
6482 
6483   ins_cost(150);
6484   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6485   opcode(0xC6);               /* C6 /0 */
6486   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6487   ins_pipe( ialu_mem_imm );
6488 %}
6489 
6490 // Store Double
6491 instruct storeDPR( memory mem, regDPR1 src) %{
6492   predicate(UseSSE<=1);
6493   match(Set mem (StoreD mem src));
6494 
6495   ins_cost(100);
6496   format %{ "FST_D  $mem,$src" %}
6497   opcode(0xDD);       /* DD /2 */
6498   ins_encode( enc_FPR_store(mem,src) );
6499   ins_pipe( fpu_mem_reg );
6500 %}
6501 
6502 // Store double does rounding on x86
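     // (The x87 stack holds values in 80-bit extended precision; FST_D to a
     // 64-bit memory slot rounds to double on the way out, which is why the
     // RoundDouble node can be folded into the store below.)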
6503 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6504   predicate(UseSSE<=1);
6505   match(Set mem (StoreD mem (RoundDouble src)));
6506 
6507   ins_cost(100);
6508   format %{ "FST_D  $mem,$src\t# round" %}
6509   opcode(0xDD);       /* DD /2 */
6510   ins_encode( enc_FPR_store(mem,src) );
6511   ins_pipe( fpu_mem_reg );
6512 %}
6513 
6514 // Store XMM register to memory (double-precision floating point)
6515 // MOVSD instruction
6516 instruct storeD(memory mem, regD src) %{
6517   predicate(UseSSE>=2);
6518   match(Set mem (StoreD mem src));
6519   ins_cost(95);
6520   format %{ "MOVSD  $mem,$src" %}
6521   ins_encode %{
6522     __ movdbl($mem$$Address, $src$$XMMRegister);
6523   %}
6524   ins_pipe( pipe_slow );
6525 %}
6526 
6527 // Store XMM register to memory (single-precision floating point)
6528 // MOVSS instruction
6529 instruct storeF(memory mem, regF src) %{
6530   predicate(UseSSE>=1);
6531   match(Set mem (StoreF mem src));
6532   ins_cost(95);
6533   format %{ "MOVSS  $mem,$src" %}
6534   ins_encode %{
6535     __ movflt($mem$$Address, $src$$XMMRegister);
6536   %}
6537   ins_pipe( pipe_slow );
6538 %}
6539 
6540 // Store Float
6541 instruct storeFPR( memory mem, regFPR1 src) %{
6542   predicate(UseSSE==0);
6543   match(Set mem (StoreF mem src));
6544 
6545   ins_cost(100);
6546   format %{ "FST_S  $mem,$src" %}
6547   opcode(0xD9);       /* D9 /2 */
6548   ins_encode( enc_FPR_store(mem,src) );
6549   ins_pipe( fpu_mem_reg );
6550 %}
6551 
6552 // Store Float does rounding on x86
6553 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6554   predicate(UseSSE==0);
6555   match(Set mem (StoreF mem (RoundFloat src)));
6556 
6557   ins_cost(100);
6558   format %{ "FST_S  $mem,$src\t# round" %}
6559   opcode(0xD9);       /* D9 /2 */
6560   ins_encode( enc_FPR_store(mem,src) );
6561   ins_pipe( fpu_mem_reg );
6562 %}
6563 
6564 // Store Float does rounding on x86
6565 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6566   predicate(UseSSE<=1);
6567   match(Set mem (StoreF mem (ConvD2F src)));
6568 
6569   ins_cost(100);
6570   format %{ "FST_S  $mem,$src\t# D-round" %}
6571   opcode(0xD9);       /* D9 /2 */
6572   ins_encode( enc_FPR_store(mem,src) );
6573   ins_pipe( fpu_mem_reg );
6574 %}
6575 
6576 // Store immediate Float value (it is faster than store from FPU register)
6577 // The instruction usage is guarded by predicate in operand immFPR().
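     // The constant's raw IEEE-754 bits are emitted as an ordinary 32-bit
     // immediate (Con32FPR_as_bits), so no FPU register traffic is needed.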
6578 instruct storeFPR_imm( memory mem, immFPR src) %{
6579   match(Set mem (StoreF mem src));
6580 
6581   ins_cost(50);
6582   format %{ "MOV    $mem,$src\t# store float" %}
6583   opcode(0xC7);               /* C7 /0 */
6584   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6585   ins_pipe( ialu_mem_imm );
6586 %}
6587 
6588 // Store immediate Float value (it is faster than store from XMM register)
6589 // The instruction usage is guarded by predicate in operand immF().
6590 instruct storeF_imm( memory mem, immF src) %{
6591   match(Set mem (StoreF mem src));
6592 
6593   ins_cost(50);
6594   format %{ "MOV    $mem,$src\t# store float" %}
6595   opcode(0xC7);               /* C7 /0 */
6596   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6597   ins_pipe( ialu_mem_imm );
6598 %}
6599 
6600 // Store Integer to stack slot
6601 instruct storeSSI(stackSlotI dst, rRegI src) %{
6602   match(Set dst src);
6603 
6604   ins_cost(100);
6605   format %{ "MOV    $dst,$src" %}
6606   opcode(0x89);
6607   ins_encode( OpcPRegSS( dst, src ) );
6608   ins_pipe( ialu_mem_reg );
6609 %}
6610 
6611 // Store Integer to stack slot
6612 instruct storeSSP(stackSlotP dst, eRegP src) %{
6613   match(Set dst src);
6614 
6615   ins_cost(100);
6616   format %{ "MOV    $dst,$src" %}
6617   opcode(0x89);
6618   ins_encode( OpcPRegSS( dst, src ) );
6619   ins_pipe( ialu_mem_reg );
6620 %}
6621 
6622 // Store Long to stack slot
6623 instruct storeSSL(stackSlotL dst, eRegL src) %{
6624   match(Set dst src);
6625 
6626   ins_cost(200);
6627   format %{ "MOV    $dst,$src.lo\n\t"
6628             "MOV    $dst+4,$src.hi" %}
6629   opcode(0x89, 0x89);
6630   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6631   ins_pipe( ialu_mem_long_reg );
6632 %}
6633 
6634 //----------MemBar Instructions-----------------------------------------------
6635 // Memory barrier flavors
6636 
6637 instruct membar_acquire() %{
6638   match(MemBarAcquire);
6639   match(LoadFence);
6640   ins_cost(400);
6641 
6642   size(0);
6643   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6644   ins_encode();
6645   ins_pipe(empty);
6646 %}
6647 
6648 instruct membar_acquire_lock() %{
6649   match(MemBarAcquireLock);
6650   ins_cost(0);
6651 
6652   size(0);
6653   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6654   ins_encode( );
6655   ins_pipe(empty);
6656 %}
6657 
6658 instruct membar_release() %{
6659   match(MemBarRelease);
6660   match(StoreFence);
6661   ins_cost(400);
6662 
6663   size(0);
6664   format %{ "MEMBAR-release ! (empty encoding)" %}
6665   ins_encode( );
6666   ins_pipe(empty);
6667 %}
6668 
6669 instruct membar_release_lock() %{
6670   match(MemBarReleaseLock);
6671   ins_cost(0);
6672 
6673   size(0);
6674   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6675   ins_encode( );
6676   ins_pipe(empty);
6677 %}
6678 
6679 instruct membar_volatile(eFlagsReg cr) %{
6680   match(MemBarVolatile);
6681   effect(KILL cr);
6682   ins_cost(400);
6683 
6684   format %{
6685     $$template
6686     if (os::is_MP()) {
6687       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6688     } else {
6689       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6690     }
6691   %}
6692   ins_encode %{
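         // On MP systems the StoreLoad barrier is a locked read-modify-write
         // of a dead stack slot (the "LOCK ADDL [ESP + #0], 0" shown in the
         // format above); on UP the emitted code is empty.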
6693     __ membar(Assembler::StoreLoad);
6694   %}
6695   ins_pipe(pipe_slow);
6696 %}
6697 
6698 instruct unnecessary_membar_volatile() %{
6699   match(MemBarVolatile);
6700   predicate(Matcher::post_store_load_barrier(n));
6701   ins_cost(0);
6702 
6703   size(0);
6704   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6705   ins_encode( );
6706   ins_pipe(empty);
6707 %}
6708 
6709 instruct membar_storestore() %{
6710   match(MemBarStoreStore);
6711   ins_cost(0);
6712 
6713   size(0);
6714   format %{ "MEMBAR-storestore (empty encoding)" %}
6715   ins_encode( );
6716   ins_pipe(empty);
6717 %}
6718 
6719 //----------Move Instructions--------------------------------------------------
6720 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6721   match(Set dst (CastX2P src));
6722   format %{ "# X2P  $dst, $src" %}
6723   ins_encode( /*empty encoding*/ );
6724   ins_cost(0);
6725   ins_pipe(empty);
6726 %}
6727 
6728 instruct castP2X(rRegI dst, eRegP src ) %{
6729   match(Set dst (CastP2X src));
6730   ins_cost(50);
6731   format %{ "MOV    $dst, $src\t# CastP2X" %}
6732   ins_encode( enc_Copy( dst, src) );
6733   ins_pipe( ialu_reg_reg );
6734 %}
6735 
6736 //----------Conditional Move---------------------------------------------------
6737 // Conditional move
6738 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6739   predicate(!VM_Version::supports_cmov() );
6740   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6741   ins_cost(200);
6742   format %{ "J$cop,us skip\t# signed cmove\n\t"
6743             "MOV    $dst,$src\n"
6744       "skip:" %}
6745   ins_encode %{
6746     Label Lskip;
6747     // Invert sense of branch from sense of CMOV
6748     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6749     __ movl($dst$$Register, $src$$Register);
6750     __ bind(Lskip);
6751   %}
6752   ins_pipe( pipe_cmov_reg );
6753 %}
6754 
6755 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6756   predicate(!VM_Version::supports_cmov() );
6757   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6758   ins_cost(200);
6759   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6760             "MOV    $dst,$src\n"
6761       "skip:" %}
6762   ins_encode %{
6763     Label Lskip;
6764     // Invert sense of branch from sense of CMOV
6765     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6766     __ movl($dst$$Register, $src$$Register);
6767     __ bind(Lskip);
6768   %}
6769   ins_pipe( pipe_cmov_reg );
6770 %}
6771 
6772 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6773   predicate(VM_Version::supports_cmov() );
6774   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6775   ins_cost(200);
6776   format %{ "CMOV$cop $dst,$src" %}
6777   opcode(0x0F,0x40);
6778   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6779   ins_pipe( pipe_cmov_reg );
6780 %}
6781 
6782 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6783   predicate(VM_Version::supports_cmov() );
6784   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6785   ins_cost(200);
6786   format %{ "CMOV$cop $dst,$src" %}
6787   opcode(0x0F,0x40);
6788   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6789   ins_pipe( pipe_cmov_reg );
6790 %}
6791 
6792 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6793   predicate(VM_Version::supports_cmov() );
6794   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6795   ins_cost(200);
6796   expand %{
6797     cmovI_regU(cop, cr, dst, src);
6798   %}
6799 %}
6800 
6801 // Conditional move
6802 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6803   predicate(VM_Version::supports_cmov() );
6804   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6805   ins_cost(250);
6806   format %{ "CMOV$cop $dst,$src" %}
6807   opcode(0x0F,0x40);
6808   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6809   ins_pipe( pipe_cmov_mem );
6810 %}
6811 
6812 // Conditional move
6813 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6814   predicate(VM_Version::supports_cmov() );
6815   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6816   ins_cost(250);
6817   format %{ "CMOV$cop $dst,$src" %}
6818   opcode(0x0F,0x40);
6819   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6820   ins_pipe( pipe_cmov_mem );
6821 %}
6822 
6823 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6824   predicate(VM_Version::supports_cmov() );
6825   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6826   ins_cost(250);
6827   expand %{
6828     cmovI_memU(cop, cr, dst, src);
6829   %}
6830 %}
6831 
6832 // Conditional move
6833 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6834   predicate(VM_Version::supports_cmov() );
6835   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6836   ins_cost(200);
6837   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6838   opcode(0x0F,0x40);
6839   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6840   ins_pipe( pipe_cmov_reg );
6841 %}
6842 
6843 // Conditional move (non-P6 version)
6844 // Note:  a CMoveP is generated for stubs and native wrappers
6845 //        regardless of whether we are on a P6, so we
6846 //        emulate a cmov here
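     //        (enc_cmov_branch apparently emits a short Jcc on the inverted
     //        condition that jumps over the move; its second argument, 0x2
     //        here and 0x4 in the FPU variants further down, looks like the
     //        byte count of the code being skipped.)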
6847 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6848   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6849   ins_cost(300);
6850   format %{ "Jn$cop   skip\n\t"
6851           "MOV    $dst,$src\t# pointer\n"
6852       "skip:" %}
6853   opcode(0x8b);
6854   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6855   ins_pipe( pipe_cmov_reg );
6856 %}
6857 
6858 // Conditional move
6859 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6860   predicate(VM_Version::supports_cmov() );
6861   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6862   ins_cost(200);
6863   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6864   opcode(0x0F,0x40);
6865   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6866   ins_pipe( pipe_cmov_reg );
6867 %}
6868 
6869 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6870   predicate(VM_Version::supports_cmov() );
6871   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6872   ins_cost(200);
6873   expand %{
6874     cmovP_regU(cop, cr, dst, src);
6875   %}
6876 %}
6877 
6878 // DISABLED: Requires the ADLC to emit a bottom_type call that
6879 // correctly meets the two pointer arguments; one is an incoming
6880 // register but the other is a memory operand.  ALSO appears to
6881 // be buggy with implicit null checks.
6882 //
6883 //// Conditional move
6884 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6885 //  predicate(VM_Version::supports_cmov() );
6886 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6887 //  ins_cost(250);
6888 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6889 //  opcode(0x0F,0x40);
6890 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6891 //  ins_pipe( pipe_cmov_mem );
6892 //%}
6893 //
6894 //// Conditional move
6895 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6896 //  predicate(VM_Version::supports_cmov() );
6897 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6898 //  ins_cost(250);
6899 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6900 //  opcode(0x0F,0x40);
6901 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6902 //  ins_pipe( pipe_cmov_mem );
6903 //%}
6904 
6905 // Conditional move
6906 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6907   predicate(UseSSE<=1);
6908   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6909   ins_cost(200);
6910   format %{ "FCMOV$cop $dst,$src\t# double" %}
6911   opcode(0xDA);
6912   ins_encode( enc_cmov_dpr(cop,src) );
6913   ins_pipe( pipe_cmovDPR_reg );
6914 %}
6915 
6916 // Conditional move
6917 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6918   predicate(UseSSE==0);
6919   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6920   ins_cost(200);
6921   format %{ "FCMOV$cop $dst,$src\t# float" %}
6922   opcode(0xDA);
6923   ins_encode( enc_cmov_dpr(cop,src) );
6924   ins_pipe( pipe_cmovDPR_reg );
6925 %}
6926 
6927 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
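     // (FCMOVcc only tests CF, ZF and PF, i.e. the unsigned/unordered
     // conditions, so signed FP cmoves fall back to the branch-around-copy
     // pattern used below.)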
6928 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6929   predicate(UseSSE<=1);
6930   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6931   ins_cost(200);
6932   format %{ "Jn$cop   skip\n\t"
6933             "MOV    $dst,$src\t# double\n"
6934       "skip:" %}
6935   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6936   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6937   ins_pipe( pipe_cmovDPR_reg );
6938 %}
6939 
6940 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6941 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6942   predicate(UseSSE==0);
6943   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6944   ins_cost(200);
6945   format %{ "Jn$cop    skip\n\t"
6946             "MOV    $dst,$src\t# float\n"
6947       "skip:" %}
6948   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6949   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6950   ins_pipe( pipe_cmovDPR_reg );
6951 %}
6952 
6953 // No CMOVE with SSE/SSE2
6954 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6955   predicate (UseSSE>=1);
6956   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6957   ins_cost(200);
6958   format %{ "Jn$cop   skip\n\t"
6959             "MOVSS  $dst,$src\t# float\n"
6960       "skip:" %}
6961   ins_encode %{
6962     Label skip;
6963     // Invert sense of branch from sense of CMOV
6964     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6965     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6966     __ bind(skip);
6967   %}
6968   ins_pipe( pipe_slow );
6969 %}
6970 
6971 // No CMOVE with SSE/SSE2
6972 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6973   predicate (UseSSE>=2);
6974   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6975   ins_cost(200);
6976   format %{ "Jn$cop   skip\n\t"
6977             "MOVSD  $dst,$src\t# double\n"
6978       "skip:" %}
6979   ins_encode %{
6980     Label skip;
6981     // Invert sense of branch from sense of CMOV
6982     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6983     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6984     __ bind(skip);
6985   %}
6986   ins_pipe( pipe_slow );
6987 %}
6988 
6989 // unsigned version
6990 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6991   predicate (UseSSE>=1);
6992   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6993   ins_cost(200);
6994   format %{ "Jn$cop   skip\n\t"
6995             "MOVSS  $dst,$src\t# float\n"
6996       "skip:" %}
6997   ins_encode %{
6998     Label skip;
6999     // Invert sense of branch from sense of CMOV
7000     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7001     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7002     __ bind(skip);
7003   %}
7004   ins_pipe( pipe_slow );
7005 %}
7006 
7007 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
7008   predicate (UseSSE>=1);
7009   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7010   ins_cost(200);
7011   expand %{
7012     fcmovF_regU(cop, cr, dst, src);
7013   %}
7014 %}
7015 
7016 // unsigned version
7017 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7018   predicate (UseSSE>=2);
7019   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7020   ins_cost(200);
7021   format %{ "Jn$cop   skip\n\t"
7022             "MOVSD  $dst,$src\t# double\n"
7023       "skip:" %}
7024   ins_encode %{
7025     Label skip;
7026     // Invert sense of branch from sense of CMOV
7027     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7028     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7029     __ bind(skip);
7030   %}
7031   ins_pipe( pipe_slow );
7032 %}
7033 
7034 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7035   predicate (UseSSE>=2);
7036   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7037   ins_cost(200);
7038   expand %{
7039     fcmovD_regU(cop, cr, dst, src);
7040   %}
7041 %}
7042 
7043 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7044   predicate(VM_Version::supports_cmov() );
7045   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7046   ins_cost(200);
7047   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7048             "CMOV$cop $dst.hi,$src.hi" %}
7049   opcode(0x0F,0x40);
7050   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7051   ins_pipe( pipe_cmov_reg_long );
7052 %}
7053 
7054 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7055   predicate(VM_Version::supports_cmov() );
7056   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7057   ins_cost(200);
7058   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7059             "CMOV$cop $dst.hi,$src.hi" %}
7060   opcode(0x0F,0x40);
7061   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7062   ins_pipe( pipe_cmov_reg_long );
7063 %}
7064 
7065 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7066   predicate(VM_Version::supports_cmov() );
7067   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7068   ins_cost(200);
7069   expand %{
7070     cmovL_regU(cop, cr, dst, src);
7071   %}
7072 %}
7073 
7074 //----------Arithmetic Instructions--------------------------------------------
7075 //----------Addition Instructions----------------------------------------------
7076 
7077 // Integer Addition Instructions
7078 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7079   match(Set dst (AddI dst src));
7080   effect(KILL cr);
7081 
7082   size(2);
7083   format %{ "ADD    $dst,$src" %}
7084   opcode(0x03);
7085   ins_encode( OpcP, RegReg( dst, src) );
7086   ins_pipe( ialu_reg_reg );
7087 %}
7088 
7089 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7090   match(Set dst (AddI dst src));
7091   effect(KILL cr);
7092 
7093   format %{ "ADD    $dst,$src" %}
7094   opcode(0x81, 0x00); /* /0 id */
7095   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7096   ins_pipe( ialu_reg );
7097 %}
7098 
7099 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7100   predicate(UseIncDec);
7101   match(Set dst (AddI dst src));
7102   effect(KILL cr);
7103 
7104   size(1);
7105   format %{ "INC    $dst" %}
7106   opcode(0x40); /*  */
7107   ins_encode( Opc_plus( primary, dst ) );
7108   ins_pipe( ialu_reg );
7109 %}
7110 
7111 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7112   match(Set dst (AddI src0 src1));
7113   ins_cost(110);
7114 
7115   format %{ "LEA    $dst,[$src0 + $src1]" %}
7116   opcode(0x8D); /* 0x8D /r */
7117   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7118   ins_pipe( ialu_reg_reg );
7119 %}
7120 
7121 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7122   match(Set dst (AddP src0 src1));
7123   ins_cost(110);
7124 
7125   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7126   opcode(0x8D); /* 0x8D /r */
7127   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7128   ins_pipe( ialu_reg_reg );
7129 %}
7130 
7131 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7132   predicate(UseIncDec);
7133   match(Set dst (AddI dst src));
7134   effect(KILL cr);
7135 
7136   size(1);
7137   format %{ "DEC    $dst" %}
7138   opcode(0x48); /*  */
7139   ins_encode( Opc_plus( primary, dst ) );
7140   ins_pipe( ialu_reg );
7141 %}
7142 
7143 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7144   match(Set dst (AddP dst src));
7145   effect(KILL cr);
7146 
7147   size(2);
7148   format %{ "ADD    $dst,$src" %}
7149   opcode(0x03);
7150   ins_encode( OpcP, RegReg( dst, src) );
7151   ins_pipe( ialu_reg_reg );
7152 %}
7153 
7154 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7155   match(Set dst (AddP dst src));
7156   effect(KILL cr);
7157 
7158   format %{ "ADD    $dst,$src" %}
7159   opcode(0x81,0x00); /* Opcode 81 /0 id */
7160   // ins_encode( RegImm( dst, src) );
7161   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7162   ins_pipe( ialu_reg );
7163 %}
7164 
7165 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7166   match(Set dst (AddI dst (LoadI src)));
7167   effect(KILL cr);
7168 
7169   ins_cost(125);
7170   format %{ "ADD    $dst,$src" %}
7171   opcode(0x03);
7172   ins_encode( OpcP, RegMem( dst, src) );
7173   ins_pipe( ialu_reg_mem );
7174 %}
7175 
7176 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7177   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7178   effect(KILL cr);
7179 
7180   ins_cost(150);
7181   format %{ "ADD    $dst,$src" %}
7182   opcode(0x01);  /* Opcode 01 /r */
7183   ins_encode( OpcP, RegMem( src, dst ) );
7184   ins_pipe( ialu_mem_reg );
7185 %}
7186 
7187 // Add Memory with Immediate
7188 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7189   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7190   effect(KILL cr);
7191 
7192   ins_cost(125);
7193   format %{ "ADD    $dst,$src" %}
7194   opcode(0x81);               /* Opcode 81 /0 id */
7195   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7196   ins_pipe( ialu_mem_imm );
7197 %}
7198 
7199 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7200   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7201   effect(KILL cr);
7202 
7203   ins_cost(125);
7204   format %{ "INC    $dst" %}
7205   opcode(0xFF);               /* Opcode FF /0 */
7206   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7207   ins_pipe( ialu_mem_imm );
7208 %}
7209 
7210 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7211   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7212   effect(KILL cr);
7213 
7214   ins_cost(125);
7215   format %{ "DEC    $dst" %}
7216   opcode(0xFF);               /* Opcode FF /1 */
7217   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7218   ins_pipe( ialu_mem_imm );
7219 %}
7220 
7221 
7222 instruct checkCastPP( eRegP dst ) %{
7223   match(Set dst (CheckCastPP dst));
7224 
7225   size(0);
7226   format %{ "#checkcastPP of $dst" %}
7227   ins_encode( /*empty encoding*/ );
7228   ins_pipe( empty );
7229 %}
7230 
7231 instruct castPP( eRegP dst ) %{
7232   match(Set dst (CastPP dst));
7233   format %{ "#castPP of $dst" %}
7234   ins_encode( /*empty encoding*/ );
7235   ins_pipe( empty );
7236 %}
7237 
7238 instruct castII( rRegI dst ) %{
7239   match(Set dst (CastII dst));
7240   format %{ "#castII of $dst" %}
7241   ins_encode( /*empty encoding*/ );
7242   ins_cost(0);
7243   ins_pipe( empty );
7244 %}
7245 
7246 
7247 // Load-locked - same as a regular pointer load when used with compare-swap
7248 instruct loadPLocked(eRegP dst, memory mem) %{
7249   match(Set dst (LoadPLocked mem));
7250 
7251   ins_cost(125);
7252   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7253   opcode(0x8B);
7254   ins_encode( OpcP, RegMem(dst,mem));
7255   ins_pipe( ialu_reg_mem );
7256 %}
7257 
7258 // Conditional-store of the updated heap-top.
7259 // Used during allocation of the shared heap.
7260 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
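     // Roughly, the CMPXCHG below behaves as:
     //   if (*heap_top_ptr == EAX) { *heap_top_ptr = newval; ZF = 1; }
     //   else                      { EAX = *heap_top_ptr;    ZF = 0; }
     // so the caller can branch on ZF (EQ on success, as noted above).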
7261 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7262   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7263   // EAX is killed if there is contention, but then it's also unused.
7264   // In the common case of no contention, EAX holds the new oop address.
7265   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7266   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7267   ins_pipe( pipe_cmpxchg );
7268 %}
7269 
7270 // Conditional-store of an int value.
7271 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7272 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7273   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7274   effect(KILL oldval);
7275   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7276   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7277   ins_pipe( pipe_cmpxchg );
7278 %}
7279 
7280 // Conditional-store of a long value.
7281 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7282 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7283   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7284   effect(KILL oldval);
7285   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7286             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7287             "XCHG   EBX,ECX"
7288   %}
7289   ins_encode %{
7290     // Note: we need to swap rbx and rcx before and after the
7291     //       cmpxchg8 instruction because the instruction uses
7292     //       rcx as the high order word of the new value to store but
7293     //       our register encoding uses rbx.
7294     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7295     if( os::is_MP() )
7296       __ lock();
7297     __ cmpxchg8($mem$$Address);
7298     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7299   %}
7300   ins_pipe( pipe_cmpxchg );
7301 %}
7302 
7303 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7304 
7305 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7306   predicate(VM_Version::supports_cx8());
7307   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7308   match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7309   effect(KILL cr, KILL oldval);
7310   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7311             "MOV    $res,0\n\t"
7312             "JNE,s  fail\n\t"
7313             "MOV    $res,1\n"
7314           "fail:" %}
7315   ins_encode( enc_cmpxchg8(mem_ptr),
7316               enc_flags_ne_to_boolean(res) );
7317   ins_pipe( pipe_cmpxchg );
7318 %}
7319 
7320 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7321   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7322   match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7323   effect(KILL cr, KILL oldval);
7324   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7325             "MOV    $res,0\n\t"
7326             "JNE,s  fail\n\t"
7327             "MOV    $res,1\n"
7328           "fail:" %}
7329   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7330   ins_pipe( pipe_cmpxchg );
7331 %}
7332 
7333 instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7334   match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7335   match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7336   effect(KILL cr, KILL oldval);
7337   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7338             "MOV    $res,0\n\t"
7339             "JNE,s  fail\n\t"
7340             "MOV    $res,1\n"
7341           "fail:" %}
7342   ins_encode( enc_cmpxchgb(mem_ptr),
7343               enc_flags_ne_to_boolean(res) );
7344   ins_pipe( pipe_cmpxchg );
7345 %}
7346 
7347 instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7348   match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7349   match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7350   effect(KILL cr, KILL oldval);
7351   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7352             "MOV    $res,0\n\t"
7353             "JNE,s  fail\n\t"
7354             "MOV    $res,1\n"
7355           "fail:" %}
7356   ins_encode( enc_cmpxchgw(mem_ptr),
7357               enc_flags_ne_to_boolean(res) );
7358   ins_pipe( pipe_cmpxchg );
7359 %}
7360 
7361 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7362   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7363   match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7364   effect(KILL cr, KILL oldval);
7365   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7366             "MOV    $res,0\n\t"
7367             "JNE,s  fail\n\t"
7368             "MOV    $res,1\n"
7369           "fail:" %}
7370   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7371   ins_pipe( pipe_cmpxchg );
7372 %}
7373 
7374 instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7375   predicate(VM_Version::supports_cx8());
7376   match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7377   effect(KILL cr);
7378   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7379   ins_encode( enc_cmpxchg8(mem_ptr) );
7380   ins_pipe( pipe_cmpxchg );
7381 %}
7382 
7383 instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7384   match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7385   effect(KILL cr);
7386   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7387   ins_encode( enc_cmpxchg(mem_ptr) );
7388   ins_pipe( pipe_cmpxchg );
7389 %}
7390 
7391 instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7392   match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7393   effect(KILL cr);
7394   format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7395   ins_encode( enc_cmpxchgb(mem_ptr) );
7396   ins_pipe( pipe_cmpxchg );
7397 %}
7398 
7399 instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7400   match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7401   effect(KILL cr);
7402   format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7403   ins_encode( enc_cmpxchgw(mem_ptr) );
7404   ins_pipe( pipe_cmpxchg );
7405 %}
7406 
7407 instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7408   match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7409   effect(KILL cr);
7410   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7411   ins_encode( enc_cmpxchg(mem_ptr) );
7412   ins_pipe( pipe_cmpxchg );
7413 %}
7414 
7415 instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7416   predicate(n->as_LoadStore()->result_not_used());
7417   match(Set dummy (GetAndAddB mem add));
7418   effect(KILL cr);
7419   format %{ "ADDB  [$mem],$add" %}
7420   ins_encode %{
7421     if (os::is_MP()) { __ lock(); }
7422     __ addb($mem$$Address, $add$$constant);
7423   %}
7424   ins_pipe( pipe_cmpxchg );
7425 %}
7426 
7427 // Important to match to xRegI: only 8-bit regs.
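     // (Only EAX, EBX, ECX and EDX have byte sub-registers on IA-32, which is
     // what xRegI is expected to restrict the allocator to here.)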
7428 instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7429   match(Set newval (GetAndAddB mem newval));
7430   effect(KILL cr);
7431   format %{ "XADDB  [$mem],$newval" %}
7432   ins_encode %{
7433     if (os::is_MP()) { __ lock(); }
7434     __ xaddb($mem$$Address, $newval$$Register);
7435   %}
7436   ins_pipe( pipe_cmpxchg );
7437 %}
7438 
7439 instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7440   predicate(n->as_LoadStore()->result_not_used());
7441   match(Set dummy (GetAndAddS mem add));
7442   effect(KILL cr);
7443   format %{ "ADDS  [$mem],$add" %}
7444   ins_encode %{
7445     if (os::is_MP()) { __ lock(); }
7446     __ addw($mem$$Address, $add$$constant);
7447   %}
7448   ins_pipe( pipe_cmpxchg );
7449 %}
7450 
7451 instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7452   match(Set newval (GetAndAddS mem newval));
7453   effect(KILL cr);
7454   format %{ "XADDS  [$mem],$newval" %}
7455   ins_encode %{
7456     if (os::is_MP()) { __ lock(); }
7457     __ xaddw($mem$$Address, $newval$$Register);
7458   %}
7459   ins_pipe( pipe_cmpxchg );
7460 %}
7461 
7462 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7463   predicate(n->as_LoadStore()->result_not_used());
7464   match(Set dummy (GetAndAddI mem add));
7465   effect(KILL cr);
7466   format %{ "ADDL  [$mem],$add" %}
7467   ins_encode %{
7468     if (os::is_MP()) { __ lock(); }
7469     __ addl($mem$$Address, $add$$constant);
7470   %}
7471   ins_pipe( pipe_cmpxchg );
7472 %}
7473 
7474 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7475   match(Set newval (GetAndAddI mem newval));
7476   effect(KILL cr);
7477   format %{ "XADDL  [$mem],$newval" %}
7478   ins_encode %{
7479     if (os::is_MP()) { __ lock(); }
7480     __ xaddl($mem$$Address, $newval$$Register);
7481   %}
7482   ins_pipe( pipe_cmpxchg );
7483 %}
7484 
7485 // Important to match to xRegI: only 8-bit regs.
7486 instruct xchgB( memory mem, xRegI newval) %{
7487   match(Set newval (GetAndSetB mem newval));
7488   format %{ "XCHGB  $newval,[$mem]" %}
7489   ins_encode %{
7490     __ xchgb($newval$$Register, $mem$$Address);
7491   %}
7492   ins_pipe( pipe_cmpxchg );
7493 %}
7494 
7495 instruct xchgS( memory mem, rRegI newval) %{
7496   match(Set newval (GetAndSetS mem newval));
7497   format %{ "XCHGW  $newval,[$mem]" %}
7498   ins_encode %{
7499     __ xchgw($newval$$Register, $mem$$Address);
7500   %}
7501   ins_pipe( pipe_cmpxchg );
7502 %}
7503 
7504 instruct xchgI( memory mem, rRegI newval) %{
7505   match(Set newval (GetAndSetI mem newval));
7506   format %{ "XCHGL  $newval,[$mem]" %}
7507   ins_encode %{
7508     __ xchgl($newval$$Register, $mem$$Address);
7509   %}
7510   ins_pipe( pipe_cmpxchg );
7511 %}
7512 
7513 instruct xchgP( memory mem, pRegP newval) %{
7514   match(Set newval (GetAndSetP mem newval));
7515   format %{ "XCHGL  $newval,[$mem]" %}
7516   ins_encode %{
7517     __ xchgl($newval$$Register, $mem$$Address);
7518   %}
7519   ins_pipe( pipe_cmpxchg );
7520 %}
7521 
7522 //----------Subtraction Instructions-------------------------------------------
7523 
7524 // Integer Subtraction Instructions
7525 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7526   match(Set dst (SubI dst src));
7527   effect(KILL cr);
7528 
7529   size(2);
7530   format %{ "SUB    $dst,$src" %}
7531   opcode(0x2B);
7532   ins_encode( OpcP, RegReg( dst, src) );
7533   ins_pipe( ialu_reg_reg );
7534 %}
7535 
7536 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7537   match(Set dst (SubI dst src));
7538   effect(KILL cr);
7539 
7540   format %{ "SUB    $dst,$src" %}
7541   opcode(0x81,0x05);  /* Opcode 81 /5 */
7542   // ins_encode( RegImm( dst, src) );
7543   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7544   ins_pipe( ialu_reg );
7545 %}
7546 
7547 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7548   match(Set dst (SubI dst (LoadI src)));
7549   effect(KILL cr);
7550 
7551   ins_cost(125);
7552   format %{ "SUB    $dst,$src" %}
7553   opcode(0x2B);
7554   ins_encode( OpcP, RegMem( dst, src) );
7555   ins_pipe( ialu_reg_mem );
7556 %}
7557 
7558 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7559   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7560   effect(KILL cr);
7561 
7562   ins_cost(150);
7563   format %{ "SUB    $dst,$src" %}
7564   opcode(0x29);  /* Opcode 29 /r */
7565   ins_encode( OpcP, RegMem( src, dst ) );
7566   ins_pipe( ialu_mem_reg );
7567 %}
7568 
7569 // Subtract from a pointer
7570 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7571   match(Set dst (AddP dst (SubI zero src)));
7572   effect(KILL cr);
7573 
7574   size(2);
7575   format %{ "SUB    $dst,$src" %}
7576   opcode(0x2B);
7577   ins_encode( OpcP, RegReg( dst, src) );
7578   ins_pipe( ialu_reg_reg );
7579 %}
7580 
7581 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7582   match(Set dst (SubI zero dst));
7583   effect(KILL cr);
7584 
7585   size(2);
7586   format %{ "NEG    $dst" %}
7587   opcode(0xF7,0x03);  // Opcode F7 /3
7588   ins_encode( OpcP, RegOpc( dst ) );
7589   ins_pipe( ialu_reg );
7590 %}
7591 
7592 //----------Multiplication/Division Instructions-------------------------------
7593 // Integer Multiplication Instructions
7594 // Multiply Register
7595 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7596   match(Set dst (MulI dst src));
7597   effect(KILL cr);
7598 
7599   size(3);
7600   ins_cost(300);
7601   format %{ "IMUL   $dst,$src" %}
7602   opcode(0xAF, 0x0F);
7603   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7604   ins_pipe( ialu_reg_reg_alu0 );
7605 %}
7606 
7607 // Multiply 32-bit Immediate
7608 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7609   match(Set dst (MulI src imm));
7610   effect(KILL cr);
7611 
7612   ins_cost(300);
7613   format %{ "IMUL   $dst,$src,$imm" %}
7614   opcode(0x69);  /* 69 /r id */
7615   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7616   ins_pipe( ialu_reg_reg_alu0 );
7617 %}
7618 
7619 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7620   match(Set dst src);
7621   effect(KILL cr);
7622 
7623   // Note that this is artificially increased to make it more expensive than loadConL
7624   ins_cost(250);
7625   format %{ "MOV    EAX,$src\t// low word only" %}
7626   opcode(0xB8);
7627   ins_encode( LdImmL_Lo(dst, src) );
7628   ins_pipe( ialu_reg_fat );
7629 %}
7630 
7631 // Multiply by 32-bit Immediate, taking the shifted high order results
7632 //  (special case for shift by 32)
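     //  In other words it computes (int)(((long)src1 * con) >> 32): the full
     //  64-bit product lands in EDX:EAX, and for a shift of exactly 32 the
     //  result is simply EDX, so no SAR is needed (compare the 32..63 shift
     //  variant that follows).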
7633 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7634   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7635   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7636              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7637              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7638   effect(USE src1, KILL cr);
7639 
7640   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7641   ins_cost(0*100 + 1*400 - 150);
7642   format %{ "IMUL   EDX:EAX,$src1" %}
7643   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7644   ins_pipe( pipe_slow );
7645 %}
7646 
7647 // Multiply by 32-bit Immediate, taking the shifted high order results
7648 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7649   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7650   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7651              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7652              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7653   effect(USE src1, KILL cr);
7654 
7655   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7656   ins_cost(1*100 + 1*400 - 150);
7657   format %{ "IMUL   EDX:EAX,$src1\n\t"
7658             "SAR    EDX,$cnt-32" %}
7659   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7660   ins_pipe( pipe_slow );
7661 %}
7662 
7663 // Multiply Memory 32-bit Immediate
7664 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7665   match(Set dst (MulI (LoadI src) imm));
7666   effect(KILL cr);
7667 
7668   ins_cost(300);
7669   format %{ "IMUL   $dst,$src,$imm" %}
7670   opcode(0x69);  /* 69 /r id */
7671   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7672   ins_pipe( ialu_reg_mem_alu0 );
7673 %}
7674 
7675 // Multiply Memory
7676 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7677   match(Set dst (MulI dst (LoadI src)));
7678   effect(KILL cr);
7679 
7680   ins_cost(350);
7681   format %{ "IMUL   $dst,$src" %}
7682   opcode(0xAF, 0x0F);
7683   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7684   ins_pipe( ialu_reg_mem_alu0 );
7685 %}
7686 
7687 // Multiply Register Int to Long
7688 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7689   // Basic Idea: long = (long)int * (long)int
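       // A one-operand IMUL leaves the full 64-bit product in EDX:EAX, which
       // is why src is pinned to EAX and dst to the EDX:EAX pair.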
7690   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7691   effect(DEF dst, USE src, USE src1, KILL flags);
7692 
7693   ins_cost(300);
7694   format %{ "IMUL   $dst,$src1" %}
7695 
7696   ins_encode( long_int_multiply( dst, src1 ) );
7697   ins_pipe( ialu_reg_reg_alu0 );
7698 %}
7699 
7700 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7701   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7702   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7703   effect(KILL flags);
7704 
7705   ins_cost(300);
7706   format %{ "MUL    $dst,$src1" %}
7707 
7708   ins_encode( long_uint_multiply(dst, src1) );
7709   ins_pipe( ialu_reg_reg_alu0 );
7710 %}
7711 
7712 // Multiply Register Long
7713 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7714   match(Set dst (MulL dst src));
7715   effect(KILL cr, TEMP tmp);
7716   ins_cost(4*100+3*400);
7717 // Basic idea: lo(result) = lo(x_lo * y_lo)
7718 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
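     // With x = x_hi*2^32 + x_lo and y = y_hi*2^32 + y_lo:
     //   x*y mod 2^64 = x_lo*y_lo + 2^32*(x_hi*y_lo + x_lo*y_hi)  (mod 2^64)
     // The x_hi*y_hi term falls entirely outside the low 64 bits, which is
     // why only the three partial products above are formed.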
7719   format %{ "MOV    $tmp,$src.lo\n\t"
7720             "IMUL   $tmp,EDX\n\t"
7721             "MOV    EDX,$src.hi\n\t"
7722             "IMUL   EDX,EAX\n\t"
7723             "ADD    $tmp,EDX\n\t"
7724             "MUL    EDX:EAX,$src.lo\n\t"
7725             "ADD    EDX,$tmp" %}
7726   ins_encode( long_multiply( dst, src, tmp ) );
7727   ins_pipe( pipe_slow );
7728 %}
7729 
7730 // Multiply Register Long where the left operand's high 32 bits are zero
7731 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7732   predicate(is_operand_hi32_zero(n->in(1)));
7733   match(Set dst (MulL dst src));
7734   effect(KILL cr, TEMP tmp);
7735   ins_cost(2*100+2*400);
7736 // Basic idea: lo(result) = lo(x_lo * y_lo)
7737 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7738   format %{ "MOV    $tmp,$src.hi\n\t"
7739             "IMUL   $tmp,EAX\n\t"
7740             "MUL    EDX:EAX,$src.lo\n\t"
7741             "ADD    EDX,$tmp" %}
7742   ins_encode %{
7743     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7744     __ imull($tmp$$Register, rax);
7745     __ mull($src$$Register);
7746     __ addl(rdx, $tmp$$Register);
7747   %}
7748   ins_pipe( pipe_slow );
7749 %}
7750 
7751 // Multiply Register Long where the right operand's high 32 bits are zero
7752 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7753   predicate(is_operand_hi32_zero(n->in(2)));
7754   match(Set dst (MulL dst src));
7755   effect(KILL cr, TEMP tmp);
7756   ins_cost(2*100+2*400);
7757 // Basic idea: lo(result) = lo(x_lo * y_lo)
7758 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7759   format %{ "MOV    $tmp,$src.lo\n\t"
7760             "IMUL   $tmp,EDX\n\t"
7761             "MUL    EDX:EAX,$src.lo\n\t"
7762             "ADD    EDX,$tmp" %}
7763   ins_encode %{
7764     __ movl($tmp$$Register, $src$$Register);
7765     __ imull($tmp$$Register, rdx);
7766     __ mull($src$$Register);
7767     __ addl(rdx, $tmp$$Register);
7768   %}
7769   ins_pipe( pipe_slow );
7770 %}
7771 
7772 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7773 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7774   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7775   match(Set dst (MulL dst src));
7776   effect(KILL cr);
7777   ins_cost(1*400);
7778 // Basic idea: lo(result) = lo(x_lo * y_lo)
7779 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7780   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7781   ins_encode %{
7782     __ mull($src$$Register);
7783   %}
7784   ins_pipe( pipe_slow );
7785 %}
7786 
7787 // Multiply Register Long by small constant
7788 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7789   match(Set dst (MulL dst src));
7790   effect(KILL cr, TEMP tmp);
7791   ins_cost(2*100+2*400);
7792   size(12);
7793 // Basic idea: lo(result) = lo(src * EAX)
7794 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7795   format %{ "IMUL   $tmp,EDX,$src\n\t"
7796             "MOV    EDX,$src\n\t"
7797             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7798             "ADD    EDX,$tmp" %}
7799   ins_encode( long_multiply_con( dst, src, tmp ) );
7800   ins_pipe( pipe_slow );
7801 %}
7802 
7803 // Integer DIV with Register
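     // The CMP/JNE preamble below covers the one case where IDIV itself
     // traps: min_jint / -1 overflows and raises #DE, whereas Java requires
     // the result min_jint with remainder 0, so EDX is zeroed and EAX is left
     // as is.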
7804 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7805   match(Set rax (DivI rax div));
7806   effect(KILL rdx, KILL cr);
7807   size(26);
7808   ins_cost(30*100+10*100);
7809   format %{ "CMP    EAX,0x80000000\n\t"
7810             "JNE,s  normal\n\t"
7811             "XOR    EDX,EDX\n\t"
7812             "CMP    ECX,-1\n\t"
7813             "JE,s   done\n"
7814     "normal: CDQ\n\t"
7815             "IDIV   $div\n\t"
7816     "done:"        %}
7817   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7818   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7819   ins_pipe( ialu_reg_reg_alu0 );
7820 %}
7821 
7822 // Divide Register Long
7823 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7824   match(Set dst (DivL src1 src2));
7825   effect( KILL cr, KILL cx, KILL bx );
7826   ins_cost(10000);
7827   format %{ "PUSH   $src1.hi\n\t"
7828             "PUSH   $src1.lo\n\t"
7829             "PUSH   $src2.hi\n\t"
7830             "PUSH   $src2.lo\n\t"
7831             "CALL   SharedRuntime::ldiv\n\t"
7832             "ADD    ESP,16" %}
7833   ins_encode( long_div(src1,src2) );
7834   ins_pipe( pipe_slow );
7835 %}
7836 
7837 // Integer DIVMOD with Register, both quotient and mod results
7838 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7839   match(DivModI rax div);
7840   effect(KILL cr);
7841   size(26);
7842   ins_cost(30*100+10*100);
7843   format %{ "CMP    EAX,0x80000000\n\t"
7844             "JNE,s  normal\n\t"
7845             "XOR    EDX,EDX\n\t"
7846             "CMP    ECX,-1\n\t"
7847             "JE,s   done\n"
7848     "normal: CDQ\n\t"
7849             "IDIV   $div\n\t"
7850     "done:"        %}
7851   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7852   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7853   ins_pipe( pipe_slow );
7854 %}
7855 
7856 // Integer MOD with Register
7857 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7858   match(Set rdx (ModI rax div));
7859   effect(KILL rax, KILL cr);
7860 
7861   size(26);
7862   ins_cost(300);
7863   format %{ "CDQ\n\t"
7864             "IDIV   $div" %}
7865   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7866   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7867   ins_pipe( ialu_reg_reg_alu0 );
7868 %}
7869 
7870 // Remainder Register Long
7871 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7872   match(Set dst (ModL src1 src2));
7873   effect( KILL cr, KILL cx, KILL bx );
7874   ins_cost(10000);
7875   format %{ "PUSH   $src1.hi\n\t"
7876             "PUSH   $src1.lo\n\t"
7877             "PUSH   $src2.hi\n\t"
7878             "PUSH   $src2.lo\n\t"
7879             "CALL   SharedRuntime::lrem\n\t"
7880             "ADD    ESP,16" %}
7881   ins_encode( long_mod(src1,src2) );
7882   ins_pipe( pipe_slow );
7883 %}
7884 
7885 // Divide Register Long (no special case since divisor != -1)
7886 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7887   match(Set dst (DivL dst imm));
7888   effect( TEMP tmp, TEMP tmp2, KILL cr );
7889   ins_cost(1000);
7890   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7891             "XOR    $tmp2,$tmp2\n\t"
7892             "CMP    $tmp,EDX\n\t"
7893             "JA,s   fast\n\t"
7894             "MOV    $tmp2,EAX\n\t"
7895             "MOV    EAX,EDX\n\t"
7896             "MOV    EDX,0\n\t"
7897             "JLE,s  pos\n\t"
7898             "LNEG   EAX : $tmp2\n\t"
7899             "DIV    $tmp # unsigned division\n\t"
7900             "XCHG   EAX,$tmp2\n\t"
7901             "DIV    $tmp\n\t"
7902             "LNEG   $tmp2 : EAX\n\t"
7903             "JMP,s  done\n"
7904     "pos:\n\t"
7905             "DIV    $tmp\n\t"
7906             "XCHG   EAX,$tmp2\n"
7907     "fast:\n\t"
7908             "DIV    $tmp\n"
7909     "done:\n\t"
7910             "MOV    EDX,$tmp2\n\t"
7911             "NEG    EDX:EAX # if $imm < 0" %}
7912   ins_encode %{
7913     int con = (int)$imm$$constant;
7914     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7915     int pcon = (con > 0) ? con : -con;
7916     Label Lfast, Lpos, Ldone;
7917 
7918     __ movl($tmp$$Register, pcon);
7919     __ xorl($tmp2$$Register,$tmp2$$Register);
7920     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // result fits into 32 bits
7922 
7923     __ movl($tmp2$$Register, $dst$$Register); // save
7924     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7925     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7926     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7927 
7928     // Negative dividend.
7929     // convert value to positive to use unsigned division
7930     __ lneg($dst$$Register, $tmp2$$Register);
7931     __ divl($tmp$$Register);
7932     __ xchgl($dst$$Register, $tmp2$$Register);
7933     __ divl($tmp$$Register);
7934     // revert result back to negative
7935     __ lneg($tmp2$$Register, $dst$$Register);
7936     __ jmpb(Ldone);
7937 
7938     __ bind(Lpos);
7939     __ divl($tmp$$Register); // Use unsigned division
7940     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit hi result
7942 
7943     __ bind(Lfast);
7944     // fast path: src is positive
7945     __ divl($tmp$$Register); // Use unsigned division
7946 
7947     __ bind(Ldone);
7948     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7949     if (con < 0) {
7950       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7951     }
7952   %}
7953   ins_pipe( pipe_slow );
7954 %}
7955 
// Remainder Register Long (remainder fits into 32 bits)
7957 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7958   match(Set dst (ModL dst imm));
7959   effect( TEMP tmp, TEMP tmp2, KILL cr );
7960   ins_cost(1000);
7961   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7962             "CMP    $tmp,EDX\n\t"
7963             "JA,s   fast\n\t"
7964             "MOV    $tmp2,EAX\n\t"
7965             "MOV    EAX,EDX\n\t"
7966             "MOV    EDX,0\n\t"
7967             "JLE,s  pos\n\t"
7968             "LNEG   EAX : $tmp2\n\t"
7969             "DIV    $tmp # unsigned division\n\t"
7970             "MOV    EAX,$tmp2\n\t"
7971             "DIV    $tmp\n\t"
7972             "NEG    EDX\n\t"
7973             "JMP,s  done\n"
7974     "pos:\n\t"
7975             "DIV    $tmp\n\t"
7976             "MOV    EAX,$tmp2\n"
7977     "fast:\n\t"
7978             "DIV    $tmp\n"
7979     "done:\n\t"
7980             "MOV    EAX,EDX\n\t"
7981             "SAR    EDX,31\n\t" %}
7982   ins_encode %{
7983     int con = (int)$imm$$constant;
7984     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7985     int pcon = (con > 0) ? con : -con;
7986     Label  Lfast, Lpos, Ldone;
7987 
7988     __ movl($tmp$$Register, pcon);
7989     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bits
7991 
7992     __ movl($tmp2$$Register, $dst$$Register); // save
7993     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7994     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7995     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7996 
7997     // Negative dividend.
7998     // convert value to positive to use unsigned division
7999     __ lneg($dst$$Register, $tmp2$$Register);
8000     __ divl($tmp$$Register);
8001     __ movl($dst$$Register, $tmp2$$Register);
8002     __ divl($tmp$$Register);
8003     // revert remainder back to negative
8004     __ negl(HIGH_FROM_LOW($dst$$Register));
8005     __ jmpb(Ldone);
8006 
8007     __ bind(Lpos);
8008     __ divl($tmp$$Register);
8009     __ movl($dst$$Register, $tmp2$$Register);
8010 
8011     __ bind(Lfast);
8012     // fast path: src is positive
8013     __ divl($tmp$$Register);
8014 
8015     __ bind(Ldone);
8016     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8017     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8018 
8019   %}
8020   ins_pipe( pipe_slow );
8021 %}
8022 
8023 // Integer Shift Instructions
8024 // Shift Left by one
8025 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8026   match(Set dst (LShiftI dst shift));
8027   effect(KILL cr);
8028 
8029   size(2);
8030   format %{ "SHL    $dst,$shift" %}
8031   opcode(0xD1, 0x4);  /* D1 /4 */
8032   ins_encode( OpcP, RegOpc( dst ) );
8033   ins_pipe( ialu_reg );
8034 %}
8035 
8036 // Shift Left by 8-bit immediate
8037 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8038   match(Set dst (LShiftI dst shift));
8039   effect(KILL cr);
8040 
8041   size(3);
8042   format %{ "SHL    $dst,$shift" %}
8043   opcode(0xC1, 0x4);  /* C1 /4 ib */
8044   ins_encode( RegOpcImm( dst, shift) );
8045   ins_pipe( ialu_reg );
8046 %}
8047 
8048 // Shift Left by variable
8049 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8050   match(Set dst (LShiftI dst shift));
8051   effect(KILL cr);
8052 
8053   size(2);
8054   format %{ "SHL    $dst,$shift" %}
8055   opcode(0xD3, 0x4);  /* D3 /4 */
8056   ins_encode( OpcP, RegOpc( dst ) );
8057   ins_pipe( ialu_reg_reg );
8058 %}
8059 
8060 // Arithmetic shift right by one
8061 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8062   match(Set dst (RShiftI dst shift));
8063   effect(KILL cr);
8064 
8065   size(2);
8066   format %{ "SAR    $dst,$shift" %}
8067   opcode(0xD1, 0x7);  /* D1 /7 */
8068   ins_encode( OpcP, RegOpc( dst ) );
8069   ins_pipe( ialu_reg );
8070 %}
8071 
8072 // Arithmetic shift right by one
8073 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8074   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8075   effect(KILL cr);
8076   format %{ "SAR    $dst,$shift" %}
8077   opcode(0xD1, 0x7);  /* D1 /7 */
8078   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8079   ins_pipe( ialu_mem_imm );
8080 %}
8081 
8082 // Arithmetic Shift Right by 8-bit immediate
8083 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8084   match(Set dst (RShiftI dst shift));
8085   effect(KILL cr);
8086 
8087   size(3);
8088   format %{ "SAR    $dst,$shift" %}
8089   opcode(0xC1, 0x7);  /* C1 /7 ib */
8090   ins_encode( RegOpcImm( dst, shift ) );
8091   ins_pipe( ialu_mem_imm );
8092 %}
8093 
8094 // Arithmetic Shift Right by 8-bit immediate
8095 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8096   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8097   effect(KILL cr);
8098 
8099   format %{ "SAR    $dst,$shift" %}
8100   opcode(0xC1, 0x7);  /* C1 /7 ib */
8101   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8102   ins_pipe( ialu_mem_imm );
8103 %}
8104 
8105 // Arithmetic Shift Right by variable
8106 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8107   match(Set dst (RShiftI dst shift));
8108   effect(KILL cr);
8109 
8110   size(2);
8111   format %{ "SAR    $dst,$shift" %}
8112   opcode(0xD3, 0x7);  /* D3 /7 */
8113   ins_encode( OpcP, RegOpc( dst ) );
8114   ins_pipe( ialu_reg_reg );
8115 %}
8116 
8117 // Logical shift right by one
8118 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8119   match(Set dst (URShiftI dst shift));
8120   effect(KILL cr);
8121 
8122   size(2);
8123   format %{ "SHR    $dst,$shift" %}
8124   opcode(0xD1, 0x5);  /* D1 /5 */
8125   ins_encode( OpcP, RegOpc( dst ) );
8126   ins_pipe( ialu_reg );
8127 %}
8128 
8129 // Logical Shift Right by 8-bit immediate
8130 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8131   match(Set dst (URShiftI dst shift));
8132   effect(KILL cr);
8133 
8134   size(3);
8135   format %{ "SHR    $dst,$shift" %}
8136   opcode(0xC1, 0x5);  /* C1 /5 ib */
8137   ins_encode( RegOpcImm( dst, shift) );
8138   ins_pipe( ialu_reg );
8139 %}
8140 
8141 
// Shift Left by 24, followed by Arithmetic Shift Right by 24.
// This idiom is used by the compiler for the i2b bytecode.
8144 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8145   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8146 
8147   size(3);
8148   format %{ "MOVSX  $dst,$src :8" %}
8149   ins_encode %{
8150     __ movsbl($dst$$Register, $src$$Register);
8151   %}
8152   ins_pipe(ialu_reg_reg);
8153 %}
8154 
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode.
8157 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8158   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8159 
8160   size(3);
8161   format %{ "MOVSX  $dst,$src :16" %}
8162   ins_encode %{
8163     __ movswl($dst$$Register, $src$$Register);
8164   %}
8165   ins_pipe(ialu_reg_reg);
8166 %}
8167 
8168 
8169 // Logical Shift Right by variable
8170 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8171   match(Set dst (URShiftI dst shift));
8172   effect(KILL cr);
8173 
8174   size(2);
8175   format %{ "SHR    $dst,$shift" %}
8176   opcode(0xD3, 0x5);  /* D3 /5 */
8177   ins_encode( OpcP, RegOpc( dst ) );
8178   ins_pipe( ialu_reg_reg );
8179 %}
8180 
8181 
8182 //----------Logical Instructions-----------------------------------------------
8183 //----------Integer Logical Instructions---------------------------------------
8184 // And Instructions
8185 // And Register with Register
8186 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8187   match(Set dst (AndI dst src));
8188   effect(KILL cr);
8189 
8190   size(2);
8191   format %{ "AND    $dst,$src" %}
8192   opcode(0x23);
8193   ins_encode( OpcP, RegReg( dst, src) );
8194   ins_pipe( ialu_reg_reg );
8195 %}
8196 
8197 // And Register with Immediate
8198 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8199   match(Set dst (AndI dst src));
8200   effect(KILL cr);
8201 
8202   format %{ "AND    $dst,$src" %}
8203   opcode(0x81,0x04);  /* Opcode 81 /4 */
8204   // ins_encode( RegImm( dst, src) );
8205   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8206   ins_pipe( ialu_reg );
8207 %}
8208 
8209 // And Register with Memory
8210 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8211   match(Set dst (AndI dst (LoadI src)));
8212   effect(KILL cr);
8213 
8214   ins_cost(125);
8215   format %{ "AND    $dst,$src" %}
8216   opcode(0x23);
8217   ins_encode( OpcP, RegMem( dst, src) );
8218   ins_pipe( ialu_reg_mem );
8219 %}
8220 
8221 // And Memory with Register
8222 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8223   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8224   effect(KILL cr);
8225 
8226   ins_cost(150);
8227   format %{ "AND    $dst,$src" %}
8228   opcode(0x21);  /* Opcode 21 /r */
8229   ins_encode( OpcP, RegMem( src, dst ) );
8230   ins_pipe( ialu_mem_reg );
8231 %}
8232 
8233 // And Memory with Immediate
8234 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8235   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8236   effect(KILL cr);
8237 
8238   ins_cost(125);
8239   format %{ "AND    $dst,$src" %}
8240   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8241   // ins_encode( MemImm( dst, src) );
8242   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8243   ins_pipe( ialu_mem_imm );
8244 %}
8245 
8246 // BMI1 instructions
8247 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8248   match(Set dst (AndI (XorI src1 minus_1) src2));
8249   predicate(UseBMI1Instructions);
8250   effect(KILL cr);
8251 
8252   format %{ "ANDNL  $dst, $src1, $src2" %}
8253 
8254   ins_encode %{
8255     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8256   %}
8257   ins_pipe(ialu_reg);
8258 %}
8259 
8260 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8261   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8262   predicate(UseBMI1Instructions);
8263   effect(KILL cr);
8264 
8265   ins_cost(125);
8266   format %{ "ANDNL  $dst, $src1, $src2" %}
8267 
8268   ins_encode %{
8269     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8270   %}
8271   ins_pipe(ialu_reg_mem);
8272 %}
8273 
8274 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8275   match(Set dst (AndI (SubI imm_zero src) src));
8276   predicate(UseBMI1Instructions);
8277   effect(KILL cr);
8278 
8279   format %{ "BLSIL  $dst, $src" %}
8280 
8281   ins_encode %{
8282     __ blsil($dst$$Register, $src$$Register);
8283   %}
8284   ins_pipe(ialu_reg);
8285 %}
8286 
8287 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8288   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8289   predicate(UseBMI1Instructions);
8290   effect(KILL cr);
8291 
8292   ins_cost(125);
8293   format %{ "BLSIL  $dst, $src" %}
8294 
8295   ins_encode %{
8296     __ blsil($dst$$Register, $src$$Address);
8297   %}
8298   ins_pipe(ialu_reg_mem);
8299 %}
8300 
8301 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8302 %{
8303   match(Set dst (XorI (AddI src minus_1) src));
8304   predicate(UseBMI1Instructions);
8305   effect(KILL cr);
8306 
8307   format %{ "BLSMSKL $dst, $src" %}
8308 
8309   ins_encode %{
8310     __ blsmskl($dst$$Register, $src$$Register);
8311   %}
8312 
8313   ins_pipe(ialu_reg);
8314 %}
8315 
8316 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8317 %{
8318   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8319   predicate(UseBMI1Instructions);
8320   effect(KILL cr);
8321 
8322   ins_cost(125);
8323   format %{ "BLSMSKL $dst, $src" %}
8324 
8325   ins_encode %{
8326     __ blsmskl($dst$$Register, $src$$Address);
8327   %}
8328 
8329   ins_pipe(ialu_reg_mem);
8330 %}
8331 
8332 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8333 %{
8334   match(Set dst (AndI (AddI src minus_1) src) );
8335   predicate(UseBMI1Instructions);
8336   effect(KILL cr);
8337 
8338   format %{ "BLSRL  $dst, $src" %}
8339 
8340   ins_encode %{
8341     __ blsrl($dst$$Register, $src$$Register);
8342   %}
8343 
8344   ins_pipe(ialu_reg);
8345 %}
8346 
8347 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8348 %{
8349   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8350   predicate(UseBMI1Instructions);
8351   effect(KILL cr);
8352 
8353   ins_cost(125);
8354   format %{ "BLSRL  $dst, $src" %}
8355 
8356   ins_encode %{
8357     __ blsrl($dst$$Register, $src$$Address);
8358   %}
8359 
8360   ins_pipe(ialu_reg_mem);
8361 %}
8362 
8363 // Or Instructions
8364 // Or Register with Register
8365 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8366   match(Set dst (OrI dst src));
8367   effect(KILL cr);
8368 
8369   size(2);
8370   format %{ "OR     $dst,$src" %}
8371   opcode(0x0B);
8372   ins_encode( OpcP, RegReg( dst, src) );
8373   ins_pipe( ialu_reg_reg );
8374 %}
8375 
8376 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8377   match(Set dst (OrI dst (CastP2X src)));
8378   effect(KILL cr);
8379 
8380   size(2);
8381   format %{ "OR     $dst,$src" %}
8382   opcode(0x0B);
8383   ins_encode( OpcP, RegReg( dst, src) );
8384   ins_pipe( ialu_reg_reg );
8385 %}
8386 
8387 
8388 // Or Register with Immediate
8389 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8390   match(Set dst (OrI dst src));
8391   effect(KILL cr);
8392 
8393   format %{ "OR     $dst,$src" %}
8394   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8395   // ins_encode( RegImm( dst, src) );
8396   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8397   ins_pipe( ialu_reg );
8398 %}
8399 
8400 // Or Register with Memory
8401 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8402   match(Set dst (OrI dst (LoadI src)));
8403   effect(KILL cr);
8404 
8405   ins_cost(125);
8406   format %{ "OR     $dst,$src" %}
8407   opcode(0x0B);
8408   ins_encode( OpcP, RegMem( dst, src) );
8409   ins_pipe( ialu_reg_mem );
8410 %}
8411 
8412 // Or Memory with Register
8413 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8414   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8415   effect(KILL cr);
8416 
8417   ins_cost(150);
8418   format %{ "OR     $dst,$src" %}
8419   opcode(0x09);  /* Opcode 09 /r */
8420   ins_encode( OpcP, RegMem( src, dst ) );
8421   ins_pipe( ialu_mem_reg );
8422 %}
8423 
8424 // Or Memory with Immediate
8425 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8426   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8427   effect(KILL cr);
8428 
8429   ins_cost(125);
8430   format %{ "OR     $dst,$src" %}
8431   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8432   // ins_encode( MemImm( dst, src) );
8433   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8434   ins_pipe( ialu_mem_imm );
8435 %}
8436 
8437 // ROL/ROR
8438 // ROL expand
8439 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8440   effect(USE_DEF dst, USE shift, KILL cr);
8441 
8442   format %{ "ROL    $dst, $shift" %}
8443   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8444   ins_encode( OpcP, RegOpc( dst ));
8445   ins_pipe( ialu_reg );
8446 %}
8447 
8448 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8449   effect(USE_DEF dst, USE shift, KILL cr);
8450 
8451   format %{ "ROL    $dst, $shift" %}
8452   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8453   ins_encode( RegOpcImm(dst, shift) );
8454   ins_pipe(ialu_reg);
8455 %}
8456 
8457 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8458   effect(USE_DEF dst, USE shift, KILL cr);
8459 
8460   format %{ "ROL    $dst, $shift" %}
8461   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8462   ins_encode(OpcP, RegOpc(dst));
8463   ins_pipe( ialu_reg_reg );
8464 %}
8465 // end of ROL expand
8466 
8467 // ROL 32bit by one once
8468 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8469   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8470 
8471   expand %{
8472     rolI_eReg_imm1(dst, lshift, cr);
8473   %}
8474 %}
8475 
8476 // ROL 32bit var by imm8 once
8477 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8478   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8479   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8480 
8481   expand %{
8482     rolI_eReg_imm8(dst, lshift, cr);
8483   %}
8484 %}
8485 
8486 // ROL 32bit var by var once
8487 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8488   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8489 
8490   expand %{
8491     rolI_eReg_CL(dst, shift, cr);
8492   %}
8493 %}
8494 
8495 // ROL 32bit var by var once
8496 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8497   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8498 
8499   expand %{
8500     rolI_eReg_CL(dst, shift, cr);
8501   %}
8502 %}
8503 
8504 // ROR expand
8505 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8506   effect(USE_DEF dst, USE shift, KILL cr);
8507 
8508   format %{ "ROR    $dst, $shift" %}
8509   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8510   ins_encode( OpcP, RegOpc( dst ) );
8511   ins_pipe( ialu_reg );
8512 %}
8513 
8514 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8515   effect (USE_DEF dst, USE shift, KILL cr);
8516 
8517   format %{ "ROR    $dst, $shift" %}
8518   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8519   ins_encode( RegOpcImm(dst, shift) );
8520   ins_pipe( ialu_reg );
8521 %}
8522 
8523 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8524   effect(USE_DEF dst, USE shift, KILL cr);
8525 
8526   format %{ "ROR    $dst, $shift" %}
8527   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8528   ins_encode(OpcP, RegOpc(dst));
8529   ins_pipe( ialu_reg_reg );
8530 %}
8531 // end of ROR expand
8532 
8533 // ROR right once
8534 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8535   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8536 
8537   expand %{
8538     rorI_eReg_imm1(dst, rshift, cr);
8539   %}
8540 %}
8541 
8542 // ROR 32bit by immI8 once
8543 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8544   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8545   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8546 
8547   expand %{
8548     rorI_eReg_imm8(dst, rshift, cr);
8549   %}
8550 %}
8551 
8552 // ROR 32bit var by var once
8553 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8554   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8555 
8556   expand %{
8557     rorI_eReg_CL(dst, shift, cr);
8558   %}
8559 %}
8560 
8561 // ROR 32bit var by var once
8562 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8563   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8564 
8565   expand %{
8566     rorI_eReg_CL(dst, shift, cr);
8567   %}
8568 %}
8569 
8570 // Xor Instructions
8571 // Xor Register with Register
8572 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8573   match(Set dst (XorI dst src));
8574   effect(KILL cr);
8575 
8576   size(2);
8577   format %{ "XOR    $dst,$src" %}
8578   opcode(0x33);
8579   ins_encode( OpcP, RegReg( dst, src) );
8580   ins_pipe( ialu_reg_reg );
8581 %}
8582 
8583 // Xor Register with Immediate -1
8584 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8585   match(Set dst (XorI dst imm));
8586 
8587   size(2);
8588   format %{ "NOT    $dst" %}
8589   ins_encode %{
8590      __ notl($dst$$Register);
8591   %}
8592   ins_pipe( ialu_reg );
8593 %}
8594 
8595 // Xor Register with Immediate
8596 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8597   match(Set dst (XorI dst src));
8598   effect(KILL cr);
8599 
8600   format %{ "XOR    $dst,$src" %}
8601   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8602   // ins_encode( RegImm( dst, src) );
8603   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8604   ins_pipe( ialu_reg );
8605 %}
8606 
8607 // Xor Register with Memory
8608 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8609   match(Set dst (XorI dst (LoadI src)));
8610   effect(KILL cr);
8611 
8612   ins_cost(125);
8613   format %{ "XOR    $dst,$src" %}
8614   opcode(0x33);
8615   ins_encode( OpcP, RegMem(dst, src) );
8616   ins_pipe( ialu_reg_mem );
8617 %}
8618 
8619 // Xor Memory with Register
8620 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8621   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8622   effect(KILL cr);
8623 
8624   ins_cost(150);
8625   format %{ "XOR    $dst,$src" %}
8626   opcode(0x31);  /* Opcode 31 /r */
8627   ins_encode( OpcP, RegMem( src, dst ) );
8628   ins_pipe( ialu_mem_reg );
8629 %}
8630 
8631 // Xor Memory with Immediate
8632 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8633   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8634   effect(KILL cr);
8635 
8636   ins_cost(125);
8637   format %{ "XOR    $dst,$src" %}
8638   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8639   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8640   ins_pipe( ialu_mem_imm );
8641 %}
8642 
8643 //----------Convert Int to Boolean---------------------------------------------
8644 
8645 instruct movI_nocopy(rRegI dst, rRegI src) %{
8646   effect( DEF dst, USE src );
8647   format %{ "MOV    $dst,$src" %}
8648   ins_encode( enc_Copy( dst, src) );
8649   ins_pipe( ialu_reg_reg );
8650 %}
8651 
8652 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8653   effect( USE_DEF dst, USE src, KILL cr );
8654 
8655   size(4);
8656   format %{ "NEG    $dst\n\t"
8657             "ADC    $dst,$src" %}
8658   ins_encode( neg_reg(dst),
8659               OpcRegReg(0x13,dst,src) );
8660   ins_pipe( ialu_reg_reg_long );
8661 %}
8662 
8663 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8664   match(Set dst (Conv2B src));
8665 
8666   expand %{
8667     movI_nocopy(dst,src);
8668     ci2b(dst,src,cr);
8669   %}
8670 %}
8671 
8672 instruct movP_nocopy(rRegI dst, eRegP src) %{
8673   effect( DEF dst, USE src );
8674   format %{ "MOV    $dst,$src" %}
8675   ins_encode( enc_Copy( dst, src) );
8676   ins_pipe( ialu_reg_reg );
8677 %}
8678 
8679 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8680   effect( USE_DEF dst, USE src, KILL cr );
8681   format %{ "NEG    $dst\n\t"
8682             "ADC    $dst,$src" %}
8683   ins_encode( neg_reg(dst),
8684               OpcRegReg(0x13,dst,src) );
8685   ins_pipe( ialu_reg_reg_long );
8686 %}
8687 
8688 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8689   match(Set dst (Conv2B src));
8690 
8691   expand %{
8692     movP_nocopy(dst,src);
8693     cp2b(dst,src,cr);
8694   %}
8695 %}
8696 
8697 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8698   match(Set dst (CmpLTMask p q));
8699   effect(KILL cr);
8700   ins_cost(400);
8701 
  // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
8703   format %{ "XOR    $dst,$dst\n\t"
8704             "CMP    $p,$q\n\t"
8705             "SETlt  $dst\n\t"
8706             "NEG    $dst" %}
8707   ins_encode %{
8708     Register Rp = $p$$Register;
8709     Register Rq = $q$$Register;
8710     Register Rd = $dst$$Register;
8711     Label done;
8712     __ xorl(Rd, Rd);
8713     __ cmpl(Rp, Rq);
8714     __ setb(Assembler::less, Rd);
8715     __ negl(Rd);
8716   %}
8717 
8718   ins_pipe(pipe_slow);
8719 %}
8720 
8721 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8722   match(Set dst (CmpLTMask dst zero));
8723   effect(DEF dst, KILL cr);
8724   ins_cost(100);
8725 
8726   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8727   ins_encode %{
8728   __ sarl($dst$$Register, 31);
8729   %}
8730   ins_pipe(ialu_reg);
8731 %}
8732 
8733 /* better to save a register than avoid a branch */
8734 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8735   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8736   effect(KILL cr);
8737   ins_cost(400);
8738   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8739             "JGE    done\n\t"
8740             "ADD    $p,$y\n"
8741             "done:  " %}
8742   ins_encode %{
8743     Register Rp = $p$$Register;
8744     Register Rq = $q$$Register;
8745     Register Ry = $y$$Register;
8746     Label done;
8747     __ subl(Rp, Rq);
8748     __ jccb(Assembler::greaterEqual, done);
8749     __ addl(Rp, Ry);
8750     __ bind(done);
8751   %}
8752 
8753   ins_pipe(pipe_cmplt);
8754 %}
8755 
8756 /* better to save a register than avoid a branch */
8757 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8758   match(Set y (AndI (CmpLTMask p q) y));
8759   effect(KILL cr);
8760 
8761   ins_cost(300);
8762 
8763   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8764             "JLT      done\n\t"
8765             "XORL     $y, $y\n"
8766             "done:  " %}
8767   ins_encode %{
8768     Register Rp = $p$$Register;
8769     Register Rq = $q$$Register;
8770     Register Ry = $y$$Register;
8771     Label done;
8772     __ cmpl(Rp, Rq);
8773     __ jccb(Assembler::less, done);
8774     __ xorl(Ry, Ry);
8775     __ bind(done);
8776   %}
8777 
8778   ins_pipe(pipe_cmplt);
8779 %}
8780 
8781 /* If I enable this, I encourage spilling in the inner loop of compress.
8782 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8783   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8784 */
8785 //----------Overflow Math Instructions-----------------------------------------
8786 
8787 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8788 %{
8789   match(Set cr (OverflowAddI op1 op2));
8790   effect(DEF cr, USE_KILL op1, USE op2);
8791 
8792   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8793 
8794   ins_encode %{
8795     __ addl($op1$$Register, $op2$$Register);
8796   %}
8797   ins_pipe(ialu_reg_reg);
8798 %}
8799 
8800 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8801 %{
8802   match(Set cr (OverflowAddI op1 op2));
8803   effect(DEF cr, USE_KILL op1, USE op2);
8804 
8805   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8806 
8807   ins_encode %{
8808     __ addl($op1$$Register, $op2$$constant);
8809   %}
8810   ins_pipe(ialu_reg_reg);
8811 %}
8812 
8813 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8814 %{
8815   match(Set cr (OverflowSubI op1 op2));
8816 
8817   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8818   ins_encode %{
8819     __ cmpl($op1$$Register, $op2$$Register);
8820   %}
8821   ins_pipe(ialu_reg_reg);
8822 %}
8823 
8824 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8825 %{
8826   match(Set cr (OverflowSubI op1 op2));
8827 
8828   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8829   ins_encode %{
8830     __ cmpl($op1$$Register, $op2$$constant);
8831   %}
8832   ins_pipe(ialu_reg_reg);
8833 %}
8834 
8835 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8836 %{
8837   match(Set cr (OverflowSubI zero op2));
8838   effect(DEF cr, USE_KILL op2);
8839 
8840   format %{ "NEG    $op2\t# overflow check int" %}
8841   ins_encode %{
8842     __ negl($op2$$Register);
8843   %}
8844   ins_pipe(ialu_reg_reg);
8845 %}
8846 
8847 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8848 %{
8849   match(Set cr (OverflowMulI op1 op2));
8850   effect(DEF cr, USE_KILL op1, USE op2);
8851 
8852   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8853   ins_encode %{
8854     __ imull($op1$$Register, $op2$$Register);
8855   %}
8856   ins_pipe(ialu_reg_reg_alu0);
8857 %}
8858 
8859 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8860 %{
8861   match(Set cr (OverflowMulI op1 op2));
8862   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8863 
8864   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8865   ins_encode %{
8866     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8867   %}
8868   ins_pipe(ialu_reg_reg_alu0);
8869 %}
8870 
8871 //----------Long Instructions------------------------------------------------
8872 // Add Long Register with Register
8873 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8874   match(Set dst (AddL dst src));
8875   effect(KILL cr);
8876   ins_cost(200);
8877   format %{ "ADD    $dst.lo,$src.lo\n\t"
8878             "ADC    $dst.hi,$src.hi" %}
8879   opcode(0x03, 0x13);
8880   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8881   ins_pipe( ialu_reg_reg_long );
8882 %}
8883 
8884 // Add Long Register with Immediate
8885 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8886   match(Set dst (AddL dst src));
8887   effect(KILL cr);
8888   format %{ "ADD    $dst.lo,$src.lo\n\t"
8889             "ADC    $dst.hi,$src.hi" %}
8890   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8891   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8892   ins_pipe( ialu_reg_long );
8893 %}
8894 
8895 // Add Long Register with Memory
8896 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8897   match(Set dst (AddL dst (LoadL mem)));
8898   effect(KILL cr);
8899   ins_cost(125);
8900   format %{ "ADD    $dst.lo,$mem\n\t"
8901             "ADC    $dst.hi,$mem+4" %}
8902   opcode(0x03, 0x13);
8903   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8904   ins_pipe( ialu_reg_long_mem );
8905 %}
8906 
8907 // Subtract Long Register with Register.
8908 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8909   match(Set dst (SubL dst src));
8910   effect(KILL cr);
8911   ins_cost(200);
8912   format %{ "SUB    $dst.lo,$src.lo\n\t"
8913             "SBB    $dst.hi,$src.hi" %}
8914   opcode(0x2B, 0x1B);
8915   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8916   ins_pipe( ialu_reg_reg_long );
8917 %}
8918 
8919 // Subtract Long Register with Immediate
8920 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8921   match(Set dst (SubL dst src));
8922   effect(KILL cr);
8923   format %{ "SUB    $dst.lo,$src.lo\n\t"
8924             "SBB    $dst.hi,$src.hi" %}
8925   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8926   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8927   ins_pipe( ialu_reg_long );
8928 %}
8929 
8930 // Subtract Long Register with Memory
8931 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8932   match(Set dst (SubL dst (LoadL mem)));
8933   effect(KILL cr);
8934   ins_cost(125);
8935   format %{ "SUB    $dst.lo,$mem\n\t"
8936             "SBB    $dst.hi,$mem+4" %}
8937   opcode(0x2B, 0x1B);
8938   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8939   ins_pipe( ialu_reg_long_mem );
8940 %}
8941 
8942 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8943   match(Set dst (SubL zero dst));
8944   effect(KILL cr);
8945   ins_cost(300);
8946   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8947   ins_encode( neg_long(dst) );
8948   ins_pipe( ialu_reg_reg_long );
8949 %}
8950 
8951 // And Long Register with Register
8952 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8953   match(Set dst (AndL dst src));
8954   effect(KILL cr);
8955   format %{ "AND    $dst.lo,$src.lo\n\t"
8956             "AND    $dst.hi,$src.hi" %}
8957   opcode(0x23,0x23);
8958   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8959   ins_pipe( ialu_reg_reg_long );
8960 %}
8961 
8962 // And Long Register with Immediate
8963 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8964   match(Set dst (AndL dst src));
8965   effect(KILL cr);
8966   format %{ "AND    $dst.lo,$src.lo\n\t"
8967             "AND    $dst.hi,$src.hi" %}
8968   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8969   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8970   ins_pipe( ialu_reg_long );
8971 %}
8972 
8973 // And Long Register with Memory
8974 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8975   match(Set dst (AndL dst (LoadL mem)));
8976   effect(KILL cr);
8977   ins_cost(125);
8978   format %{ "AND    $dst.lo,$mem\n\t"
8979             "AND    $dst.hi,$mem+4" %}
8980   opcode(0x23, 0x23);
8981   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8982   ins_pipe( ialu_reg_long_mem );
8983 %}
8984 
8985 // BMI1 instructions
8986 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8987   match(Set dst (AndL (XorL src1 minus_1) src2));
8988   predicate(UseBMI1Instructions);
8989   effect(KILL cr, TEMP dst);
8990 
8991   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8992             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8993          %}
8994 
8995   ins_encode %{
8996     Register Rdst = $dst$$Register;
8997     Register Rsrc1 = $src1$$Register;
8998     Register Rsrc2 = $src2$$Register;
8999     __ andnl(Rdst, Rsrc1, Rsrc2);
9000     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
9001   %}
9002   ins_pipe(ialu_reg_reg_long);
9003 %}
9004 
9005 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
9006   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
9007   predicate(UseBMI1Instructions);
9008   effect(KILL cr, TEMP dst);
9009 
9010   ins_cost(125);
9011   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
9012             "ANDNL  $dst.hi, $src1.hi, $src2+4"
9013          %}
9014 
9015   ins_encode %{
9016     Register Rdst = $dst$$Register;
9017     Register Rsrc1 = $src1$$Register;
9018     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
9019 
9020     __ andnl(Rdst, Rsrc1, $src2$$Address);
9021     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
9022   %}
9023   ins_pipe(ialu_reg_mem);
9024 %}
9025 
9026 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
9027   match(Set dst (AndL (SubL imm_zero src) src));
9028   predicate(UseBMI1Instructions);
9029   effect(KILL cr, TEMP dst);
9030 
9031   format %{ "MOVL   $dst.hi, 0\n\t"
9032             "BLSIL  $dst.lo, $src.lo\n\t"
9033             "JNZ    done\n\t"
9034             "BLSIL  $dst.hi, $src.hi\n"
9035             "done:"
9036          %}
9037 
9038   ins_encode %{
9039     Label done;
9040     Register Rdst = $dst$$Register;
9041     Register Rsrc = $src$$Register;
9042     __ movl(HIGH_FROM_LOW(Rdst), 0);
9043     __ blsil(Rdst, Rsrc);
9044     __ jccb(Assembler::notZero, done);
9045     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9046     __ bind(done);
9047   %}
9048   ins_pipe(ialu_reg);
9049 %}
9050 
9051 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
9052   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9053   predicate(UseBMI1Instructions);
9054   effect(KILL cr, TEMP dst);
9055 
9056   ins_cost(125);
9057   format %{ "MOVL   $dst.hi, 0\n\t"
9058             "BLSIL  $dst.lo, $src\n\t"
9059             "JNZ    done\n\t"
9060             "BLSIL  $dst.hi, $src+4\n"
9061             "done:"
9062          %}
9063 
9064   ins_encode %{
9065     Label done;
9066     Register Rdst = $dst$$Register;
9067     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9068 
9069     __ movl(HIGH_FROM_LOW(Rdst), 0);
9070     __ blsil(Rdst, $src$$Address);
9071     __ jccb(Assembler::notZero, done);
9072     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9073     __ bind(done);
9074   %}
9075   ins_pipe(ialu_reg_mem);
9076 %}
9077 
9078 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9079 %{
9080   match(Set dst (XorL (AddL src minus_1) src));
9081   predicate(UseBMI1Instructions);
9082   effect(KILL cr, TEMP dst);
9083 
9084   format %{ "MOVL    $dst.hi, 0\n\t"
9085             "BLSMSKL $dst.lo, $src.lo\n\t"
9086             "JNC     done\n\t"
9087             "BLSMSKL $dst.hi, $src.hi\n"
9088             "done:"
9089          %}
9090 
9091   ins_encode %{
9092     Label done;
9093     Register Rdst = $dst$$Register;
9094     Register Rsrc = $src$$Register;
9095     __ movl(HIGH_FROM_LOW(Rdst), 0);
9096     __ blsmskl(Rdst, Rsrc);
9097     __ jccb(Assembler::carryClear, done);
9098     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9099     __ bind(done);
9100   %}
9101 
9102   ins_pipe(ialu_reg);
9103 %}
9104 
9105 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9106 %{
9107   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9108   predicate(UseBMI1Instructions);
9109   effect(KILL cr, TEMP dst);
9110 
9111   ins_cost(125);
9112   format %{ "MOVL    $dst.hi, 0\n\t"
9113             "BLSMSKL $dst.lo, $src\n\t"
9114             "JNC     done\n\t"
9115             "BLSMSKL $dst.hi, $src+4\n"
9116             "done:"
9117          %}
9118 
9119   ins_encode %{
9120     Label done;
9121     Register Rdst = $dst$$Register;
9122     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9123 
9124     __ movl(HIGH_FROM_LOW(Rdst), 0);
9125     __ blsmskl(Rdst, $src$$Address);
9126     __ jccb(Assembler::carryClear, done);
9127     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9128     __ bind(done);
9129   %}
9130 
9131   ins_pipe(ialu_reg_mem);
9132 %}
9133 
9134 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9135 %{
9136   match(Set dst (AndL (AddL src minus_1) src) );
9137   predicate(UseBMI1Instructions);
9138   effect(KILL cr, TEMP dst);
9139 
9140   format %{ "MOVL   $dst.hi, $src.hi\n\t"
9141             "BLSRL  $dst.lo, $src.lo\n\t"
9142             "JNC    done\n\t"
9143             "BLSRL  $dst.hi, $src.hi\n"
9144             "done:"
9145   %}
9146 
9147   ins_encode %{
9148     Label done;
9149     Register Rdst = $dst$$Register;
9150     Register Rsrc = $src$$Register;
9151     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9152     __ blsrl(Rdst, Rsrc);
9153     __ jccb(Assembler::carryClear, done);
9154     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9155     __ bind(done);
9156   %}
9157 
9158   ins_pipe(ialu_reg);
9159 %}
9160 
9161 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9162 %{
9163   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9164   predicate(UseBMI1Instructions);
9165   effect(KILL cr, TEMP dst);
9166 
9167   ins_cost(125);
9168   format %{ "MOVL   $dst.hi, $src+4\n\t"
9169             "BLSRL  $dst.lo, $src\n\t"
9170             "JNC    done\n\t"
9171             "BLSRL  $dst.hi, $src+4\n"
9172             "done:"
9173   %}
9174 
9175   ins_encode %{
9176     Label done;
9177     Register Rdst = $dst$$Register;
9178     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9179     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9180     __ blsrl(Rdst, $src$$Address);
9181     __ jccb(Assembler::carryClear, done);
9182     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9183     __ bind(done);
9184   %}
9185 
9186   ins_pipe(ialu_reg_mem);
9187 %}
9188 
9189 // Or Long Register with Register
9190 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9191   match(Set dst (OrL dst src));
9192   effect(KILL cr);
9193   format %{ "OR     $dst.lo,$src.lo\n\t"
9194             "OR     $dst.hi,$src.hi" %}
9195   opcode(0x0B,0x0B);
9196   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9197   ins_pipe( ialu_reg_reg_long );
9198 %}
9199 
9200 // Or Long Register with Immediate
9201 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9202   match(Set dst (OrL dst src));
9203   effect(KILL cr);
9204   format %{ "OR     $dst.lo,$src.lo\n\t"
9205             "OR     $dst.hi,$src.hi" %}
9206   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9207   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9208   ins_pipe( ialu_reg_long );
9209 %}
9210 
9211 // Or Long Register with Memory
9212 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9213   match(Set dst (OrL dst (LoadL mem)));
9214   effect(KILL cr);
9215   ins_cost(125);
9216   format %{ "OR     $dst.lo,$mem\n\t"
9217             "OR     $dst.hi,$mem+4" %}
9218   opcode(0x0B,0x0B);
9219   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9220   ins_pipe( ialu_reg_long_mem );
9221 %}
9222 
9223 // Xor Long Register with Register
9224 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9225   match(Set dst (XorL dst src));
9226   effect(KILL cr);
9227   format %{ "XOR    $dst.lo,$src.lo\n\t"
9228             "XOR    $dst.hi,$src.hi" %}
9229   opcode(0x33,0x33);
9230   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9231   ins_pipe( ialu_reg_reg_long );
9232 %}
9233 
9234 // Xor Long Register with Immediate -1
9235 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9236   match(Set dst (XorL dst imm));
9237   format %{ "NOT    $dst.lo\n\t"
9238             "NOT    $dst.hi" %}
9239   ins_encode %{
9240      __ notl($dst$$Register);
9241      __ notl(HIGH_FROM_LOW($dst$$Register));
9242   %}
9243   ins_pipe( ialu_reg_long );
9244 %}
9245 
9246 // Xor Long Register with Immediate
9247 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9248   match(Set dst (XorL dst src));
9249   effect(KILL cr);
9250   format %{ "XOR    $dst.lo,$src.lo\n\t"
9251             "XOR    $dst.hi,$src.hi" %}
9252   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9253   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9254   ins_pipe( ialu_reg_long );
9255 %}
9256 
9257 // Xor Long Register with Memory
9258 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9259   match(Set dst (XorL dst (LoadL mem)));
9260   effect(KILL cr);
9261   ins_cost(125);
9262   format %{ "XOR    $dst.lo,$mem\n\t"
9263             "XOR    $dst.hi,$mem+4" %}
9264   opcode(0x33,0x33);
9265   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9266   ins_pipe( ialu_reg_long_mem );
9267 %}
9268 
9269 // Shift Left Long by 1
9270 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9271   predicate(UseNewLongLShift);
9272   match(Set dst (LShiftL dst cnt));
9273   effect(KILL cr);
9274   ins_cost(100);
9275   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9276             "ADC    $dst.hi,$dst.hi" %}
9277   ins_encode %{
9278     __ addl($dst$$Register,$dst$$Register);
9279     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9280   %}
9281   ins_pipe( ialu_reg_long );
9282 %}
9283 
9284 // Shift Left Long by 2
9285 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9286   predicate(UseNewLongLShift);
9287   match(Set dst (LShiftL dst cnt));
9288   effect(KILL cr);
9289   ins_cost(100);
9290   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9291             "ADC    $dst.hi,$dst.hi\n\t"
9292             "ADD    $dst.lo,$dst.lo\n\t"
9293             "ADC    $dst.hi,$dst.hi" %}
9294   ins_encode %{
9295     __ addl($dst$$Register,$dst$$Register);
9296     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9297     __ addl($dst$$Register,$dst$$Register);
9298     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9299   %}
9300   ins_pipe( ialu_reg_long );
9301 %}
9302 
9303 // Shift Left Long by 3
9304 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9305   predicate(UseNewLongLShift);
9306   match(Set dst (LShiftL dst cnt));
9307   effect(KILL cr);
9308   ins_cost(100);
9309   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9310             "ADC    $dst.hi,$dst.hi\n\t"
9311             "ADD    $dst.lo,$dst.lo\n\t"
9312             "ADC    $dst.hi,$dst.hi\n\t"
9313             "ADD    $dst.lo,$dst.lo\n\t"
9314             "ADC    $dst.hi,$dst.hi" %}
9315   ins_encode %{
9316     __ addl($dst$$Register,$dst$$Register);
9317     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9318     __ addl($dst$$Register,$dst$$Register);
9319     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9320     __ addl($dst$$Register,$dst$$Register);
9321     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9322   %}
9323   ins_pipe( ialu_reg_long );
9324 %}
9325 
9326 // Shift Left Long by 1-31
9327 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9328   match(Set dst (LShiftL dst cnt));
9329   effect(KILL cr);
9330   ins_cost(200);
9331   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9332             "SHL    $dst.lo,$cnt" %}
9333   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9334   ins_encode( move_long_small_shift(dst,cnt) );
9335   ins_pipe( ialu_reg_long );
9336 %}
9337 
9338 // Shift Left Long by 32-63
9339 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9340   match(Set dst (LShiftL dst cnt));
9341   effect(KILL cr);
9342   ins_cost(300);
9343   format %{ "MOV    $dst.hi,$dst.lo\n"
9344           "\tSHL    $dst.hi,$cnt-32\n"
9345           "\tXOR    $dst.lo,$dst.lo" %}
9346   opcode(0xC1, 0x4);  /* C1 /4 ib */
9347   ins_encode( move_long_big_shift_clr(dst,cnt) );
9348   ins_pipe( ialu_reg_long );
9349 %}
9350 
9351 // Shift Left Long by variable
9352 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9353   match(Set dst (LShiftL dst shift));
9354   effect(KILL cr);
9355   ins_cost(500+200);
9356   size(17);
9357   format %{ "TEST   $shift,32\n\t"
9358             "JEQ,s  small\n\t"
9359             "MOV    $dst.hi,$dst.lo\n\t"
9360             "XOR    $dst.lo,$dst.lo\n"
9361     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9362             "SHL    $dst.lo,$shift" %}
9363   ins_encode( shift_left_long( dst, shift ) );
9364   ins_pipe( pipe_slow );
9365 %}
9366 
9367 // Shift Right Long by 1-31
9368 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9369   match(Set dst (URShiftL dst cnt));
9370   effect(KILL cr);
9371   ins_cost(200);
9372   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9373             "SHR    $dst.hi,$cnt" %}
9374   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9375   ins_encode( move_long_small_shift(dst,cnt) );
9376   ins_pipe( ialu_reg_long );
9377 %}
9378 
9379 // Shift Right Long by 32-63
9380 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9381   match(Set dst (URShiftL dst cnt));
9382   effect(KILL cr);
9383   ins_cost(300);
9384   format %{ "MOV    $dst.lo,$dst.hi\n"
9385           "\tSHR    $dst.lo,$cnt-32\n"
9386           "\tXOR    $dst.hi,$dst.hi" %}
9387   opcode(0xC1, 0x5);  /* C1 /5 ib */
9388   ins_encode( move_long_big_shift_clr(dst,cnt) );
9389   ins_pipe( ialu_reg_long );
9390 %}
9391 
9392 // Shift Right Long by variable
9393 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9394   match(Set dst (URShiftL dst shift));
9395   effect(KILL cr);
9396   ins_cost(600);
9397   size(17);
9398   format %{ "TEST   $shift,32\n\t"
9399             "JEQ,s  small\n\t"
9400             "MOV    $dst.lo,$dst.hi\n\t"
9401             "XOR    $dst.hi,$dst.hi\n"
9402     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9403             "SHR    $dst.hi,$shift" %}
9404   ins_encode( shift_right_long( dst, shift ) );
9405   ins_pipe( pipe_slow );
9406 %}
9407 
9408 // Shift Right Long by 1-31
9409 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9410   match(Set dst (RShiftL dst cnt));
9411   effect(KILL cr);
9412   ins_cost(200);
9413   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9414             "SAR    $dst.hi,$cnt" %}
9415   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9416   ins_encode( move_long_small_shift(dst,cnt) );
9417   ins_pipe( ialu_reg_long );
9418 %}
9419 
9420 // Shift Right Long by 32-63
9421 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9422   match(Set dst (RShiftL dst cnt));
9423   effect(KILL cr);
9424   ins_cost(300);
9425   format %{ "MOV    $dst.lo,$dst.hi\n"
9426           "\tSAR    $dst.lo,$cnt-32\n"
9427           "\tSAR    $dst.hi,31" %}
9428   opcode(0xC1, 0x7);  /* C1 /7 ib */
9429   ins_encode( move_long_big_shift_sign(dst,cnt) );
9430   ins_pipe( ialu_reg_long );
9431 %}
9432 
9433 // Shift Right arithmetic Long by variable
9434 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9435   match(Set dst (RShiftL dst shift));
9436   effect(KILL cr);
9437   ins_cost(600);
9438   size(18);
9439   format %{ "TEST   $shift,32\n\t"
9440             "JEQ,s  small\n\t"
9441             "MOV    $dst.lo,$dst.hi\n\t"
9442             "SAR    $dst.hi,31\n"
9443     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9444             "SAR    $dst.hi,$shift" %}
9445   ins_encode( shift_right_arith_long( dst, shift ) );
9446   ins_pipe( pipe_slow );
9447 %}
9448 
9449 
9450 //----------Double Instructions------------------------------------------------
9451 // Double Math
9452 
9453 // Compare & branch
9454 
9455 // P6 version of float compare, sets condition codes in EFLAGS
9456 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9457   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9458   match(Set cr (CmpD src1 src2));
9459   effect(KILL rax);
9460   ins_cost(150);
9461   format %{ "FLD    $src1\n\t"
9462             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9463             "JNP    exit\n\t"
9464             "MOV    ah,1       // saw a NaN, set CF\n\t"
9465             "SAHF\n"
9466      "exit:\tNOP               // avoid branch to branch" %}
9467   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9468   ins_encode( Push_Reg_DPR(src1),
9469               OpcP, RegOpc(src2),
9470               cmpF_P6_fixup );
9471   ins_pipe( pipe_slow );
9472 %}
9473 
9474 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9475   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9476   match(Set cr (CmpD src1 src2));
9477   ins_cost(150);
9478   format %{ "FLD    $src1\n\t"
9479             "FUCOMIP ST,$src2  // P6 instruction" %}
9480   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9481   ins_encode( Push_Reg_DPR(src1),
9482               OpcP, RegOpc(src2));
9483   ins_pipe( pipe_slow );
9484 %}
9485 
9486 // Compare & branch
9487 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9488   predicate(UseSSE<=1);
9489   match(Set cr (CmpD src1 src2));
9490   effect(KILL rax);
9491   ins_cost(200);
9492   format %{ "FLD    $src1\n\t"
9493             "FCOMp  $src2\n\t"
9494             "FNSTSW AX\n\t"
9495             "TEST   AX,0x400\n\t"
9496             "JZ,s   flags\n\t"
9497             "MOV    AH,1\t# unordered treat as LT\n"
9498     "flags:\tSAHF" %}
9499   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9500   ins_encode( Push_Reg_DPR(src1),
9501               OpcP, RegOpc(src2),
9502               fpu_flags);
9503   ins_pipe( pipe_slow );
9504 %}
9505 
9506 // Compare vs zero into -1,0,1
9507 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9508   predicate(UseSSE<=1);
9509   match(Set dst (CmpD3 src1 zero));
9510   effect(KILL cr, KILL rax);
9511   ins_cost(280);
9512   format %{ "FTSTD  $dst,$src1" %}
9513   opcode(0xE4, 0xD9);
9514   ins_encode( Push_Reg_DPR(src1),
9515               OpcS, OpcP, PopFPU,
9516               CmpF_Result(dst));
9517   ins_pipe( pipe_slow );
9518 %}
9519 
9520 // Compare into -1,0,1
9521 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9522   predicate(UseSSE<=1);
9523   match(Set dst (CmpD3 src1 src2));
9524   effect(KILL cr, KILL rax);
9525   ins_cost(300);
9526   format %{ "FCMPD  $dst,$src1,$src2" %}
9527   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9528   ins_encode( Push_Reg_DPR(src1),
9529               OpcP, RegOpc(src2),
9530               CmpF_Result(dst));
9531   ins_pipe( pipe_slow );
9532 %}
9533 
9534 // float compare and set condition codes in EFLAGS by XMM regs
9535 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9536   predicate(UseSSE>=2);
9537   match(Set cr (CmpD src1 src2));
9538   ins_cost(145);
9539   format %{ "UCOMISD $src1,$src2\n\t"
9540             "JNP,s   exit\n\t"
9541             "PUSHF\t# saw NaN, set CF\n\t"
9542             "AND     [rsp], #0xffffff2b\n\t"
9543             "POPF\n"
9544     "exit:" %}
9545   ins_encode %{
9546     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9547     emit_cmpfp_fixup(_masm);
9548   %}
9549   ins_pipe( pipe_slow );
9550 %}
9551 
9552 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9553   predicate(UseSSE>=2);
9554   match(Set cr (CmpD src1 src2));
9555   ins_cost(100);
9556   format %{ "UCOMISD $src1,$src2" %}
9557   ins_encode %{
9558     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9559   %}
9560   ins_pipe( pipe_slow );
9561 %}
9562 
9563 // float compare and set condition codes in EFLAGS by XMM regs
9564 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9565   predicate(UseSSE>=2);
9566   match(Set cr (CmpD src1 (LoadD src2)));
9567   ins_cost(145);
9568   format %{ "UCOMISD $src1,$src2\n\t"
9569             "JNP,s   exit\n\t"
9570             "PUSHF\t# saw NaN, set CF\n\t"
9571             "AND     [rsp], #0xffffff2b\n\t"
9572             "POPF\n"
9573     "exit:" %}
9574   ins_encode %{
9575     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9576     emit_cmpfp_fixup(_masm);
9577   %}
9578   ins_pipe( pipe_slow );
9579 %}
9580 
9581 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9582   predicate(UseSSE>=2);
9583   match(Set cr (CmpD src1 (LoadD src2)));
9584   ins_cost(100);
9585   format %{ "UCOMISD $src1,$src2" %}
9586   ins_encode %{
9587     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9588   %}
9589   ins_pipe( pipe_slow );
9590 %}
9591 
9592 // Compare into -1,0,1 in XMM
9593 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9594   predicate(UseSSE>=2);
9595   match(Set dst (CmpD3 src1 src2));
9596   effect(KILL cr);
9597   ins_cost(255);
9598   format %{ "UCOMISD $src1, $src2\n\t"
9599             "MOV     $dst, #-1\n\t"
9600             "JP,s    done\n\t"
9601             "JB,s    done\n\t"
9602             "SETNE   $dst\n\t"
9603             "MOVZB   $dst, $dst\n"
9604     "done:" %}
9605   ins_encode %{
9606     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9607     emit_cmpfp3(_masm, $dst$$Register);
9608   %}
9609   ins_pipe( pipe_slow );
9610 %}
9611 
9612 // Compare into -1,0,1 in XMM and memory
9613 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9614   predicate(UseSSE>=2);
9615   match(Set dst (CmpD3 src1 (LoadD src2)));
9616   effect(KILL cr);
9617   ins_cost(275);
9618   format %{ "UCOMISD $src1, $src2\n\t"
9619             "MOV     $dst, #-1\n\t"
9620             "JP,s    done\n\t"
9621             "JB,s    done\n\t"
9622             "SETNE   $dst\n\t"
9623             "MOVZB   $dst, $dst\n"
9624     "done:" %}
9625   ins_encode %{
9626     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9627     emit_cmpfp3(_masm, $dst$$Register);
9628   %}
9629   ins_pipe( pipe_slow );
9630 %}
9631 
9632 
9633 instruct subDPR_reg(regDPR dst, regDPR src) %{
9634   predicate (UseSSE <=1);
9635   match(Set dst (SubD dst src));
9636 
9637   format %{ "FLD    $src\n\t"
9638             "DSUBp  $dst,ST" %}
9639   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9640   ins_cost(150);
9641   ins_encode( Push_Reg_DPR(src),
9642               OpcP, RegOpc(dst) );
9643   ins_pipe( fpu_reg_reg );
9644 %}
9645 
9646 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9647   predicate (UseSSE <=1);
9648   match(Set dst (RoundDouble (SubD src1 src2)));
9649   ins_cost(250);
9650 
9651   format %{ "FLD    $src2\n\t"
9652             "DSUB   ST,$src1\n\t"
9653             "FSTP_D $dst\t# D-round" %}
9654   opcode(0xD8, 0x5);
9655   ins_encode( Push_Reg_DPR(src2),
9656               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9657   ins_pipe( fpu_mem_reg_reg );
9658 %}
9659 
9660 
9661 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9662   predicate (UseSSE <=1);
9663   match(Set dst (SubD dst (LoadD src)));
9664   ins_cost(150);
9665 
9666   format %{ "FLD    $src\n\t"
9667             "DSUBp  $dst,ST" %}
9668   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9669   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9670               OpcP, RegOpc(dst) );
9671   ins_pipe( fpu_reg_mem );
9672 %}
9673 
9674 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9675   predicate (UseSSE<=1);
9676   match(Set dst (AbsD src));
9677   ins_cost(100);
9678   format %{ "FABS" %}
9679   opcode(0xE1, 0xD9);
9680   ins_encode( OpcS, OpcP );
9681   ins_pipe( fpu_reg_reg );
9682 %}
9683 
9684 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9685   predicate(UseSSE<=1);
9686   match(Set dst (NegD src));
9687   ins_cost(100);
9688   format %{ "FCHS" %}
9689   opcode(0xE0, 0xD9);
9690   ins_encode( OpcS, OpcP );
9691   ins_pipe( fpu_reg_reg );
9692 %}
9693 
9694 instruct addDPR_reg(regDPR dst, regDPR src) %{
9695   predicate(UseSSE<=1);
9696   match(Set dst (AddD dst src));
9697   format %{ "FLD    $src\n\t"
9698             "DADD   $dst,ST" %}
9699   size(4);
9700   ins_cost(150);
9701   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9702   ins_encode( Push_Reg_DPR(src),
9703               OpcP, RegOpc(dst) );
9704   ins_pipe( fpu_reg_reg );
9705 %}
9706 
9707 
9708 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9709   predicate(UseSSE<=1);
9710   match(Set dst (RoundDouble (AddD src1 src2)));
9711   ins_cost(250);
9712 
9713   format %{ "FLD    $src2\n\t"
9714             "DADD   ST,$src1\n\t"
9715             "FSTP_D $dst\t# D-round" %}
9716   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9717   ins_encode( Push_Reg_DPR(src2),
9718               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9719   ins_pipe( fpu_mem_reg_reg );
9720 %}
9721 
9722 
9723 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9724   predicate(UseSSE<=1);
9725   match(Set dst (AddD dst (LoadD src)));
9726   ins_cost(150);
9727 
9728   format %{ "FLD    $src\n\t"
9729             "DADDp  $dst,ST" %}
9730   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9731   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9732               OpcP, RegOpc(dst) );
9733   ins_pipe( fpu_reg_mem );
9734 %}
9735 
9736 // add-to-memory
9737 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9738   predicate(UseSSE<=1);
9739   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9740   ins_cost(150);
9741 
9742   format %{ "FLD_D  $dst\n\t"
9743             "DADD   ST,$src\n\t"
9744             "FST_D  $dst" %}
9745   opcode(0xDD, 0x0);
9746   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9747               Opcode(0xD8), RegOpc(src),
9748               set_instruction_start,
9749               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9750   ins_pipe( fpu_reg_mem );
9751 %}
9752 
9753 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9754   predicate(UseSSE<=1);
9755   match(Set dst (AddD dst con));
9756   ins_cost(125);
9757   format %{ "FLD1\n\t"
9758             "DADDp  $dst,ST" %}
9759   ins_encode %{
9760     __ fld1();
9761     __ faddp($dst$$reg);
9762   %}
9763   ins_pipe(fpu_reg);
9764 %}
9765 
9766 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9767   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9768   match(Set dst (AddD dst con));
9769   ins_cost(200);
9770   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9771             "DADDp  $dst,ST" %}
9772   ins_encode %{
9773     __ fld_d($constantaddress($con));
9774     __ faddp($dst$$reg);
9775   %}
9776   ins_pipe(fpu_reg_mem);
9777 %}
9778 
9779 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9780   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9781   match(Set dst (RoundDouble (AddD src con)));
9782   ins_cost(200);
9783   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9784             "DADD   ST,$src\n\t"
9785             "FSTP_D $dst\t# D-round" %}
9786   ins_encode %{
9787     __ fld_d($constantaddress($con));
9788     __ fadd($src$$reg);
9789     __ fstp_d(Address(rsp, $dst$$disp));
9790   %}
9791   ins_pipe(fpu_mem_reg_con);
9792 %}
9793 
9794 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9795   predicate(UseSSE<=1);
9796   match(Set dst (MulD dst src));
9797   format %{ "FLD    $src\n\t"
9798             "DMULp  $dst,ST" %}
9799   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9800   ins_cost(150);
9801   ins_encode( Push_Reg_DPR(src),
9802               OpcP, RegOpc(dst) );
9803   ins_pipe( fpu_reg_reg );
9804 %}
9805 
9806 // Strict FP instruction biases argument before multiply then
9807 // biases result to avoid double rounding of subnormals.
9808 //
9809 // scale arg1 by multiplying arg1 by 2^(-15360)
9810 // load arg2
9811 // multiply scaled arg1 by arg2
9812 // rescale product by 2^(15360)
9813 //
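// (Illustrative note, based on assumptions not spelled out in this file: the
// stub constants StubRoutines::_fpu_subnormal_bias1 and _fpu_subnormal_bias2
// are taken here to hold 2^(-15360) and 2^(+15360), where 15360 = 16383 - 1023
// maps the double exponent range onto the bottom of the x87 extended range.
// A product that would be a double subnormal is therefore denormalized, and
// hence rounded, at the double's bit position while still in the extended
// format, and the final rescale by an exact power of two introduces no second
// rounding.  The same reasoning applies to the strict-FP divide further below.)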
9814 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9815   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9816   match(Set dst (MulD dst src));
9817   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9818 
9819   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9820             "DMULp  $dst,ST\n\t"
9821             "FLD    $src\n\t"
9822             "DMULp  $dst,ST\n\t"
9823             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9824             "DMULp  $dst,ST\n\t" %}
9825   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9826   ins_encode( strictfp_bias1(dst),
9827               Push_Reg_DPR(src),
9828               OpcP, RegOpc(dst),
9829               strictfp_bias2(dst) );
9830   ins_pipe( fpu_reg_reg );
9831 %}
9832 
9833 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9834   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9835   match(Set dst (MulD dst con));
9836   ins_cost(200);
9837   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9838             "DMULp  $dst,ST" %}
9839   ins_encode %{
9840     __ fld_d($constantaddress($con));
9841     __ fmulp($dst$$reg);
9842   %}
9843   ins_pipe(fpu_reg_mem);
9844 %}
9845 
9846 
9847 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9848   predicate( UseSSE<=1 );
9849   match(Set dst (MulD dst (LoadD src)));
9850   ins_cost(200);
9851   format %{ "FLD_D  $src\n\t"
9852             "DMULp  $dst,ST" %}
9853   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9854   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9855               OpcP, RegOpc(dst) );
9856   ins_pipe( fpu_reg_mem );
9857 %}
9858 
9859 //
9860 // Cisc-alternate to reg-reg multiply
9861 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9862   predicate( UseSSE<=1 );
9863   match(Set dst (MulD src (LoadD mem)));
9864   ins_cost(250);
9865   format %{ "FLD_D  $mem\n\t"
9866             "DMUL   ST,$src\n\t"
9867             "FSTP_D $dst" %}
9868   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9869   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9870               OpcReg_FPR(src),
9871               Pop_Reg_DPR(dst) );
9872   ins_pipe( fpu_reg_reg_mem );
9873 %}
9874 
9875 
9876 // MACRO3 -- addDPR a mulDPR
9877 // This instruction is a '2-address' instruction in that the result goes
9878 // back to src2.  This eliminates a move from the macro; possibly the
9879 // register allocator will have to add it back (and maybe not).
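// (Illustrative note: this pattern matches accumulation statements such as the
//  Java expression "sum += a[i] * b[i];", where the AddD result is written back
//  into the register already holding "sum", so no extra copy is emitted unless
//  the register allocator decides to reintroduce one.)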
9880 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9881   predicate( UseSSE<=1 );
9882   match(Set src2 (AddD (MulD src0 src1) src2));
9883   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9884             "DMUL   ST,$src1\n\t"
9885             "DADDp  $src2,ST" %}
9886   ins_cost(250);
9887   opcode(0xDD); /* LoadD DD /0 */
9888   ins_encode( Push_Reg_FPR(src0),
9889               FMul_ST_reg(src1),
9890               FAddP_reg_ST(src2) );
9891   ins_pipe( fpu_reg_reg_reg );
9892 %}
9893 
9894 
9895 // MACRO3 -- subDPR a mulDPR
9896 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9897   predicate( UseSSE<=1 );
9898   match(Set src2 (SubD (MulD src0 src1) src2));
9899   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9900             "DMUL   ST,$src1\n\t"
9901             "DSUBRp $src2,ST" %}
9902   ins_cost(250);
9903   ins_encode( Push_Reg_FPR(src0),
9904               FMul_ST_reg(src1),
9905               Opcode(0xDE), Opc_plus(0xE0,src2));
9906   ins_pipe( fpu_reg_reg_reg );
9907 %}
9908 
9909 
9910 instruct divDPR_reg(regDPR dst, regDPR src) %{
9911   predicate( UseSSE<=1 );
9912   match(Set dst (DivD dst src));
9913 
9914   format %{ "FLD    $src\n\t"
9915             "FDIVp  $dst,ST" %}
9916   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9917   ins_cost(150);
9918   ins_encode( Push_Reg_DPR(src),
9919               OpcP, RegOpc(dst) );
9920   ins_pipe( fpu_reg_reg );
9921 %}
9922 
9923 // Strict FP instruction biases argument before division then
9924 // biases result, to avoid double rounding of subnormals.
9925 //
9926 // scale dividend by multiplying dividend by 2^(-15360)
9927 // load divisor
9928 // divide scaled dividend by divisor
9929 // rescale quotient by 2^(15360)
9930 //
9931 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);
9936 
9937   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9938             "DMULp  $dst,ST\n\t"
9939             "FLD    $src\n\t"
9940             "FDIVp  $dst,ST\n\t"
9941             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9942             "DMULp  $dst,ST\n\t" %}
9943   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9944   ins_encode( strictfp_bias1(dst),
9945               Push_Reg_DPR(src),
9946               OpcP, RegOpc(dst),
9947               strictfp_bias2(dst) );
9948   ins_pipe( fpu_reg_reg );
9949 %}
9950 
9951 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9952   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9953   match(Set dst (RoundDouble (DivD src1 src2)));
9954 
9955   format %{ "FLD    $src1\n\t"
9956             "FDIV   ST,$src2\n\t"
9957             "FSTP_D $dst\t# D-round" %}
9958   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9959   ins_encode( Push_Reg_DPR(src1),
9960               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9961   ins_pipe( fpu_mem_reg_reg );
9962 %}
9963 
9964 
9965 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9966   predicate(UseSSE<=1);
9967   match(Set dst (ModD dst src));
9968   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9969 
9970   format %{ "DMOD   $dst,$src" %}
9971   ins_cost(250);
9972   ins_encode(Push_Reg_Mod_DPR(dst, src),
9973               emitModDPR(),
9974               Push_Result_Mod_DPR(src),
9975               Pop_Reg_DPR(dst));
9976   ins_pipe( pipe_slow );
9977 %}
9978 
9979 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9980   predicate(UseSSE>=2);
9981   match(Set dst (ModD src0 src1));
9982   effect(KILL rax, KILL cr);
9983 
9984   format %{ "SUB    ESP,8\t # DMOD\n"
9985           "\tMOVSD  [ESP+0],$src1\n"
9986           "\tFLD_D  [ESP+0]\n"
9987           "\tMOVSD  [ESP+0],$src0\n"
9988           "\tFLD_D  [ESP+0]\n"
9989      "loop:\tFPREM\n"
9990           "\tFWAIT\n"
9991           "\tFNSTSW AX\n"
9992           "\tSAHF\n"
9993           "\tJP     loop\n"
9994           "\tFSTP_D [ESP+0]\n"
9995           "\tMOVSD  $dst,[ESP+0]\n"
9996           "\tADD    ESP,8\n"
9997           "\tFSTP   ST0\t # Restore FPU Stack"
9998     %}
9999   ins_cost(250);
10000   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10001   ins_pipe( pipe_slow );
10002 %}
10003 
10004 instruct atanDPR_reg(regDPR dst, regDPR src) %{
10005   predicate (UseSSE<=1);
10006   match(Set dst(AtanD dst src));
10007   format %{ "DATA   $dst,$src" %}
10008   opcode(0xD9, 0xF3);
10009   ins_encode( Push_Reg_DPR(src),
10010               OpcP, OpcS, RegOpc(dst) );
10011   ins_pipe( pipe_slow );
10012 %}
10013 
10014 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10015   predicate (UseSSE>=2);
10016   match(Set dst(AtanD dst src));
10017   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10018   format %{ "DATA   $dst,$src" %}
10019   opcode(0xD9, 0xF3);
10020   ins_encode( Push_SrcD(src),
10021               OpcP, OpcS, Push_ResultD(dst) );
10022   ins_pipe( pipe_slow );
10023 %}
10024 
10025 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10026   predicate (UseSSE<=1);
10027   match(Set dst (SqrtD src));
10028   format %{ "DSQRT  $dst,$src" %}
10029   opcode(0xFA, 0xD9);
10030   ins_encode( Push_Reg_DPR(src),
10031               OpcS, OpcP, Pop_Reg_DPR(dst) );
10032   ins_pipe( pipe_slow );
10033 %}
10034 
10035 //-------------Float Instructions-------------------------------
10036 // Float Math
10037 
10038 // Code for float compare:
10039 //     fcompp();
10040 //     fwait(); fnstsw_ax();
10041 //     sahf();
10042 //     movl(dst, unordered_result);
10043 //     jcc(Assembler::parity, exit);
10044 //     movl(dst, less_result);
10045 //     jcc(Assembler::below, exit);
10046 //     movl(dst, equal_result);
10047 //     jcc(Assembler::equal, exit);
10048 //     movl(dst, greater_result);
10049 //   exit:
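// (Illustrative note, assuming the conventional result values
//  less_result = -1, equal_result = 0, greater_result = 1 and
//  unordered_result = -1: the parity branch above exits with unordered_result
//  still in dst, so a NaN operand makes the CmpF3/CmpD3 forms return -1,
//  matching the "unordered treat as LT" handling in the FCOMp-based formats.)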
10050 
10051 // P6 version of float compare, sets condition codes in EFLAGS
10052 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10053   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10054   match(Set cr (CmpF src1 src2));
10055   effect(KILL rax);
10056   ins_cost(150);
10057   format %{ "FLD    $src1\n\t"
10058             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10059             "JNP    exit\n\t"
10060             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10061             "SAHF\n"
10062      "exit:\tNOP               // avoid branch to branch" %}
10063   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10064   ins_encode( Push_Reg_DPR(src1),
10065               OpcP, RegOpc(src2),
10066               cmpF_P6_fixup );
10067   ins_pipe( pipe_slow );
10068 %}
10069 
10070 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10071   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10072   match(Set cr (CmpF src1 src2));
10073   ins_cost(100);
10074   format %{ "FLD    $src1\n\t"
10075             "FUCOMIP ST,$src2  // P6 instruction" %}
10076   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10077   ins_encode( Push_Reg_DPR(src1),
10078               OpcP, RegOpc(src2));
10079   ins_pipe( pipe_slow );
10080 %}
10081 
10082 
10083 // Compare & branch
10084 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10085   predicate(UseSSE == 0);
10086   match(Set cr (CmpF src1 src2));
10087   effect(KILL rax);
10088   ins_cost(200);
10089   format %{ "FLD    $src1\n\t"
10090             "FCOMp  $src2\n\t"
10091             "FNSTSW AX\n\t"
10092             "TEST   AX,0x400\n\t"
10093             "JZ,s   flags\n\t"
10094             "MOV    AH,1\t# unordered treat as LT\n"
10095     "flags:\tSAHF" %}
10096   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10097   ins_encode( Push_Reg_DPR(src1),
10098               OpcP, RegOpc(src2),
10099               fpu_flags);
10100   ins_pipe( pipe_slow );
10101 %}
10102 
10103 // Compare vs zero into -1,0,1
10104 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10105   predicate(UseSSE == 0);
10106   match(Set dst (CmpF3 src1 zero));
10107   effect(KILL cr, KILL rax);
10108   ins_cost(280);
10109   format %{ "FTSTF  $dst,$src1" %}
10110   opcode(0xE4, 0xD9);
10111   ins_encode( Push_Reg_DPR(src1),
10112               OpcS, OpcP, PopFPU,
10113               CmpF_Result(dst));
10114   ins_pipe( pipe_slow );
10115 %}
10116 
10117 // Compare into -1,0,1
10118 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10119   predicate(UseSSE == 0);
10120   match(Set dst (CmpF3 src1 src2));
10121   effect(KILL cr, KILL rax);
10122   ins_cost(300);
10123   format %{ "FCMPF  $dst,$src1,$src2" %}
10124   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10125   ins_encode( Push_Reg_DPR(src1),
10126               OpcP, RegOpc(src2),
10127               CmpF_Result(dst));
10128   ins_pipe( pipe_slow );
10129 %}
10130 
10131 // float compare and set condition codes in EFLAGS by XMM regs
10132 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10133   predicate(UseSSE>=1);
10134   match(Set cr (CmpF src1 src2));
10135   ins_cost(145);
10136   format %{ "UCOMISS $src1,$src2\n\t"
10137             "JNP,s   exit\n\t"
10138             "PUSHF\t# saw NaN, set CF\n\t"
10139             "AND     [rsp], #0xffffff2b\n\t"
10140             "POPF\n"
10141     "exit:" %}
10142   ins_encode %{
10143     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10144     emit_cmpfp_fixup(_masm);
10145   %}
10146   ins_pipe( pipe_slow );
10147 %}
10148 
10149 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10150   predicate(UseSSE>=1);
10151   match(Set cr (CmpF src1 src2));
10152   ins_cost(100);
10153   format %{ "UCOMISS $src1,$src2" %}
10154   ins_encode %{
10155     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10156   %}
10157   ins_pipe( pipe_slow );
10158 %}
10159 
10160 // float compare and set condition codes in EFLAGS by XMM regs
10161 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10162   predicate(UseSSE>=1);
10163   match(Set cr (CmpF src1 (LoadF src2)));
10164   ins_cost(165);
10165   format %{ "UCOMISS $src1,$src2\n\t"
10166             "JNP,s   exit\n\t"
10167             "PUSHF\t# saw NaN, set CF\n\t"
10168             "AND     [rsp], #0xffffff2b\n\t"
10169             "POPF\n"
10170     "exit:" %}
10171   ins_encode %{
10172     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10173     emit_cmpfp_fixup(_masm);
10174   %}
10175   ins_pipe( pipe_slow );
10176 %}
10177 
10178 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10179   predicate(UseSSE>=1);
10180   match(Set cr (CmpF src1 (LoadF src2)));
10181   ins_cost(100);
10182   format %{ "UCOMISS $src1,$src2" %}
10183   ins_encode %{
10184     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10185   %}
10186   ins_pipe( pipe_slow );
10187 %}
10188 
10189 // Compare into -1,0,1 in XMM
10190 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10191   predicate(UseSSE>=1);
10192   match(Set dst (CmpF3 src1 src2));
10193   effect(KILL cr);
10194   ins_cost(255);
10195   format %{ "UCOMISS $src1, $src2\n\t"
10196             "MOV     $dst, #-1\n\t"
10197             "JP,s    done\n\t"
10198             "JB,s    done\n\t"
10199             "SETNE   $dst\n\t"
10200             "MOVZB   $dst, $dst\n"
10201     "done:" %}
10202   ins_encode %{
10203     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10204     emit_cmpfp3(_masm, $dst$$Register);
10205   %}
10206   ins_pipe( pipe_slow );
10207 %}
10208 
10209 // Compare into -1,0,1 in XMM and memory
10210 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10211   predicate(UseSSE>=1);
10212   match(Set dst (CmpF3 src1 (LoadF src2)));
10213   effect(KILL cr);
10214   ins_cost(275);
10215   format %{ "UCOMISS $src1, $src2\n\t"
10216             "MOV     $dst, #-1\n\t"
10217             "JP,s    done\n\t"
10218             "JB,s    done\n\t"
10219             "SETNE   $dst\n\t"
10220             "MOVZB   $dst, $dst\n"
10221     "done:" %}
10222   ins_encode %{
10223     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10224     emit_cmpfp3(_masm, $dst$$Register);
10225   %}
10226   ins_pipe( pipe_slow );
10227 %}
10228 
10229 // Spill to obtain 24-bit precision
10230 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10231   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10232   match(Set dst (SubF src1 src2));
10233 
10234   format %{ "FSUB   $dst,$src1 - $src2" %}
10235   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10236   ins_encode( Push_Reg_FPR(src1),
10237               OpcReg_FPR(src2),
10238               Pop_Mem_FPR(dst) );
10239   ins_pipe( fpu_mem_reg_reg );
10240 %}
10241 //
10242 // This instruction does not round to 24-bits
10243 instruct subFPR_reg(regFPR dst, regFPR src) %{
10244   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10245   match(Set dst (SubF dst src));
10246 
10247   format %{ "FSUB   $dst,$src" %}
10248   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10249   ins_encode( Push_Reg_FPR(src),
10250               OpcP, RegOpc(dst) );
10251   ins_pipe( fpu_reg_reg );
10252 %}
10253 
10254 // Spill to obtain 24-bit precision
10255 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10256   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10257   match(Set dst (AddF src1 src2));
10258 
10259   format %{ "FADD   $dst,$src1,$src2" %}
10260   opcode(0xD8, 0x0); /* D8 C0+i */
10261   ins_encode( Push_Reg_FPR(src2),
10262               OpcReg_FPR(src1),
10263               Pop_Mem_FPR(dst) );
10264   ins_pipe( fpu_mem_reg_reg );
10265 %}
10266 //
10267 // This instruction does not round to 24-bits
10268 instruct addFPR_reg(regFPR dst, regFPR src) %{
10269   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10270   match(Set dst (AddF dst src));
10271 
10272   format %{ "FLD    $src\n\t"
10273             "FADDp  $dst,ST" %}
10274   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10275   ins_encode( Push_Reg_FPR(src),
10276               OpcP, RegOpc(dst) );
10277   ins_pipe( fpu_reg_reg );
10278 %}
10279 
10280 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10281   predicate(UseSSE==0);
10282   match(Set dst (AbsF src));
10283   ins_cost(100);
10284   format %{ "FABS" %}
10285   opcode(0xE1, 0xD9);
10286   ins_encode( OpcS, OpcP );
10287   ins_pipe( fpu_reg_reg );
10288 %}
10289 
10290 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10291   predicate(UseSSE==0);
10292   match(Set dst (NegF src));
10293   ins_cost(100);
10294   format %{ "FCHS" %}
10295   opcode(0xE0, 0xD9);
10296   ins_encode( OpcS, OpcP );
10297   ins_pipe( fpu_reg_reg );
10298 %}
10299 
10300 // Cisc-alternate to addFPR_reg
10301 // Spill to obtain 24-bit precision
10302 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10303   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10304   match(Set dst (AddF src1 (LoadF src2)));
10305 
10306   format %{ "FLD    $src2\n\t"
10307             "FADD   ST,$src1\n\t"
10308             "FSTP_S $dst" %}
10309   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10310   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10311               OpcReg_FPR(src1),
10312               Pop_Mem_FPR(dst) );
10313   ins_pipe( fpu_mem_reg_mem );
10314 %}
10315 //
10316 // Cisc-alternate to addFPR_reg
10317 // This instruction does not round to 24-bits
10318 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10319   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10320   match(Set dst (AddF dst (LoadF src)));
10321 
10322   format %{ "FADD   $dst,$src" %}
10323   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10324   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10325               OpcP, RegOpc(dst) );
10326   ins_pipe( fpu_reg_mem );
10327 %}
10328 
// Following two instructions for _222_mpegaudio
10330 // Spill to obtain 24-bit precision
10331 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10332   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10333   match(Set dst (AddF src1 src2));
10334 
10335   format %{ "FADD   $dst,$src1,$src2" %}
10336   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10337   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10338               OpcReg_FPR(src2),
10339               Pop_Mem_FPR(dst) );
10340   ins_pipe( fpu_mem_reg_mem );
10341 %}
10342 
10343 // Cisc-spill variant
10344 // Spill to obtain 24-bit precision
10345 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10346   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10347   match(Set dst (AddF src1 (LoadF src2)));
10348 
10349   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10350   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10351   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10352               set_instruction_start,
10353               OpcP, RMopc_Mem(secondary,src1),
10354               Pop_Mem_FPR(dst) );
10355   ins_pipe( fpu_mem_mem_mem );
10356 %}
10357 
10358 // Spill to obtain 24-bit precision
10359 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10360   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10361   match(Set dst (AddF src1 src2));
10362 
10363   format %{ "FADD   $dst,$src1,$src2" %}
10364   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10365   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10366               set_instruction_start,
10367               OpcP, RMopc_Mem(secondary,src1),
10368               Pop_Mem_FPR(dst) );
10369   ins_pipe( fpu_mem_mem_mem );
10370 %}
10371 
10372 
10373 // Spill to obtain 24-bit precision
10374 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10375   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10376   match(Set dst (AddF src con));
10377   format %{ "FLD    $src\n\t"
10378             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10379             "FSTP_S $dst"  %}
10380   ins_encode %{
10381     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10382     __ fadd_s($constantaddress($con));
10383     __ fstp_s(Address(rsp, $dst$$disp));
10384   %}
10385   ins_pipe(fpu_mem_reg_con);
10386 %}
10387 //
10388 // This instruction does not round to 24-bits
10389 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10390   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10391   match(Set dst (AddF src con));
10392   format %{ "FLD    $src\n\t"
10393             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10394             "FSTP   $dst"  %}
10395   ins_encode %{
10396     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10397     __ fadd_s($constantaddress($con));
10398     __ fstp_d($dst$$reg);
10399   %}
10400   ins_pipe(fpu_reg_reg_con);
10401 %}
10402 
10403 // Spill to obtain 24-bit precision
10404 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10405   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10406   match(Set dst (MulF src1 src2));
10407 
10408   format %{ "FLD    $src1\n\t"
10409             "FMUL   $src2\n\t"
10410             "FSTP_S $dst"  %}
10411   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10412   ins_encode( Push_Reg_FPR(src1),
10413               OpcReg_FPR(src2),
10414               Pop_Mem_FPR(dst) );
10415   ins_pipe( fpu_mem_reg_reg );
10416 %}
10417 //
10418 // This instruction does not round to 24-bits
10419 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10420   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10421   match(Set dst (MulF src1 src2));
10422 
10423   format %{ "FLD    $src1\n\t"
10424             "FMUL   $src2\n\t"
10425             "FSTP_S $dst"  %}
10426   opcode(0xD8, 0x1); /* D8 C8+i */
10427   ins_encode( Push_Reg_FPR(src2),
10428               OpcReg_FPR(src1),
10429               Pop_Reg_FPR(dst) );
10430   ins_pipe( fpu_reg_reg_reg );
10431 %}
10432 
10433 
10434 // Spill to obtain 24-bit precision
10435 // Cisc-alternate to reg-reg multiply
10436 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10437   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10438   match(Set dst (MulF src1 (LoadF src2)));
10439 
10440   format %{ "FLD_S  $src2\n\t"
10441             "FMUL   $src1\n\t"
10442             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10444   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10445               OpcReg_FPR(src1),
10446               Pop_Mem_FPR(dst) );
10447   ins_pipe( fpu_mem_reg_mem );
10448 %}
10449 //
10450 // This instruction does not round to 24-bits
10451 // Cisc-alternate to reg-reg multiply
10452 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10453   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10454   match(Set dst (MulF src1 (LoadF src2)));
10455 
10456   format %{ "FMUL   $dst,$src1,$src2" %}
10457   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10458   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10459               OpcReg_FPR(src1),
10460               Pop_Reg_FPR(dst) );
10461   ins_pipe( fpu_reg_reg_mem );
10462 %}
10463 
10464 // Spill to obtain 24-bit precision
10465 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10466   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10467   match(Set dst (MulF src1 src2));
10468 
10469   format %{ "FMUL   $dst,$src1,$src2" %}
10470   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10471   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10472               set_instruction_start,
10473               OpcP, RMopc_Mem(secondary,src1),
10474               Pop_Mem_FPR(dst) );
10475   ins_pipe( fpu_mem_mem_mem );
10476 %}
10477 
10478 // Spill to obtain 24-bit precision
10479 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10480   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10481   match(Set dst (MulF src con));
10482 
10483   format %{ "FLD    $src\n\t"
10484             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10485             "FSTP_S $dst"  %}
10486   ins_encode %{
10487     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10488     __ fmul_s($constantaddress($con));
10489     __ fstp_s(Address(rsp, $dst$$disp));
10490   %}
10491   ins_pipe(fpu_mem_reg_con);
10492 %}
10493 //
10494 // This instruction does not round to 24-bits
10495 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10496   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10497   match(Set dst (MulF src con));
10498 
10499   format %{ "FLD    $src\n\t"
10500             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10501             "FSTP   $dst"  %}
10502   ins_encode %{
10503     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10504     __ fmul_s($constantaddress($con));
10505     __ fstp_d($dst$$reg);
10506   %}
10507   ins_pipe(fpu_reg_reg_con);
10508 %}
10509 
10510 
10511 //
10512 // MACRO1 -- subsume unshared load into mulFPR
10513 // This instruction does not round to 24-bits
10514 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10515   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10516   match(Set dst (MulF (LoadF mem1) src));
10517 
10518   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10519             "FMUL   ST,$src\n\t"
10520             "FSTP   $dst" %}
10521   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10522   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10523               OpcReg_FPR(src),
10524               Pop_Reg_FPR(dst) );
10525   ins_pipe( fpu_reg_reg_mem );
10526 %}
10527 //
10528 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10529 // This instruction does not round to 24-bits
10530 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10531   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10532   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10533   ins_cost(95);
10534 
10535   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10536             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10537             "FADD   ST,$src2\n\t"
10538             "FSTP   $dst" %}
10539   opcode(0xD9); /* LoadF D9 /0 */
10540   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10541               FMul_ST_reg(src1),
10542               FAdd_ST_reg(src2),
10543               Pop_Reg_FPR(dst) );
10544   ins_pipe( fpu_reg_mem_reg_reg );
10545 %}
10546 
10547 // MACRO3 -- addFPR a mulFPR
10548 // This instruction does not round to 24-bits.  It is a '2-address'
10549 // instruction in that the result goes back to src2.  This eliminates
10550 // a move from the macro; possibly the register allocator will have
10551 // to add it back (and maybe not).
10552 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10553   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10554   match(Set src2 (AddF (MulF src0 src1) src2));
10555 
10556   format %{ "FLD    $src0     ===MACRO3===\n\t"
10557             "FMUL   ST,$src1\n\t"
10558             "FADDP  $src2,ST" %}
10559   opcode(0xD9); /* LoadF D9 /0 */
10560   ins_encode( Push_Reg_FPR(src0),
10561               FMul_ST_reg(src1),
10562               FAddP_reg_ST(src2) );
10563   ins_pipe( fpu_reg_reg_reg );
10564 %}
10565 
10566 // MACRO4 -- divFPR subFPR
10567 // This instruction does not round to 24-bits
10568 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10569   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10570   match(Set dst (DivF (SubF src2 src1) src3));
10571 
10572   format %{ "FLD    $src2   ===MACRO4===\n\t"
10573             "FSUB   ST,$src1\n\t"
10574             "FDIV   ST,$src3\n\t"
10575             "FSTP  $dst" %}
10576   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10577   ins_encode( Push_Reg_FPR(src2),
10578               subFPR_divFPR_encode(src1,src3),
10579               Pop_Reg_FPR(dst) );
10580   ins_pipe( fpu_reg_reg_reg_reg );
10581 %}
10582 
10583 // Spill to obtain 24-bit precision
10584 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10585   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10586   match(Set dst (DivF src1 src2));
10587 
10588   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10590   ins_encode( Push_Reg_FPR(src1),
10591               OpcReg_FPR(src2),
10592               Pop_Mem_FPR(dst) );
10593   ins_pipe( fpu_mem_reg_reg );
10594 %}
10595 //
10596 // This instruction does not round to 24-bits
10597 instruct divFPR_reg(regFPR dst, regFPR src) %{
10598   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10599   match(Set dst (DivF dst src));
10600 
10601   format %{ "FDIV   $dst,$src" %}
10602   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10603   ins_encode( Push_Reg_FPR(src),
10604               OpcP, RegOpc(dst) );
10605   ins_pipe( fpu_reg_reg );
10606 %}
10607 
10608 
10609 // Spill to obtain 24-bit precision
10610 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10611   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10612   match(Set dst (ModF src1 src2));
10613   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10614 
10615   format %{ "FMOD   $dst,$src1,$src2" %}
10616   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10617               emitModDPR(),
10618               Push_Result_Mod_DPR(src2),
10619               Pop_Mem_FPR(dst));
10620   ins_pipe( pipe_slow );
10621 %}
10622 //
10623 // This instruction does not round to 24-bits
10624 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10625   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10626   match(Set dst (ModF dst src));
10627   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10628 
10629   format %{ "FMOD   $dst,$src" %}
10630   ins_encode(Push_Reg_Mod_DPR(dst, src),
10631               emitModDPR(),
10632               Push_Result_Mod_DPR(src),
10633               Pop_Reg_FPR(dst));
10634   ins_pipe( pipe_slow );
10635 %}
10636 
10637 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10638   predicate(UseSSE>=1);
10639   match(Set dst (ModF src0 src1));
10640   effect(KILL rax, KILL cr);
10641   format %{ "SUB    ESP,4\t # FMOD\n"
10642           "\tMOVSS  [ESP+0],$src1\n"
10643           "\tFLD_S  [ESP+0]\n"
10644           "\tMOVSS  [ESP+0],$src0\n"
10645           "\tFLD_S  [ESP+0]\n"
10646      "loop:\tFPREM\n"
10647           "\tFWAIT\n"
10648           "\tFNSTSW AX\n"
10649           "\tSAHF\n"
10650           "\tJP     loop\n"
10651           "\tFSTP_S [ESP+0]\n"
10652           "\tMOVSS  $dst,[ESP+0]\n"
10653           "\tADD    ESP,4\n"
10654           "\tFSTP   ST0\t # Restore FPU Stack"
10655     %}
10656   ins_cost(250);
10657   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10658   ins_pipe( pipe_slow );
10659 %}
10660 
10661 
10662 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all Alpha sorted.  Please keep it that way!
10664 
10665 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10666   predicate(UseSSE==0);
10667   match(Set dst (RoundFloat src));
10668   ins_cost(125);
10669   format %{ "FST_S  $dst,$src\t# F-round" %}
10670   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10671   ins_pipe( fpu_mem_reg );
10672 %}
10673 
10674 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10675   predicate(UseSSE<=1);
10676   match(Set dst (RoundDouble src));
10677   ins_cost(125);
10678   format %{ "FST_D  $dst,$src\t# D-round" %}
10679   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10680   ins_pipe( fpu_mem_reg );
10681 %}
10682 
// Force rounding to 24-bit precision and 8-bit exponent
10684 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10685   predicate(UseSSE==0);
10686   match(Set dst (ConvD2F src));
10687   format %{ "FST_S  $dst,$src\t# F-round" %}
10688   expand %{
10689     roundFloat_mem_reg(dst,src);
10690   %}
10691 %}
10692 
// Force rounding to 24-bit precision and 8-bit exponent
10694 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10695   predicate(UseSSE==1);
10696   match(Set dst (ConvD2F src));
10697   effect( KILL cr );
10698   format %{ "SUB    ESP,4\n\t"
10699             "FST_S  [ESP],$src\t# F-round\n\t"
10700             "MOVSS  $dst,[ESP]\n\t"
10701             "ADD ESP,4" %}
10702   ins_encode %{
10703     __ subptr(rsp, 4);
10704     if ($src$$reg != FPR1L_enc) {
10705       __ fld_s($src$$reg-1);
10706       __ fstp_s(Address(rsp, 0));
10707     } else {
10708       __ fst_s(Address(rsp, 0));
10709     }
10710     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10711     __ addptr(rsp, 4);
10712   %}
10713   ins_pipe( pipe_slow );
10714 %}
10715 
10716 // Force rounding double precision to single precision
10717 instruct convD2F_reg(regF dst, regD src) %{
10718   predicate(UseSSE>=2);
10719   match(Set dst (ConvD2F src));
10720   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10721   ins_encode %{
10722     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10723   %}
10724   ins_pipe( pipe_slow );
10725 %}
10726 
10727 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10728   predicate(UseSSE==0);
10729   match(Set dst (ConvF2D src));
10730   format %{ "FST_S  $dst,$src\t# D-round" %}
10731   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10732   ins_pipe( fpu_reg_reg );
10733 %}
10734 
10735 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10736   predicate(UseSSE==1);
10737   match(Set dst (ConvF2D src));
10738   format %{ "FST_D  $dst,$src\t# D-round" %}
10739   expand %{
10740     roundDouble_mem_reg(dst,src);
10741   %}
10742 %}
10743 
10744 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10745   predicate(UseSSE==1);
10746   match(Set dst (ConvF2D src));
10747   effect( KILL cr );
10748   format %{ "SUB    ESP,4\n\t"
            "MOVSS  [ESP],$src\n\t"
10750             "FLD_S  [ESP]\n\t"
10751             "ADD    ESP,4\n\t"
10752             "FSTP   $dst\t# D-round" %}
10753   ins_encode %{
10754     __ subptr(rsp, 4);
10755     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10756     __ fld_s(Address(rsp, 0));
10757     __ addptr(rsp, 4);
10758     __ fstp_d($dst$$reg);
10759   %}
10760   ins_pipe( pipe_slow );
10761 %}
10762 
10763 instruct convF2D_reg(regD dst, regF src) %{
10764   predicate(UseSSE>=2);
10765   match(Set dst (ConvF2D src));
10766   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10767   ins_encode %{
10768     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10769   %}
10770   ins_pipe( pipe_slow );
10771 %}
10772 
// Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10774 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10775   predicate(UseSSE<=1);
10776   match(Set dst (ConvD2I src));
10777   effect( KILL tmp, KILL cr );
10778   format %{ "FLD    $src\t# Convert double to int \n\t"
10779             "FLDCW  trunc mode\n\t"
10780             "SUB    ESP,4\n\t"
10781             "FISTp  [ESP + #0]\n\t"
10782             "FLDCW  std/24-bit mode\n\t"
10783             "POP    EAX\n\t"
10784             "CMP    EAX,0x80000000\n\t"
10785             "JNE,s  fast\n\t"
10786             "FLD_D  $src\n\t"
10787             "CALL   d2i_wrapper\n"
10788       "fast:" %}
10789   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10790   ins_pipe( pipe_slow );
10791 %}
10792 
// Convert a double to an int.  If the double is a NaN, stuff a zero in instead.
10794 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10795   predicate(UseSSE>=2);
10796   match(Set dst (ConvD2I src));
10797   effect( KILL tmp, KILL cr );
10798   format %{ "CVTTSD2SI $dst, $src\n\t"
10799             "CMP    $dst,0x80000000\n\t"
10800             "JNE,s  fast\n\t"
10801             "SUB    ESP, 8\n\t"
10802             "MOVSD  [ESP], $src\n\t"
10803             "FLD_D  [ESP]\n\t"
10804             "ADD    ESP, 8\n\t"
10805             "CALL   d2i_wrapper\n"
10806       "fast:" %}
10807   ins_encode %{
10808     Label fast;
10809     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10810     __ cmpl($dst$$Register, 0x80000000);
10811     __ jccb(Assembler::notEqual, fast);
10812     __ subptr(rsp, 8);
10813     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10814     __ fld_d(Address(rsp, 0));
10815     __ addptr(rsp, 8);
10816     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10817     __ bind(fast);
10818   %}
10819   ins_pipe( pipe_slow );
10820 %}
10821 
10822 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10823   predicate(UseSSE<=1);
10824   match(Set dst (ConvD2L src));
10825   effect( KILL cr );
10826   format %{ "FLD    $src\t# Convert double to long\n\t"
10827             "FLDCW  trunc mode\n\t"
10828             "SUB    ESP,8\n\t"
10829             "FISTp  [ESP + #0]\n\t"
10830             "FLDCW  std/24-bit mode\n\t"
10831             "POP    EAX\n\t"
10832             "POP    EDX\n\t"
10833             "CMP    EDX,0x80000000\n\t"
10834             "JNE,s  fast\n\t"
10835             "TEST   EAX,EAX\n\t"
10836             "JNE,s  fast\n\t"
10837             "FLD    $src\n\t"
10838             "CALL   d2l_wrapper\n"
10839       "fast:" %}
10840   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10841   ins_pipe( pipe_slow );
10842 %}
10843 
10844 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10845 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10846   predicate (UseSSE>=2);
10847   match(Set dst (ConvD2L src));
10848   effect( KILL cr );
10849   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10850             "MOVSD  [ESP],$src\n\t"
10851             "FLD_D  [ESP]\n\t"
10852             "FLDCW  trunc mode\n\t"
10853             "FISTp  [ESP + #0]\n\t"
10854             "FLDCW  std/24-bit mode\n\t"
10855             "POP    EAX\n\t"
10856             "POP    EDX\n\t"
10857             "CMP    EDX,0x80000000\n\t"
10858             "JNE,s  fast\n\t"
10859             "TEST   EAX,EAX\n\t"
10860             "JNE,s  fast\n\t"
10861             "SUB    ESP,8\n\t"
10862             "MOVSD  [ESP],$src\n\t"
10863             "FLD_D  [ESP]\n\t"
10864             "ADD    ESP,8\n\t"
10865             "CALL   d2l_wrapper\n"
10866       "fast:" %}
10867   ins_encode %{
10868     Label fast;
10869     __ subptr(rsp, 8);
10870     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10871     __ fld_d(Address(rsp, 0));
10872     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10873     __ fistp_d(Address(rsp, 0));
10874     // Restore the rounding mode, mask the exception
10875     if (Compile::current()->in_24_bit_fp_mode()) {
10876       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10877     } else {
10878       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10879     }
10880     // Load the converted long, adjust CPU stack
10881     __ pop(rax);
10882     __ pop(rdx);
10883     __ cmpl(rdx, 0x80000000);
10884     __ jccb(Assembler::notEqual, fast);
10885     __ testl(rax, rax);
10886     __ jccb(Assembler::notEqual, fast);
10887     __ subptr(rsp, 8);
10888     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10889     __ fld_d(Address(rsp, 0));
10890     __ addptr(rsp, 8);
10891     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10892     __ bind(fast);
10893   %}
10894   ins_pipe( pipe_slow );
10895 %}
10896 
// Convert a double to an int.  Java semantics require we do complex
// manglings in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or we converted a NaN; we check for this and
// go the slow path if needed.
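// (Illustrative note: with the invalid-operation exception masked, FIST
//  stores the integer-indefinite pattern 0x80000000 when the source is a NaN
//  or lies outside the int range, so the CMP against 0x80000000 below catches
//  exactly the inputs that need the d2i_wrapper slow path; Java requires, for
//  example, that (int)Double.NaN be 0 and that a too-large double convert to
//  Integer.MAX_VALUE.)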
10903 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10904   predicate(UseSSE==0);
10905   match(Set dst (ConvF2I src));
10906   effect( KILL tmp, KILL cr );
10907   format %{ "FLD    $src\t# Convert float to int \n\t"
10908             "FLDCW  trunc mode\n\t"
10909             "SUB    ESP,4\n\t"
10910             "FISTp  [ESP + #0]\n\t"
10911             "FLDCW  std/24-bit mode\n\t"
10912             "POP    EAX\n\t"
10913             "CMP    EAX,0x80000000\n\t"
10914             "JNE,s  fast\n\t"
10915             "FLD    $src\n\t"
10916             "CALL   d2i_wrapper\n"
10917       "fast:" %}
10918   // DPR2I_encoding works for FPR2I
10919   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10920   ins_pipe( pipe_slow );
10921 %}
10922 
10923 // Convert a float in xmm to an int reg.
10924 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10925   predicate(UseSSE>=1);
10926   match(Set dst (ConvF2I src));
10927   effect( KILL tmp, KILL cr );
10928   format %{ "CVTTSS2SI $dst, $src\n\t"
10929             "CMP    $dst,0x80000000\n\t"
10930             "JNE,s  fast\n\t"
10931             "SUB    ESP, 4\n\t"
10932             "MOVSS  [ESP], $src\n\t"
10933             "FLD    [ESP]\n\t"
10934             "ADD    ESP, 4\n\t"
10935             "CALL   d2i_wrapper\n"
10936       "fast:" %}
10937   ins_encode %{
10938     Label fast;
10939     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10940     __ cmpl($dst$$Register, 0x80000000);
10941     __ jccb(Assembler::notEqual, fast);
10942     __ subptr(rsp, 4);
10943     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10944     __ fld_s(Address(rsp, 0));
10945     __ addptr(rsp, 4);
10946     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10947     __ bind(fast);
10948   %}
10949   ins_pipe( pipe_slow );
10950 %}
10951 
10952 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10953   predicate(UseSSE==0);
10954   match(Set dst (ConvF2L src));
10955   effect( KILL cr );
10956   format %{ "FLD    $src\t# Convert float to long\n\t"
10957             "FLDCW  trunc mode\n\t"
10958             "SUB    ESP,8\n\t"
10959             "FISTp  [ESP + #0]\n\t"
10960             "FLDCW  std/24-bit mode\n\t"
10961             "POP    EAX\n\t"
10962             "POP    EDX\n\t"
10963             "CMP    EDX,0x80000000\n\t"
10964             "JNE,s  fast\n\t"
10965             "TEST   EAX,EAX\n\t"
10966             "JNE,s  fast\n\t"
10967             "FLD    $src\n\t"
10968             "CALL   d2l_wrapper\n"
10969       "fast:" %}
10970   // DPR2L_encoding works for FPR2L
10971   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10972   ins_pipe( pipe_slow );
10973 %}
10974 
10975 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10976 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10977   predicate (UseSSE>=1);
10978   match(Set dst (ConvF2L src));
10979   effect( KILL cr );
10980   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10981             "MOVSS  [ESP],$src\n\t"
10982             "FLD_S  [ESP]\n\t"
10983             "FLDCW  trunc mode\n\t"
10984             "FISTp  [ESP + #0]\n\t"
10985             "FLDCW  std/24-bit mode\n\t"
10986             "POP    EAX\n\t"
10987             "POP    EDX\n\t"
10988             "CMP    EDX,0x80000000\n\t"
10989             "JNE,s  fast\n\t"
10990             "TEST   EAX,EAX\n\t"
10991             "JNE,s  fast\n\t"
10992             "SUB    ESP,4\t# Convert float to long\n\t"
10993             "MOVSS  [ESP],$src\n\t"
10994             "FLD_S  [ESP]\n\t"
10995             "ADD    ESP,4\n\t"
10996             "CALL   d2l_wrapper\n"
10997       "fast:" %}
10998   ins_encode %{
10999     Label fast;
11000     __ subptr(rsp, 8);
11001     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11002     __ fld_s(Address(rsp, 0));
11003     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11004     __ fistp_d(Address(rsp, 0));
11005     // Restore the rounding mode, mask the exception
11006     if (Compile::current()->in_24_bit_fp_mode()) {
11007       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11008     } else {
11009       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11010     }
11011     // Load the converted long, adjust CPU stack
11012     __ pop(rax);
11013     __ pop(rdx);
11014     __ cmpl(rdx, 0x80000000);
11015     __ jccb(Assembler::notEqual, fast);
11016     __ testl(rax, rax);
11017     __ jccb(Assembler::notEqual, fast);
11018     __ subptr(rsp, 4);
11019     __ movflt(Address(rsp, 0), $src$$XMMRegister);
11020     __ fld_s(Address(rsp, 0));
11021     __ addptr(rsp, 4);
11022     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11023     __ bind(fast);
11024   %}
11025   ins_pipe( pipe_slow );
11026 %}
11027 
11028 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11029   predicate( UseSSE<=1 );
11030   match(Set dst (ConvI2D src));
11031   format %{ "FILD   $src\n\t"
11032             "FSTP   $dst" %}
11033   opcode(0xDB, 0x0);  /* DB /0 */
11034   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11035   ins_pipe( fpu_reg_mem );
11036 %}
11037 
11038 instruct convI2D_reg(regD dst, rRegI src) %{
11039   predicate( UseSSE>=2 && !UseXmmI2D );
11040   match(Set dst (ConvI2D src));
11041   format %{ "CVTSI2SD $dst,$src" %}
11042   ins_encode %{
11043     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11044   %}
11045   ins_pipe( pipe_slow );
11046 %}
11047 
11048 instruct convI2D_mem(regD dst, memory mem) %{
11049   predicate( UseSSE>=2 );
11050   match(Set dst (ConvI2D (LoadI mem)));
11051   format %{ "CVTSI2SD $dst,$mem" %}
11052   ins_encode %{
11053     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11054   %}
11055   ins_pipe( pipe_slow );
11056 %}
11057 
11058 instruct convXI2D_reg(regD dst, rRegI src)
11059 %{
11060   predicate( UseSSE>=2 && UseXmmI2D );
11061   match(Set dst (ConvI2D src));
11062 
11063   format %{ "MOVD  $dst,$src\n\t"
11064             "CVTDQ2PD $dst,$dst\t# i2d" %}
11065   ins_encode %{
11066     __ movdl($dst$$XMMRegister, $src$$Register);
11067     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11068   %}
11069   ins_pipe(pipe_slow); // XXX
11070 %}
11071 
11072 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11073   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11074   match(Set dst (ConvI2D (LoadI mem)));
11075   format %{ "FILD   $mem\n\t"
11076             "FSTP   $dst" %}
11077   opcode(0xDB);      /* DB /0 */
11078   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11079               Pop_Reg_DPR(dst));
11080   ins_pipe( fpu_reg_mem );
11081 %}
11082 
11083 // Convert a byte to a float; no rounding step needed.
11084 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11085   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11086   match(Set dst (ConvI2F src));
11087   format %{ "FILD   $src\n\t"
11088             "FSTP   $dst" %}
11089 
11090   opcode(0xDB, 0x0);  /* DB /0 */
11091   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11092   ins_pipe( fpu_reg_mem );
11093 %}
11094 
11095 // In 24-bit mode, force exponent rounding by storing back out
11096 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11097   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11098   match(Set dst (ConvI2F src));
11099   ins_cost(200);
11100   format %{ "FILD   $src\n\t"
11101             "FSTP_S $dst" %}
11102   opcode(0xDB, 0x0);  /* DB /0 */
11103   ins_encode( Push_Mem_I(src),
11104               Pop_Mem_FPR(dst));
11105   ins_pipe( fpu_mem_mem );
11106 %}
11107 
11108 // In 24-bit mode, force exponent rounding by storing back out
11109 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11110   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11111   match(Set dst (ConvI2F (LoadI mem)));
11112   ins_cost(200);
11113   format %{ "FILD   $mem\n\t"
11114             "FSTP_S $dst" %}
11115   opcode(0xDB);  /* DB /0 */
11116   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11117               Pop_Mem_FPR(dst));
11118   ins_pipe( fpu_mem_mem );
11119 %}
11120 
11121 // This instruction does not round to 24 bits
11122 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11123   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11124   match(Set dst (ConvI2F src));
11125   format %{ "FILD   $src\n\t"
11126             "FSTP   $dst" %}
11127   opcode(0xDB, 0x0);  /* DB /0 */
11128   ins_encode( Push_Mem_I(src),
11129               Pop_Reg_FPR(dst));
11130   ins_pipe( fpu_reg_mem );
11131 %}
11132 
11133 // This instruction does not round to 24 bits
11134 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11135   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11136   match(Set dst (ConvI2F (LoadI mem)));
11137   format %{ "FILD   $mem\n\t"
11138             "FSTP   $dst" %}
11139   opcode(0xDB);      /* DB /0 */
11140   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11141               Pop_Reg_FPR(dst));
11142   ins_pipe( fpu_reg_mem );
11143 %}
11144 
11145 // Convert an int to a float in xmm; no rounding step needed.
11146 instruct convI2F_reg(regF dst, rRegI src) %{
11147   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11148   match(Set dst (ConvI2F src));
11149   format %{ "CVTSI2SS $dst, $src" %}
11150   ins_encode %{
11151     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11152   %}
11153   ins_pipe( pipe_slow );
11154 %}
11155 
11156  instruct convXI2F_reg(regF dst, rRegI src)
11157 %{
11158   predicate( UseSSE>=2 && UseXmmI2F );
11159   match(Set dst (ConvI2F src));
11160 
11161   format %{ "MOVD  $dst,$src\n\t"
11162             "CVTDQ2PS $dst,$dst\t# i2f" %}
11163   ins_encode %{
11164     __ movdl($dst$$XMMRegister, $src$$Register);
11165     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11166   %}
11167   ins_pipe(pipe_slow); // XXX
11168 %}
11169 
11170 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11171   match(Set dst (ConvI2L src));
11172   effect(KILL cr);
11173   ins_cost(375);
11174   format %{ "MOV    $dst.lo,$src\n\t"
11175             "MOV    $dst.hi,$src\n\t"
11176             "SAR    $dst.hi,31" %}
11177   ins_encode(convert_int_long(dst,src));
11178   ins_pipe( ialu_reg_reg_long );
11179 %}
11180 
11181 // Zero-extend convert int to long
11182 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11183   match(Set dst (AndL (ConvI2L src) mask) );
11184   effect( KILL flags );
11185   ins_cost(250);
11186   format %{ "MOV    $dst.lo,$src\n\t"
11187             "XOR    $dst.hi,$dst.hi" %}
11188   opcode(0x33); // XOR
11189   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11190   ins_pipe( ialu_reg_reg_long );
11191 %}
11192 
11193 // Zero-extend long
11194 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11195   match(Set dst (AndL src mask) );
11196   effect( KILL flags );
11197   ins_cost(250);
11198   format %{ "MOV    $dst.lo,$src.lo\n\t"
11199             "XOR    $dst.hi,$dst.hi\n\t" %}
11200   opcode(0x33); // XOR
11201   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11202   ins_pipe( ialu_reg_reg_long );
11203 %}
11204 
11205 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11206   predicate (UseSSE<=1);
11207   match(Set dst (ConvL2D src));
11208   effect( KILL cr );
11209   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11210             "PUSH   $src.lo\n\t"
11211             "FILD   ST,[ESP + #0]\n\t"
11212             "ADD    ESP,8\n\t"
11213             "FSTP_D $dst\t# D-round" %}
11214   opcode(0xDF, 0x5);  /* DF /5 */
11215   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11216   ins_pipe( pipe_slow );
11217 %}
11218 
11219 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11220   predicate (UseSSE>=2);
11221   match(Set dst (ConvL2D src));
11222   effect( KILL cr );
11223   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11224             "PUSH   $src.lo\n\t"
11225             "FILD_D [ESP]\n\t"
11226             "FSTP_D [ESP]\n\t"
11227             "MOVSD  $dst,[ESP]\n\t"
11228             "ADD    ESP,8" %}
11229   opcode(0xDF, 0x5);  /* DF /5 */
11230   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11231   ins_pipe( pipe_slow );
11232 %}
11233 
11234 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11235   predicate (UseSSE>=1);
11236   match(Set dst (ConvL2F src));
11237   effect( KILL cr );
11238   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11239             "PUSH   $src.lo\n\t"
11240             "FILD_D [ESP]\n\t"
11241             "FSTP_S [ESP]\n\t"
11242             "MOVSS  $dst,[ESP]\n\t"
11243             "ADD    ESP,8" %}
11244   opcode(0xDF, 0x5);  /* DF /5 */
11245   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11246   ins_pipe( pipe_slow );
11247 %}
11248 
11249 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11250   match(Set dst (ConvL2F src));
11251   effect( KILL cr );
11252   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11253             "PUSH   $src.lo\n\t"
11254             "FILD   ST,[ESP + #0]\n\t"
11255             "ADD    ESP,8\n\t"
11256             "FSTP_S $dst\t# F-round" %}
11257   opcode(0xDF, 0x5);  /* DF /5 */
11258   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11259   ins_pipe( pipe_slow );
11260 %}
11261 
11262 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11263   match(Set dst (ConvL2I src));
11264   effect( DEF dst, USE src );
11265   format %{ "MOV    $dst,$src.lo" %}
11266   ins_encode(enc_CopyL_Lo(dst,src));
11267   ins_pipe( ialu_reg_reg );
11268 %}
11269 
11270 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11271   match(Set dst (MoveF2I src));
11272   effect( DEF dst, USE src );
11273   ins_cost(100);
11274   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11275   ins_encode %{
11276     __ movl($dst$$Register, Address(rsp, $src$$disp));
11277   %}
11278   ins_pipe( ialu_reg_mem );
11279 %}
11280 
11281 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11282   predicate(UseSSE==0);
11283   match(Set dst (MoveF2I src));
11284   effect( DEF dst, USE src );
11285 
11286   ins_cost(125);
11287   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11288   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11289   ins_pipe( fpu_mem_reg );
11290 %}
11291 
11292 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11293   predicate(UseSSE>=1);
11294   match(Set dst (MoveF2I src));
11295   effect( DEF dst, USE src );
11296 
11297   ins_cost(95);
11298   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11299   ins_encode %{
11300     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11301   %}
11302   ins_pipe( pipe_slow );
11303 %}
11304 
11305 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11306   predicate(UseSSE>=2);
11307   match(Set dst (MoveF2I src));
11308   effect( DEF dst, USE src );
11309   ins_cost(85);
11310   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11311   ins_encode %{
11312     __ movdl($dst$$Register, $src$$XMMRegister);
11313   %}
11314   ins_pipe( pipe_slow );
11315 %}
11316 
11317 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11318   match(Set dst (MoveI2F src));
11319   effect( DEF dst, USE src );
11320 
11321   ins_cost(100);
11322   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11323   ins_encode %{
11324     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11325   %}
11326   ins_pipe( ialu_mem_reg );
11327 %}
11328 
11329 
11330 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11331   predicate(UseSSE==0);
11332   match(Set dst (MoveI2F src));
11333   effect(DEF dst, USE src);
11334 
11335   ins_cost(125);
11336   format %{ "FLD_S  $src\n\t"
11337             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11338   opcode(0xD9);               /* D9 /0, FLD m32real */
11339   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11340               Pop_Reg_FPR(dst) );
11341   ins_pipe( fpu_reg_mem );
11342 %}
11343 
11344 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11345   predicate(UseSSE>=1);
11346   match(Set dst (MoveI2F src));
11347   effect( DEF dst, USE src );
11348 
11349   ins_cost(95);
11350   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11351   ins_encode %{
11352     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11353   %}
11354   ins_pipe( pipe_slow );
11355 %}
11356 
11357 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11358   predicate(UseSSE>=2);
11359   match(Set dst (MoveI2F src));
11360   effect( DEF dst, USE src );
11361 
11362   ins_cost(85);
11363   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11364   ins_encode %{
11365     __ movdl($dst$$XMMRegister, $src$$Register);
11366   %}
11367   ins_pipe( pipe_slow );
11368 %}
11369 
11370 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11371   match(Set dst (MoveD2L src));
11372   effect(DEF dst, USE src);
11373 
11374   ins_cost(250);
11375   format %{ "MOV    $dst.lo,$src\n\t"
11376             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11377   opcode(0x8B, 0x8B);
11378   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11379   ins_pipe( ialu_mem_long_reg );
11380 %}
11381 
11382 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11383   predicate(UseSSE<=1);
11384   match(Set dst (MoveD2L src));
11385   effect(DEF dst, USE src);
11386 
11387   ins_cost(125);
11388   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11389   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11390   ins_pipe( fpu_mem_reg );
11391 %}
11392 
11393 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11394   predicate(UseSSE>=2);
11395   match(Set dst (MoveD2L src));
11396   effect(DEF dst, USE src);
11397   ins_cost(95);
11398   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11399   ins_encode %{
11400     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11401   %}
11402   ins_pipe( pipe_slow );
11403 %}
11404 
11405 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11406   predicate(UseSSE>=2);
11407   match(Set dst (MoveD2L src));
11408   effect(DEF dst, USE src, TEMP tmp);
11409   ins_cost(85);
11410   format %{ "MOVD   $dst.lo,$src\n\t"
11411             "PSHUFLW $tmp,$src,0x4E\n\t"
11412             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11413   ins_encode %{
11414     __ movdl($dst$$Register, $src$$XMMRegister);
11415     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11416     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11417   %}
11418   ins_pipe( pipe_slow );
11419 %}
11420 
11421 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11422   match(Set dst (MoveL2D src));
11423   effect(DEF dst, USE src);
11424 
11425   ins_cost(200);
11426   format %{ "MOV    $dst,$src.lo\n\t"
11427             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11428   opcode(0x89, 0x89);
11429   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11430   ins_pipe( ialu_mem_long_reg );
11431 %}
11432 
11433 
11434 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11435   predicate(UseSSE<=1);
11436   match(Set dst (MoveL2D src));
11437   effect(DEF dst, USE src);
11438   ins_cost(125);
11439 
11440   format %{ "FLD_D  $src\n\t"
11441             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11442   opcode(0xDD);               /* DD /0, FLD m64real */
11443   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11444               Pop_Reg_DPR(dst) );
11445   ins_pipe( fpu_reg_mem );
11446 %}
11447 
11448 
11449 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11450   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11451   match(Set dst (MoveL2D src));
11452   effect(DEF dst, USE src);
11453 
11454   ins_cost(95);
11455   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11456   ins_encode %{
11457     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11458   %}
11459   ins_pipe( pipe_slow );
11460 %}
11461 
11462 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11463   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11464   match(Set dst (MoveL2D src));
11465   effect(DEF dst, USE src);
11466 
11467   ins_cost(95);
11468   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11469   ins_encode %{
11470     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11471   %}
11472   ins_pipe( pipe_slow );
11473 %}
11474 
11475 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11476   predicate(UseSSE>=2);
11477   match(Set dst (MoveL2D src));
11478   effect(TEMP dst, USE src, TEMP tmp);
11479   ins_cost(85);
11480   format %{ "MOVD   $dst,$src.lo\n\t"
11481             "MOVD   $tmp,$src.hi\n\t"
11482             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11483   ins_encode %{
11484     __ movdl($dst$$XMMRegister, $src$$Register);
11485     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11486     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11487   %}
11488   ins_pipe( pipe_slow );
11489 %}
11490 
11491 
11492 // =======================================================================
11493 // fast clearing of an array
11494 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11495   predicate(!((ClearArrayNode*)n)->is_large());
11496   match(Set dummy (ClearArray cnt base));
11497   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11498 
11499   format %{ $$template
11500     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11501     $$emit$$"CMP    InitArrayShortSize,ECX\n\t"
11502     $$emit$$"JG     LARGE\n\t"
11503     $$emit$$"SHL    ECX, 1\n\t"
11504     $$emit$$"DEC    ECX\n\t"
11505     $$emit$$"JS     DONE\t# Zero length\n\t"
11506     $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11507     $$emit$$"DEC    ECX\n\t"
11508     $$emit$$"JGE    LOOP\n\t"
11509     $$emit$$"JMP    DONE\n\t"
11510     $$emit$$"# LARGE:\n\t"
11511     if (UseFastStosb) {
11512        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11513        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11514     } else {
11515        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11516        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11517     }
11518     $$emit$$"# DONE"
11519   %}
11520   ins_encode %{
11521     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
11522   %}
11523   ins_pipe( pipe_slow );
11524 %}
11525 
11526 instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11527   predicate(((ClearArrayNode*)n)->is_large());
11528   match(Set dummy (ClearArray cnt base));
11529   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11530   format %{ $$template
11531     $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11532     if (UseFastStosb) {
11533        $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11534        $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11535     } else {
11536        $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11537        $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11538     }
11539     $$emit$$"# DONE"
11540   %}
11541   ins_encode %{
11542     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
11543   %}
11544   ins_pipe( pipe_slow );
11545 %}
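
// For reference, a rough C sketch of what clear_mem() amounts to for the two ClearArray
// forms above (hedged, illustrative only; per the SHL conversions in the templates, $cnt
// counts 8-byte doublewords -- the short form uses a 4-byte store loop, the large form
// REP STOS / REP STOSB):
//
//   static void clear_array_sketch(void* base, size_t cnt_dwords) {
//     memset(base, 0, cnt_dwords * 8);
//   }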
11546 
11547 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11548                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11549   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11550   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11551   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11552 
11553   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11554   ins_encode %{
11555     __ string_compare($str1$$Register, $str2$$Register,
11556                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11557                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11558   %}
11559   ins_pipe( pipe_slow );
11560 %}
11561 
11562 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11563                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11564   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11565   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11566   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11567 
11568   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11569   ins_encode %{
11570     __ string_compare($str1$$Register, $str2$$Register,
11571                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11572                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11573   %}
11574   ins_pipe( pipe_slow );
11575 %}
11576 
11577 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11578                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11579   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11580   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11581   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11582 
11583   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11584   ins_encode %{
11585     __ string_compare($str1$$Register, $str2$$Register,
11586                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11587                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11588   %}
11589   ins_pipe( pipe_slow );
11590 %}
11591 
11592 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11593                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11594   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11595   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11596   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11597 
11598   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11599   ins_encode %{
11600     __ string_compare($str2$$Register, $str1$$Register,
11601                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11602                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11603   %}
11604   ins_pipe( pipe_slow );
11605 %}
11606 
11607 // fast string equals
11608 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11609                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11610   match(Set result (StrEquals (Binary str1 str2) cnt));
11611   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11612 
11613   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11614   ins_encode %{
11615     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11616                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11617                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11618   %}
11619 
11620   ins_pipe( pipe_slow );
11621 %}
11622 
11623 // fast search of substring with known size.
11624 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11625                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11626   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11627   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11628   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11629 
11630   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11631   ins_encode %{
11632     int icnt2 = (int)$int_cnt2$$constant;
11633     if (icnt2 >= 16) {
11634       // IndexOf for constant substrings with size >= 16 elements,
11635       // which don't need to be loaded through the stack.
11636       __ string_indexofC8($str1$$Register, $str2$$Register,
11637                           $cnt1$$Register, $cnt2$$Register,
11638                           icnt2, $result$$Register,
11639                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11640     } else {
11641       // Small strings are loaded through the stack if they cross a page boundary.
11642       __ string_indexof($str1$$Register, $str2$$Register,
11643                         $cnt1$$Register, $cnt2$$Register,
11644                         icnt2, $result$$Register,
11645                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11646     }
11647   %}
11648   ins_pipe( pipe_slow );
11649 %}
11650 
11651 // fast search of substring with known size.
11652 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11653                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11654   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11655   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11656   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11657 
11658   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11659   ins_encode %{
11660     int icnt2 = (int)$int_cnt2$$constant;
11661     if (icnt2 >= 8) {
11662       // IndexOf for constant substrings with size >= 8 elements,
11663       // which don't need to be loaded through the stack.
11664       __ string_indexofC8($str1$$Register, $str2$$Register,
11665                           $cnt1$$Register, $cnt2$$Register,
11666                           icnt2, $result$$Register,
11667                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11668     } else {
11669       // Small strings are loaded through the stack if they cross a page boundary.
11670       __ string_indexof($str1$$Register, $str2$$Register,
11671                         $cnt1$$Register, $cnt2$$Register,
11672                         icnt2, $result$$Register,
11673                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11674     }
11675   %}
11676   ins_pipe( pipe_slow );
11677 %}
11678 
11679 // fast search of substring with known size.
11680 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11681                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11682   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11683   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11684   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11685 
11686   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11687   ins_encode %{
11688     int icnt2 = (int)$int_cnt2$$constant;
11689     if (icnt2 >= 8) {
11690       // IndexOf for constant substrings with size >= 8 elements,
11691       // which don't need to be loaded through the stack.
11692       __ string_indexofC8($str1$$Register, $str2$$Register,
11693                           $cnt1$$Register, $cnt2$$Register,
11694                           icnt2, $result$$Register,
11695                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11696     } else {
11697       // Small strings are loaded through the stack if they cross a page boundary.
11698       __ string_indexof($str1$$Register, $str2$$Register,
11699                         $cnt1$$Register, $cnt2$$Register,
11700                         icnt2, $result$$Register,
11701                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11702     }
11703   %}
11704   ins_pipe( pipe_slow );
11705 %}
11706 
11707 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11708                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11709   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11710   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11711   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11712 
11713   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11714   ins_encode %{
11715     __ string_indexof($str1$$Register, $str2$$Register,
11716                       $cnt1$$Register, $cnt2$$Register,
11717                       (-1), $result$$Register,
11718                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11719   %}
11720   ins_pipe( pipe_slow );
11721 %}
11722 
11723 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11724                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11725   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11726   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11727   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11728 
11729   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11730   ins_encode %{
11731     __ string_indexof($str1$$Register, $str2$$Register,
11732                       $cnt1$$Register, $cnt2$$Register,
11733                       (-1), $result$$Register,
11734                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11735   %}
11736   ins_pipe( pipe_slow );
11737 %}
11738 
11739 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11740                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11741   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11742   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11743   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11744 
11745   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11746   ins_encode %{
11747     __ string_indexof($str1$$Register, $str2$$Register,
11748                       $cnt1$$Register, $cnt2$$Register,
11749                       (-1), $result$$Register,
11750                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11751   %}
11752   ins_pipe( pipe_slow );
11753 %}
11754 
11755 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11756                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11757   predicate(UseSSE42Intrinsics);
11758   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11759   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11760   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11761   ins_encode %{
11762     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11763                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11764   %}
11765   ins_pipe( pipe_slow );
11766 %}
11767 
11768 // fast array equals
11769 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11770                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11771 %{
11772   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11773   match(Set result (AryEq ary1 ary2));
11774   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11775   //ins_cost(300);
11776 
11777   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11778   ins_encode %{
11779     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11780                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11781                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11782   %}
11783   ins_pipe( pipe_slow );
11784 %}
11785 
11786 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11787                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11788 %{
11789   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11790   match(Set result (AryEq ary1 ary2));
11791   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11792   //ins_cost(300);
11793 
11794   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11795   ins_encode %{
11796     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11797                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11798                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11799   %}
11800   ins_pipe( pipe_slow );
11801 %}
11802 
11803 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11804                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11805 %{
11806   match(Set result (HasNegatives ary1 len));
11807   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11808 
11809   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11810   ins_encode %{
11811     __ has_negatives($ary1$$Register, $len$$Register,
11812                      $result$$Register, $tmp3$$Register,
11813                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11814   %}
11815   ins_pipe( pipe_slow );
11816 %}
11817 
11818 // fast char[] to byte[] compression
11819 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11820                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11821   match(Set result (StrCompressedCopy src (Binary dst len)));
11822   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11823 
11824   format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
11825   ins_encode %{
11826     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11827                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11828                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11829   %}
11830   ins_pipe( pipe_slow );
11831 %}
11832 
11833 // fast byte[] to char[] inflation
11834 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11835                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11836   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11837   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11838 
11839   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11840   ins_encode %{
11841     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11842                           $tmp1$$XMMRegister, $tmp2$$Register);
11843   %}
11844   ins_pipe( pipe_slow );
11845 %}
11846 
11847 // encode char[] to byte[] in ISO_8859_1
11848 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11849                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11850                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11851   match(Set result (EncodeISOArray src (Binary dst len)));
11852   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11853 
11854   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11855   ins_encode %{
11856     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11857                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11858                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11859   %}
11860   ins_pipe( pipe_slow );
11861 %}
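
// Note: encode_iso_array() is expected to narrow each char value <= 0xFF to a byte and to
// stop at the first char above 0xFF, leaving the number of chars actually encoded in
// $result (mirroring the EncodeISOArray intrinsic contract).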
11862 
11863 
11864 //----------Control Flow Instructions------------------------------------------
11865 // Signed compare Instructions
11866 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11867   match(Set cr (CmpI op1 op2));
11868   effect( DEF cr, USE op1, USE op2 );
11869   format %{ "CMP    $op1,$op2" %}
11870   opcode(0x3B);  /* Opcode 3B /r */
11871   ins_encode( OpcP, RegReg( op1, op2) );
11872   ins_pipe( ialu_cr_reg_reg );
11873 %}
11874 
11875 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11876   match(Set cr (CmpI op1 op2));
11877   effect( DEF cr, USE op1 );
11878   format %{ "CMP    $op1,$op2" %}
11879   opcode(0x81,0x07);  /* Opcode 81 /7 */
11880   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11881   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11882   ins_pipe( ialu_cr_reg_imm );
11883 %}
11884 
11885 // Cisc-spilled version of cmpI_eReg
11886 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11887   match(Set cr (CmpI op1 (LoadI op2)));
11888 
11889   format %{ "CMP    $op1,$op2" %}
11890   ins_cost(500);
11891   opcode(0x3B);  /* Opcode 3B /r */
11892   ins_encode( OpcP, RegMem( op1, op2) );
11893   ins_pipe( ialu_cr_reg_mem );
11894 %}
11895 
11896 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11897   match(Set cr (CmpI src zero));
11898   effect( DEF cr, USE src );
11899 
11900   format %{ "TEST   $src,$src" %}
11901   opcode(0x85);
11902   ins_encode( OpcP, RegReg( src, src ) );
11903   ins_pipe( ialu_cr_reg_imm );
11904 %}
11905 
11906 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11907   match(Set cr (CmpI (AndI src con) zero));
11908 
11909   format %{ "TEST   $src,$con" %}
11910   opcode(0xF7,0x00);
11911   ins_encode( OpcP, RegOpc(src), Con32(con) );
11912   ins_pipe( ialu_cr_reg_imm );
11913 %}
11914 
11915 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11916   match(Set cr (CmpI (AndI src mem) zero));
11917 
11918   format %{ "TEST   $src,$mem" %}
11919   opcode(0x85);
11920   ins_encode( OpcP, RegMem( src, mem ) );
11921   ins_pipe( ialu_cr_reg_mem );
11922 %}
11923 
11924 // Unsigned compare Instructions; really, same as signed except they
11925 // produce an eFlagsRegU instead of eFlagsReg.
11926 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11927   match(Set cr (CmpU op1 op2));
11928 
11929   format %{ "CMPu   $op1,$op2" %}
11930   opcode(0x3B);  /* Opcode 3B /r */
11931   ins_encode( OpcP, RegReg( op1, op2) );
11932   ins_pipe( ialu_cr_reg_reg );
11933 %}
11934 
11935 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11936   match(Set cr (CmpU op1 op2));
11937 
11938   format %{ "CMPu   $op1,$op2" %}
11939   opcode(0x81,0x07);  /* Opcode 81 /7 */
11940   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11941   ins_pipe( ialu_cr_reg_imm );
11942 %}
11943 
11944 // Cisc-spilled version of cmpU_eReg
11945 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11946   match(Set cr (CmpU op1 (LoadI op2)));
11947 
11948   format %{ "CMPu   $op1,$op2" %}
11949   ins_cost(500);
11950   opcode(0x3B);  /* Opcode 3B /r */
11951   ins_encode( OpcP, RegMem( op1, op2) );
11952   ins_pipe( ialu_cr_reg_mem );
11953 %}
11954 
11955 // // Cisc-spilled version of cmpU_eReg
11956 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11957 //  match(Set cr (CmpU (LoadI op1) op2));
11958 //
11959 //  format %{ "CMPu   $op1,$op2" %}
11960 //  ins_cost(500);
11961 //  opcode(0x39);  /* Opcode 39 /r */
11962 //  ins_encode( OpcP, RegMem( op1, op2) );
11963 //%}
11964 
11965 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11966   match(Set cr (CmpU src zero));
11967 
11968   format %{ "TESTu  $src,$src" %}
11969   opcode(0x85);
11970   ins_encode( OpcP, RegReg( src, src ) );
11971   ins_pipe( ialu_cr_reg_imm );
11972 %}
11973 
11974 // Unsigned pointer compare Instructions
11975 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11976   match(Set cr (CmpP op1 op2));
11977 
11978   format %{ "CMPu   $op1,$op2" %}
11979   opcode(0x3B);  /* Opcode 3B /r */
11980   ins_encode( OpcP, RegReg( op1, op2) );
11981   ins_pipe( ialu_cr_reg_reg );
11982 %}
11983 
11984 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11985   match(Set cr (CmpP op1 op2));
11986 
11987   format %{ "CMPu   $op1,$op2" %}
11988   opcode(0x81,0x07);  /* Opcode 81 /7 */
11989   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11990   ins_pipe( ialu_cr_reg_imm );
11991 %}
11992 
11993 // Cisc-spilled version of cmpP_eReg
11994 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11995   match(Set cr (CmpP op1 (LoadP op2)));
11996 
11997   format %{ "CMPu   $op1,$op2" %}
11998   ins_cost(500);
11999   opcode(0x3B);  /* Opcode 3B /r */
12000   ins_encode( OpcP, RegMem( op1, op2) );
12001   ins_pipe( ialu_cr_reg_mem );
12002 %}
12003 
12004 // // Cisc-spilled version of cmpP_eReg
12005 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12006 //  match(Set cr (CmpP (LoadP op1) op2));
12007 //
12008 //  format %{ "CMPu   $op1,$op2" %}
12009 //  ins_cost(500);
12010 //  opcode(0x39);  /* Opcode 39 /r */
12011 //  ins_encode( OpcP, RegMem( op1, op2) );
12012 //%}
12013 
12014 // Compare raw pointer (used in out-of-heap check).
12015 // Only works because non-oop pointers must be raw pointers
12016 // and raw pointers have no anti-dependencies.
12017 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12018   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12019   match(Set cr (CmpP op1 (LoadP op2)));
12020 
12021   format %{ "CMPu   $op1,$op2" %}
12022   opcode(0x3B);  /* Opcode 3B /r */
12023   ins_encode( OpcP, RegMem( op1, op2) );
12024   ins_pipe( ialu_cr_reg_mem );
12025 %}
12026 
12027 //
12028 // This will generate a signed flags result. This should be ok
12029 // since any compare to a zero should be eq/neq.
12030 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12031   match(Set cr (CmpP src zero));
12032 
12033   format %{ "TEST   $src,$src" %}
12034   opcode(0x85);
12035   ins_encode( OpcP, RegReg( src, src ) );
12036   ins_pipe( ialu_cr_reg_imm );
12037 %}
12038 
12039 // Cisc-spilled version of testP_reg
12040 // This will generate a signed flags result. This should be ok
12041 // since any compare to a zero should be eq/neq.
12042 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12043   match(Set cr (CmpP (LoadP op) zero));
12044 
12045   format %{ "TEST   $op,0xFFFFFFFF" %}
12046   ins_cost(500);
12047   opcode(0xF7);               /* Opcode F7 /0 */
12048   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12049   ins_pipe( ialu_cr_reg_imm );
12050 %}
12051 
12052 // Yanked all unsigned pointer compare operations.
12053 // Pointer compares are done with CmpP which is already unsigned.
12054 
12055 //----------Max and Min--------------------------------------------------------
12056 // Min Instructions
12057 ////
12058 //   *** Min and Max using the conditional move are slower than the
12059 //   *** branch version on a Pentium III.
12060 // // Conditional move for min
12061 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12062 //  effect( USE_DEF op2, USE op1, USE cr );
12063 //  format %{ "CMOVlt $op2,$op1\t! min" %}
12064 //  opcode(0x4C,0x0F);
12065 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12066 //  ins_pipe( pipe_cmov_reg );
12067 //%}
12068 //
12069 //// Min Register with Register (P6 version)
12070 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12071 //  predicate(VM_Version::supports_cmov() );
12072 //  match(Set op2 (MinI op1 op2));
12073 //  ins_cost(200);
12074 //  expand %{
12075 //    eFlagsReg cr;
12076 //    compI_eReg(cr,op1,op2);
12077 //    cmovI_reg_lt(op2,op1,cr);
12078 //  %}
12079 //%}
12080 
12081 // Min Register with Register (generic version)
12082 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12083   match(Set dst (MinI dst src));
12084   effect(KILL flags);
12085   ins_cost(300);
12086 
12087   format %{ "MIN    $dst,$src" %}
12088   opcode(0xCC);
12089   ins_encode( min_enc(dst,src) );
12090   ins_pipe( pipe_slow );
12091 %}
12092 
12093 // Max Register with Register
12094 //   *** Min and Max using the conditional move are slower than the
12095 //   *** branch version on a Pentium III.
12096 // // Conditional move for max
12097 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12098 //  effect( USE_DEF op2, USE op1, USE cr );
12099 //  format %{ "CMOVgt $op2,$op1\t! max" %}
12100 //  opcode(0x4F,0x0F);
12101 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12102 //  ins_pipe( pipe_cmov_reg );
12103 //%}
12104 //
12105 // // Max Register with Register (P6 version)
12106 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12107 //  predicate(VM_Version::supports_cmov() );
12108 //  match(Set op2 (MaxI op1 op2));
12109 //  ins_cost(200);
12110 //  expand %{
12111 //    eFlagsReg cr;
12112 //    compI_eReg(cr,op1,op2);
12113 //    cmovI_reg_gt(op2,op1,cr);
12114 //  %}
12115 //%}
12116 
12117 // Max Register with Register (generic version)
12118 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12119   match(Set dst (MaxI dst src));
12120   effect(KILL flags);
12121   ins_cost(300);
12122 
12123   format %{ "MAX    $dst,$src" %}
12124   opcode(0xCC);
12125   ins_encode( max_enc(dst,src) );
12126   ins_pipe( pipe_slow );
12127 %}
12128 
12129 // ============================================================================
12130 // Counted Loop limit node which represents exact final iterator value.
12131 // Note: the resulting value should fit into the integer range, since
12132 // counted loops have a limit check on overflow.
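//
// Worked example (stride > 0): init = 0, limit = 10, stride = 3 gives
//   final = init + stride * ((limit - init + stride - 1) / stride)
//         = 0 + 3 * ((10 - 0 + 2) / 3) = 3 * 4 = 12,
// matching a loop whose iterator takes 0, 3, 6, 9 and exits with the value 12.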
12133 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12134   match(Set limit (LoopLimit (Binary init limit) stride));
12135   effect(TEMP limit_hi, TEMP tmp, KILL flags);
12136   ins_cost(300);
12137 
12138   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
12139   ins_encode %{
12140     int strd = (int)$stride$$constant;
12141     assert(strd != 1 && strd != -1, "sanity");
12142     int m1 = (strd > 0) ? 1 : -1;
12143     // Convert limit to long (EAX:EDX)
12144     __ cdql();
12145     // Convert init to long (init:tmp)
12146     __ movl($tmp$$Register, $init$$Register);
12147     __ sarl($tmp$$Register, 31);
12148     // $limit - $init
12149     __ subl($limit$$Register, $init$$Register);
12150     __ sbbl($limit_hi$$Register, $tmp$$Register);
12151     // + ($stride - 1)
12152     if (strd > 0) {
12153       __ addl($limit$$Register, (strd - 1));
12154       __ adcl($limit_hi$$Register, 0);
12155       __ movl($tmp$$Register, strd);
12156     } else {
12157       __ addl($limit$$Register, (strd + 1));
12158       __ adcl($limit_hi$$Register, -1);
12159       __ lneg($limit_hi$$Register, $limit$$Register);
12160       __ movl($tmp$$Register, -strd);
12161     }
12162     // signed division: (EAX:EDX) / pos_stride
12163     __ idivl($tmp$$Register);
12164     if (strd < 0) {
12165       // restore sign
12166       __ negl($tmp$$Register);
12167     }
12168     // (EAX) * stride
12169     __ mull($tmp$$Register);
12170     // + init (ignore upper bits)
12171     __ addl($limit$$Register, $init$$Register);
12172   %}
12173   ins_pipe( pipe_slow );
12174 %}
12175 
12176 // ============================================================================
12177 // Branch Instructions
12178 // Jump Table
12179 instruct jumpXtnd(rRegI switch_val) %{
12180   match(Jump switch_val);
12181   ins_cost(350);
12182   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12183   ins_encode %{
12184     // Jump to Address(table_base + switch_reg)
12185     Address index(noreg, $switch_val$$Register, Address::times_1);
12186     __ jump(ArrayAddress($constantaddress, index));
12187   %}
12188   ins_pipe(pipe_jmp);
12189 %}
12190 
12191 // Jump Direct - Label defines a relative address from JMP+1
12192 instruct jmpDir(label labl) %{
12193   match(Goto);
12194   effect(USE labl);
12195 
12196   ins_cost(300);
12197   format %{ "JMP    $labl" %}
12198   size(5);
12199   ins_encode %{
12200     Label* L = $labl$$label;
12201     __ jmp(*L, false); // Always long jump
12202   %}
12203   ins_pipe( pipe_jmp );
12204 %}
12205 
12206 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12207 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12208   match(If cop cr);
12209   effect(USE labl);
12210 
12211   ins_cost(300);
12212   format %{ "J$cop    $labl" %}
12213   size(6);
12214   ins_encode %{
12215     Label* L = $labl$$label;
12216     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12217   %}
12218   ins_pipe( pipe_jcc );
12219 %}
12220 
12221 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12222 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12223   predicate(!n->has_vector_mask_set());
12224   match(CountedLoopEnd cop cr);
12225   effect(USE labl);
12226 
12227   ins_cost(300);
12228   format %{ "J$cop    $labl\t# Loop end" %}
12229   size(6);
12230   ins_encode %{
12231     Label* L = $labl$$label;
12232     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12233   %}
12234   ins_pipe( pipe_jcc );
12235 %}
12236 
12237 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12238 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12239   predicate(!n->has_vector_mask_set());
12240   match(CountedLoopEnd cop cmp);
12241   effect(USE labl);
12242 
12243   ins_cost(300);
12244   format %{ "J$cop,u  $labl\t# Loop end" %}
12245   size(6);
12246   ins_encode %{
12247     Label* L = $labl$$label;
12248     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12249   %}
12250   ins_pipe( pipe_jcc );
12251 %}
12252 
12253 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12254   predicate(!n->has_vector_mask_set());
12255   match(CountedLoopEnd cop cmp);
12256   effect(USE labl);
12257 
12258   ins_cost(200);
12259   format %{ "J$cop,u  $labl\t# Loop end" %}
12260   size(6);
12261   ins_encode %{
12262     Label* L = $labl$$label;
12263     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12264   %}
12265   ins_pipe( pipe_jcc );
12266 %}
12267 
12268 // mask version
12269 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12270 instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
12271   predicate(n->has_vector_mask_set());
12272   match(CountedLoopEnd cop cr);
12273   effect(USE labl);
12274 
12275   ins_cost(400);
12276   format %{ "J$cop    $labl\t# Loop end\n\t"
12277             "restorevectmask \t# vector mask restore for loops" %}
12278   size(10);
12279   ins_encode %{
12280     Label* L = $labl$$label;
12281     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12282     __ restorevectmask();
12283   %}
12284   ins_pipe( pipe_jcc );
12285 %}
12286 
12287 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12288 instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12289   predicate(n->has_vector_mask_set());
12290   match(CountedLoopEnd cop cmp);
12291   effect(USE labl);
12292 
12293   ins_cost(400);
12294   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12295             "restorevectmask \t# vector mask restore for loops" %}
12296   size(10);
12297   ins_encode %{
12298     Label* L = $labl$$label;
12299     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12300     __ restorevectmask();
12301   %}
12302   ins_pipe( pipe_jcc );
12303 %}
12304 
12305 instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12306   predicate(n->has_vector_mask_set());
12307   match(CountedLoopEnd cop cmp);
12308   effect(USE labl);
12309 
12310   ins_cost(300);
12311   format %{ "J$cop,u  $labl\t# Loop end\n\t"
12312             "restorevectmask \t# vector mask restore for loops" %}
12313   size(10);
12314   ins_encode %{
12315     Label* L = $labl$$label;
12316     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12317     __ restorevectmask();
12318   %}
12319   ins_pipe( pipe_jcc );
12320 %}
12321 
12322 // Jump Direct Conditional - using unsigned comparison
12323 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12324   match(If cop cmp);
12325   effect(USE labl);
12326 
12327   ins_cost(300);
12328   format %{ "J$cop,u  $labl" %}
12329   size(6);
12330   ins_encode %{
12331     Label* L = $labl$$label;
12332     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12333   %}
12334   ins_pipe(pipe_jcc);
12335 %}
12336 
12337 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12338   match(If cop cmp);
12339   effect(USE labl);
12340 
12341   ins_cost(200);
12342   format %{ "J$cop,u  $labl" %}
12343   size(6);
12344   ins_encode %{
12345     Label* L = $labl$$label;
12346     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12347   %}
12348   ins_pipe(pipe_jcc);
12349 %}
12350 
12351 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12352   match(If cop cmp);
12353   effect(USE labl);
12354 
12355   ins_cost(200);
12356   format %{ $$template
12357     if ($cop$$cmpcode == Assembler::notEqual) {
12358       $$emit$$"JP,u   $labl\n\t"
12359       $$emit$$"J$cop,u   $labl"
12360     } else {
12361       $$emit$$"JP,u   done\n\t"
12362       $$emit$$"J$cop,u   $labl\n\t"
12363       $$emit$$"done:"
12364     }
12365   %}
12366   ins_encode %{
12367     Label* l = $labl$$label;
12368     if ($cop$$cmpcode == Assembler::notEqual) {
12369       __ jcc(Assembler::parity, *l, false);
12370       __ jcc(Assembler::notEqual, *l, false);
12371     } else if ($cop$$cmpcode == Assembler::equal) {
12372       Label done;
12373       __ jccb(Assembler::parity, done);
12374       __ jcc(Assembler::equal, *l, false);
12375       __ bind(done);
12376     } else {
12377        ShouldNotReachHere();
12378     }
12379   %}
12380   ins_pipe(pipe_jcc);
12381 %}
12382 
12383 // ============================================================================
12384 // The second (slow) half of a subtype check.  Scan the subklass's secondary-
12385 // superclass array for an instance of the superklass.  Set a hidden internal
12386 // cache on a hit (the cache is checked with exposed code in gen_subtype_check()).
12387 // Return NZ for a miss or zero for a hit.  The encoding ALSO sets the flags.
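//
// Roughly, in illustrative C++ pseudocode (a hedged sketch only -- the real work is the
// REPNE SCASD sequence emitted by enc_PartialSubtypeCheck, and the accessor names below
// are illustrative rather than the exact HotSpot API):
//
//   for (int i = 0; i < sub->secondary_supers()->length(); i++) {
//     if (sub->secondary_supers()->at(i) == super) {
//       sub->set_secondary_super_cache(super);   // hit: cache it, zero result / Z flag
//       return 0;
//     }
//   }
//   return 1;                                    // miss: non-zero result / NZ flag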
12388 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12389   match(Set result (PartialSubtypeCheck sub super));
12390   effect( KILL rcx, KILL cr );
12391 
12392   ins_cost(1100);  // slightly larger than the next version
12393   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12394             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12395             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12396             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12397             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12398             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12399             "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
12400      "miss:\t" %}
12401 
12402   opcode(0x1); // Force a XOR of EDI
12403   ins_encode( enc_PartialSubtypeCheck() );
12404   ins_pipe( pipe_slow );
12405 %}
12406 
12407 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12408   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12409   effect( KILL rcx, KILL result );
12410 
12411   ins_cost(1000);
12412   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12413             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12414             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12415             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12416             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12417             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12418      "miss:\t" %}
12419 
12420   opcode(0x0);  // No need to XOR EDI
12421   ins_encode( enc_PartialSubtypeCheck() );
12422   ins_pipe( pipe_slow );
12423 %}
12424 
12425 // ============================================================================
12426 // Branch Instructions -- short offset versions
12427 //
12428 // These instructions are used to replace jumps of a long offset (the default
12429 // match) with jumps of a shorter offset.  These instructions are all tagged
12430 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12431 // match rules in general matching.  Instead, the ADLC generates a conversion
12432 // method in the MachNode which can be used to do in-place replacement of the
12433 // long variant with the shorter variant.  The compiler determines whether the
12434 // short form can be used via the is_short_branch_offset() predicate in the
12435 // machine-specific code section of this file.
12436 
12437 // Jump Direct - Label defines a relative address from JMP+1
12438 instruct jmpDir_short(label labl) %{
12439   match(Goto);
12440   effect(USE labl);
12441 
12442   ins_cost(300);
12443   format %{ "JMP,s  $labl" %}
12444   size(2);
12445   ins_encode %{
12446     Label* L = $labl$$label;
12447     __ jmpb(*L);
12448   %}
12449   ins_pipe( pipe_jmp );
12450   ins_short_branch(1);
12451 %}
12452 
12453 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12454 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12455   match(If cop cr);
12456   effect(USE labl);
12457 
12458   ins_cost(300);
12459   format %{ "J$cop,s  $labl" %}
12460   size(2);
12461   ins_encode %{
12462     Label* L = $labl$$label;
12463     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12464   %}
12465   ins_pipe( pipe_jcc );
12466   ins_short_branch(1);
12467 %}
12468 
12469 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12470 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12471   match(CountedLoopEnd cop cr);
12472   effect(USE labl);
12473 
12474   ins_cost(300);
12475   format %{ "J$cop,s  $labl\t# Loop end" %}
12476   size(2);
12477   ins_encode %{
12478     Label* L = $labl$$label;
12479     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12480   %}
12481   ins_pipe( pipe_jcc );
12482   ins_short_branch(1);
12483 %}
12484 
12485 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12486 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12487   match(CountedLoopEnd cop cmp);
12488   effect(USE labl);
12489 
12490   ins_cost(300);
12491   format %{ "J$cop,us $labl\t# Loop end" %}
12492   size(2);
12493   ins_encode %{
12494     Label* L = $labl$$label;
12495     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12496   %}
12497   ins_pipe( pipe_jcc );
12498   ins_short_branch(1);
12499 %}
12500 
12501 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12502   match(CountedLoopEnd cop cmp);
12503   effect(USE labl);
12504 
12505   ins_cost(300);
12506   format %{ "J$cop,us $labl\t# Loop end" %}
12507   size(2);
12508   ins_encode %{
12509     Label* L = $labl$$label;
12510     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12511   %}
12512   ins_pipe( pipe_jcc );
12513   ins_short_branch(1);
12514 %}
12515 
12516 // Jump Direct Conditional - using unsigned comparison
12517 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12518   match(If cop cmp);
12519   effect(USE labl);
12520 
12521   ins_cost(300);
12522   format %{ "J$cop,us $labl" %}
12523   size(2);
12524   ins_encode %{
12525     Label* L = $labl$$label;
12526     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12527   %}
12528   ins_pipe( pipe_jcc );
12529   ins_short_branch(1);
12530 %}
12531 
12532 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12533   match(If cop cmp);
12534   effect(USE labl);
12535 
12536   ins_cost(300);
12537   format %{ "J$cop,us $labl" %}
12538   size(2);
12539   ins_encode %{
12540     Label* L = $labl$$label;
12541     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12542   %}
12543   ins_pipe( pipe_jcc );
12544   ins_short_branch(1);
12545 %}
12546 
12547 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12548   match(If cop cmp);
12549   effect(USE labl);
12550 
12551   ins_cost(300);
12552   format %{ $$template
12553     if ($cop$$cmpcode == Assembler::notEqual) {
12554       $$emit$$"JP,u,s   $labl\n\t"
12555       $$emit$$"J$cop,u,s   $labl"
12556     } else {
12557       $$emit$$"JP,u,s   done\n\t"
12558       $$emit$$"J$cop,u,s  $labl\n\t"
12559       $$emit$$"done:"
12560     }
12561   %}
12562   size(4);
12563   ins_encode %{
12564     Label* l = $labl$$label;
12565     if ($cop$$cmpcode == Assembler::notEqual) {
12566       __ jccb(Assembler::parity, *l);
12567       __ jccb(Assembler::notEqual, *l);
12568     } else if ($cop$$cmpcode == Assembler::equal) {
12569       Label done;
12570       __ jccb(Assembler::parity, done);
12571       __ jccb(Assembler::equal, *l);
12572       __ bind(done);
12573     } else {
12574        ShouldNotReachHere();
12575     }
12576   %}
12577   ins_pipe(pipe_jcc);
12578   ins_short_branch(1);
12579 %}
12580 
12581 // ============================================================================
12582 // Long Compare
12583 //
12584 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12585 // is tricky.  The flavor of compare used depends on whether we are testing
12586 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12587 // The GE test is the negated LT test.  The LE test can be had by commuting
12588 // the operands (yielding a GE test) and then negating; negate again for the
12589 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12590 // NE test is negated from that.
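//
// A small worked sketch of that scheme, for a long x held as the register
// pair (x.hi, x.lo) and y as (y.hi, y.lo).  This is illustrative pseudo-C
// mirroring the encodings used below, not a definition of them:
//
//   x == 0   <=>  (x.lo | x.hi) == 0        // OR the halves, test ZF
//   x != 0   <=>  !(x == 0)
//   x <  0   <=>  x.hi < 0                  // just the sign bit of the high half
//   x <  y   <=>  CMP x.lo,y.lo; MOV tmp,x.hi; SBB tmp,y.hi  sets "less" in the flags
//   x >= y   <=>  same flags, opposite condition code
//   x <= y   <=>  y >= x                    // swap operands, use a commuted test
//   x >  y   <=>  y <  x                    // swap operands, use a commuted test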
12591 
12592 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12593 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12594 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12595 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12596 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12597 // foo match ends up with the wrong leaf.  One fix is to not match both
12598 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12599 // both forms beat the trinary form of long-compare and both are very useful
12600 // on Intel which has so few registers.
12601 
12602 // Manifest a CmpL result in an integer register.  Very painful.
12603 // This is the test to avoid.
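// (For reference, the value being materialized is the usual three-way result,
//  roughly:  src1 < src2 ? -1 : (src1 == src2 ? 0 : +1), with the low halves
//  compared unsigned once the high halves are equal.)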
12604 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12605   match(Set dst (CmpL3 src1 src2));
12606   effect( KILL flags );
12607   ins_cost(1000);
12608   format %{ "XOR    $dst,$dst\n\t"
12609             "CMP    $src1.hi,$src2.hi\n\t"
12610             "JLT,s  m_one\n\t"
12611             "JGT,s  p_one\n\t"
12612             "CMP    $src1.lo,$src2.lo\n\t"
12613             "JB,s   m_one\n\t"
12614             "JEQ,s  done\n"
12615     "p_one:\tINC    $dst\n\t"
12616             "JMP,s  done\n"
12617     "m_one:\tDEC    $dst\n"
12618      "done:" %}
12619   ins_encode %{
12620     Label p_one, m_one, done;
12621     __ xorptr($dst$$Register, $dst$$Register);
12622     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12623     __ jccb(Assembler::less,    m_one);
12624     __ jccb(Assembler::greater, p_one);
12625     __ cmpl($src1$$Register, $src2$$Register);
12626     __ jccb(Assembler::below,   m_one);
12627     __ jccb(Assembler::equal,   done);
12628     __ bind(p_one);
12629     __ incrementl($dst$$Register);
12630     __ jmpb(done);
12631     __ bind(m_one);
12632     __ decrementl($dst$$Register);
12633     __ bind(done);
12634   %}
12635   ins_pipe( pipe_slow );
12636 %}
12637 
12638 //======
12639 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12640 // compares.  Can be used for LE or GT compares by reversing arguments.
12641 // NOT GOOD FOR EQ/NE tests.
12642 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12643   match( Set flags (CmpL src zero ));
12644   ins_cost(100);
12645   format %{ "TEST   $src.hi,$src.hi" %}
12646   opcode(0x85);
12647   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12648   ins_pipe( ialu_cr_reg_reg );
12649 %}
12650 
12651 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12652 // compares.  Can be used for LE or GT compares by reversing arguments.
12653 // NOT GOOD FOR EQ/NE tests.
12654 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12655   match( Set flags (CmpL src1 src2 ));
12656   effect( TEMP tmp );
12657   ins_cost(300);
12658   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12659             "MOV    $tmp,$src1.hi\n\t"
12660             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12661   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12662   ins_pipe( ialu_cr_reg_reg );
12663 %}
12664 
// Long compares reg < zero/reg OR reg >= zero/reg.
12666 // Just a wrapper for a normal branch, plus the predicate test.
12667 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12668   match(If cmp flags);
12669   effect(USE labl);
12670   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12671   expand %{
12672     jmpCon(cmp,flags,labl);    // JLT or JGE...
12673   %}
12674 %}
12675 
12676 //======
12677 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12678 // compares.  Can be used for LE or GT compares by reversing arguments.
12679 // NOT GOOD FOR EQ/NE tests.
12680 instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
12681   match(Set flags (CmpUL src zero));
12682   ins_cost(100);
12683   format %{ "TEST   $src.hi,$src.hi" %}
12684   opcode(0x85);
12685   ins_encode(OpcP, RegReg_Hi2(src, src));
12686   ins_pipe(ialu_cr_reg_reg);
12687 %}
12688 
12689 // Manifest a CmpUL result in the normal flags.  Only good for LT or GE
12690 // compares.  Can be used for LE or GT compares by reversing arguments.
12691 // NOT GOOD FOR EQ/NE tests.
12692 instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
12693   match(Set flags (CmpUL src1 src2));
12694   effect(TEMP tmp);
12695   ins_cost(300);
12696   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12697             "MOV    $tmp,$src1.hi\n\t"
12698             "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
12699   ins_encode(long_cmp_flags2(src1, src2, tmp));
12700   ins_pipe(ialu_cr_reg_reg);
12701 %}
12702 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
12704 // Just a wrapper for a normal branch, plus the predicate test.
12705 instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
12706   match(If cmp flags);
12707   effect(USE labl);
12708   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge);
12709   expand %{
12710     jmpCon(cmp, flags, labl);    // JLT or JGE...
12711   %}
12712 %}
12713 
12714 // Compare 2 longs and CMOVE longs.
12715 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12716   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12717   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12718   ins_cost(400);
12719   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12720             "CMOV$cmp $dst.hi,$src.hi" %}
12721   opcode(0x0F,0x40);
12722   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12723   ins_pipe( pipe_cmov_reg_long );
12724 %}
12725 
12726 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12727   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12728   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12729   ins_cost(500);
12730   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12731             "CMOV$cmp $dst.hi,$src.hi" %}
12732   opcode(0x0F,0x40);
12733   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12734   ins_pipe( pipe_cmov_reg_long );
12735 %}
12736 
12737 // Compare 2 longs and CMOVE ints.
12738 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12739   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12740   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12741   ins_cost(200);
12742   format %{ "CMOV$cmp $dst,$src" %}
12743   opcode(0x0F,0x40);
12744   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12745   ins_pipe( pipe_cmov_reg );
12746 %}
12747 
12748 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12749   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12750   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12751   ins_cost(250);
12752   format %{ "CMOV$cmp $dst,$src" %}
12753   opcode(0x0F,0x40);
12754   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12755   ins_pipe( pipe_cmov_mem );
12756 %}
12757 
// Compare 2 longs and CMOVE ptrs.
12759 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12760   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12761   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12762   ins_cost(200);
12763   format %{ "CMOV$cmp $dst,$src" %}
12764   opcode(0x0F,0x40);
12765   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12766   ins_pipe( pipe_cmov_reg );
12767 %}
12768 
12769 // Compare 2 longs and CMOVE doubles
12770 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12772   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12773   ins_cost(200);
12774   expand %{
12775     fcmovDPR_regS(cmp,flags,dst,src);
12776   %}
12777 %}
12778 
12779 // Compare 2 longs and CMOVE doubles
12780 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12782   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12783   ins_cost(200);
12784   expand %{
12785     fcmovD_regS(cmp,flags,dst,src);
12786   %}
12787 %}
12788 
12789 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12791   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12792   ins_cost(200);
12793   expand %{
12794     fcmovFPR_regS(cmp,flags,dst,src);
12795   %}
12796 %}
12797 
12798 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12800   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12801   ins_cost(200);
12802   expand %{
12803     fcmovF_regS(cmp,flags,dst,src);
12804   %}
12805 %}
12806 
12807 //======
12808 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12809 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12810   match( Set flags (CmpL src zero ));
12811   effect(TEMP tmp);
12812   ins_cost(200);
12813   format %{ "MOV    $tmp,$src.lo\n\t"
12814             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12815   ins_encode( long_cmp_flags0( src, tmp ) );
12816   ins_pipe( ialu_reg_reg_long );
12817 %}
12818 
12819 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12820 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12821   match( Set flags (CmpL src1 src2 ));
12822   ins_cost(200+300);
12823   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12824             "JNE,s  skip\n\t"
12825             "CMP    $src1.hi,$src2.hi\n\t"
12826      "skip:\t" %}
12827   ins_encode( long_cmp_flags1( src1, src2 ) );
12828   ins_pipe( ialu_cr_reg_reg );
12829 %}
12830 
12831 // Long compare reg == zero/reg OR reg != zero/reg
12832 // Just a wrapper for a normal branch, plus the predicate test.
12833 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12834   match(If cmp flags);
12835   effect(USE labl);
12836   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12837   expand %{
12838     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12839   %}
12840 %}
12841 
12842 //======
12843 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12844 instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
12845   match(Set flags (CmpUL src zero));
12846   effect(TEMP tmp);
12847   ins_cost(200);
12848   format %{ "MOV    $tmp,$src.lo\n\t"
12849             "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
12850   ins_encode(long_cmp_flags0(src, tmp));
12851   ins_pipe(ialu_reg_reg_long);
12852 %}
12853 
12854 // Manifest a CmpUL result in the normal flags.  Only good for EQ/NE compares.
12855 instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
12856   match(Set flags (CmpUL src1 src2));
12857   ins_cost(200+300);
12858   format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
12859             "JNE,s  skip\n\t"
12860             "CMP    $src1.hi,$src2.hi\n\t"
12861      "skip:\t" %}
12862   ins_encode(long_cmp_flags1(src1, src2));
12863   ins_pipe(ialu_cr_reg_reg);
12864 %}
12865 
12866 // Unsigned long compare reg == zero/reg OR reg != zero/reg
12867 // Just a wrapper for a normal branch, plus the predicate test.
12868 instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
12869   match(If cmp flags);
12870   effect(USE labl);
12871   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne);
12872   expand %{
12873     jmpCon(cmp, flags, labl);    // JEQ or JNE...
12874   %}
12875 %}
12876 
12877 // Compare 2 longs and CMOVE longs.
12878 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12879   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12880   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12881   ins_cost(400);
12882   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12883             "CMOV$cmp $dst.hi,$src.hi" %}
12884   opcode(0x0F,0x40);
12885   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12886   ins_pipe( pipe_cmov_reg_long );
12887 %}
12888 
12889 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12890   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12891   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12892   ins_cost(500);
12893   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12894             "CMOV$cmp $dst.hi,$src.hi" %}
12895   opcode(0x0F,0x40);
12896   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12897   ins_pipe( pipe_cmov_reg_long );
12898 %}
12899 
12900 // Compare 2 longs and CMOVE ints.
12901 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12902   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12903   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12904   ins_cost(200);
12905   format %{ "CMOV$cmp $dst,$src" %}
12906   opcode(0x0F,0x40);
12907   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12908   ins_pipe( pipe_cmov_reg );
12909 %}
12910 
12911 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12912   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12913   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12914   ins_cost(250);
12915   format %{ "CMOV$cmp $dst,$src" %}
12916   opcode(0x0F,0x40);
12917   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12918   ins_pipe( pipe_cmov_mem );
12919 %}
12920 
// Compare 2 longs and CMOVE ptrs.
12922 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12923   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12924   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12925   ins_cost(200);
12926   format %{ "CMOV$cmp $dst,$src" %}
12927   opcode(0x0F,0x40);
12928   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12929   ins_pipe( pipe_cmov_reg );
12930 %}
12931 
12932 // Compare 2 longs and CMOVE doubles
12933 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12935   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12936   ins_cost(200);
12937   expand %{
12938     fcmovDPR_regS(cmp,flags,dst,src);
12939   %}
12940 %}
12941 
12942 // Compare 2 longs and CMOVE doubles
12943 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12945   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12946   ins_cost(200);
12947   expand %{
12948     fcmovD_regS(cmp,flags,dst,src);
12949   %}
12950 %}
12951 
12952 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12954   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12955   ins_cost(200);
12956   expand %{
12957     fcmovFPR_regS(cmp,flags,dst,src);
12958   %}
12959 %}
12960 
12961 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12963   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12964   ins_cost(200);
12965   expand %{
12966     fcmovF_regS(cmp,flags,dst,src);
12967   %}
12968 %}
12969 
12970 //======
12971 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12972 // Same as cmpL_reg_flags_LEGT except must negate src
12973 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12974   match( Set flags (CmpL src zero ));
12975   effect( TEMP tmp );
12976   ins_cost(300);
12977   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12978             "CMP    $tmp,$src.lo\n\t"
12979             "SBB    $tmp,$src.hi\n\t" %}
12980   ins_encode( long_cmp_flags3(src, tmp) );
12981   ins_pipe( ialu_reg_reg_long );
12982 %}
12983 
12984 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12985 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12986 // requires a commuted test to get the same result.
12987 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12988   match( Set flags (CmpL src1 src2 ));
12989   effect( TEMP tmp );
12990   ins_cost(300);
12991   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12992             "MOV    $tmp,$src2.hi\n\t"
12993             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12994   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12995   ins_pipe( ialu_cr_reg_reg );
12996 %}
12997 
// Long compares reg < zero/reg OR reg >= zero/reg.
12999 // Just a wrapper for a normal branch, plus the predicate test
13000 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13001   match(If cmp flags);
13002   effect(USE labl);
13003   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13004   ins_cost(300);
13005   expand %{
13006     jmpCon(cmp,flags,labl);    // JGT or JLE...
13007   %}
13008 %}
13009 
13010 //======
13011 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13012 // Same as cmpUL_reg_flags_LEGT except must negate src
13013 instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
13014   match(Set flags (CmpUL src zero));
13015   effect(TEMP tmp);
13016   ins_cost(300);
13017   format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
13018             "CMP    $tmp,$src.lo\n\t"
13019             "SBB    $tmp,$src.hi\n\t" %}
13020   ins_encode(long_cmp_flags3(src, tmp));
13021   ins_pipe(ialu_reg_reg_long);
13022 %}
13023 
13024 // Manifest a CmpUL result in the normal flags.  Only good for LE or GT compares.
13025 // Same as cmpUL_reg_flags_LTGE except operands swapped.  Swapping operands
13026 // requires a commuted test to get the same result.
13027 instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
13028   match(Set flags (CmpUL src1 src2));
13029   effect(TEMP tmp);
13030   ins_cost(300);
13031   format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
13032             "MOV    $tmp,$src2.hi\n\t"
13033             "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
13034   ins_encode(long_cmp_flags2( src2, src1, tmp));
13035   ins_pipe(ialu_cr_reg_reg);
13036 %}
13037 
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
13039 // Just a wrapper for a normal branch, plus the predicate test
13040 instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
13041   match(If cmp flags);
13042   effect(USE labl);
13043   predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le);
13044   ins_cost(300);
13045   expand %{
13046     jmpCon(cmp, flags, labl);    // JGT or JLE...
13047   %}
13048 %}
13049 
13050 // Compare 2 longs and CMOVE longs.
13051 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13052   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13053   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13054   ins_cost(400);
13055   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13056             "CMOV$cmp $dst.hi,$src.hi" %}
13057   opcode(0x0F,0x40);
13058   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13059   ins_pipe( pipe_cmov_reg_long );
13060 %}
13061 
13062 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13063   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13064   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13065   ins_cost(500);
13066   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13067             "CMOV$cmp $dst.hi,$src.hi+4" %}
13068   opcode(0x0F,0x40);
13069   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13070   ins_pipe( pipe_cmov_reg_long );
13071 %}
13072 
13073 // Compare 2 longs and CMOVE ints.
13074 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
13075   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13076   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13077   ins_cost(200);
13078   format %{ "CMOV$cmp $dst,$src" %}
13079   opcode(0x0F,0x40);
13080   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13081   ins_pipe( pipe_cmov_reg );
13082 %}
13083 
13084 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
13085   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13086   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13087   ins_cost(250);
13088   format %{ "CMOV$cmp $dst,$src" %}
13089   opcode(0x0F,0x40);
13090   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13091   ins_pipe( pipe_cmov_mem );
13092 %}
13093 
13094 // Compare 2 longs and CMOVE ptrs.
13095 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13096   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13097   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13098   ins_cost(200);
13099   format %{ "CMOV$cmp $dst,$src" %}
13100   opcode(0x0F,0x40);
13101   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13102   ins_pipe( pipe_cmov_reg );
13103 %}
13104 
13105 // Compare 2 longs and CMOVE doubles
13106 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13108   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13109   ins_cost(200);
13110   expand %{
13111     fcmovDPR_regS(cmp,flags,dst,src);
13112   %}
13113 %}
13114 
13115 // Compare 2 longs and CMOVE doubles
13116 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13118   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13119   ins_cost(200);
13120   expand %{
13121     fcmovD_regS(cmp,flags,dst,src);
13122   %}
13123 %}
13124 
13125 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13127   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13128   ins_cost(200);
13129   expand %{
13130     fcmovFPR_regS(cmp,flags,dst,src);
13131   %}
13132 %}
13133 
13134 
13135 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
13137   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13138   ins_cost(200);
13139   expand %{
13140     fcmovF_regS(cmp,flags,dst,src);
13141   %}
13142 %}
13143 
13144 
13145 // ============================================================================
13146 // Procedure Call/Return Instructions
13147 // Call Java Static Instruction
13148 // Note: If this code changes, the corresponding ret_addr_offset() and
13149 //       compute_padding() functions will have to be adjusted.
13150 instruct CallStaticJavaDirect(method meth) %{
13151   match(CallStaticJava);
13152   effect(USE meth);
13153 
13154   ins_cost(300);
13155   format %{ "CALL,static " %}
13156   opcode(0xE8); /* E8 cd */
13157   ins_encode( pre_call_resets,
13158               Java_Static_Call( meth ),
13159               call_epilog,
13160               post_call_FPU );
13161   ins_pipe( pipe_slow );
13162   ins_alignment(4);
13163 %}
13164 
13165 // Call Java Dynamic Instruction
13166 // Note: If this code changes, the corresponding ret_addr_offset() and
13167 //       compute_padding() functions will have to be adjusted.
13168 instruct CallDynamicJavaDirect(method meth) %{
13169   match(CallDynamicJava);
13170   effect(USE meth);
13171 
13172   ins_cost(300);
13173   format %{ "MOV    EAX,(oop)-1\n\t"
13174             "CALL,dynamic" %}
13175   opcode(0xE8); /* E8 cd */
13176   ins_encode( pre_call_resets,
13177               Java_Dynamic_Call( meth ),
13178               call_epilog,
13179               post_call_FPU );
13180   ins_pipe( pipe_slow );
13181   ins_alignment(4);
13182 %}
13183 
13184 // Call Runtime Instruction
13185 instruct CallRuntimeDirect(method meth) %{
13186   match(CallRuntime );
13187   effect(USE meth);
13188 
13189   ins_cost(300);
13190   format %{ "CALL,runtime " %}
13191   opcode(0xE8); /* E8 cd */
13192   // Use FFREEs to clear entries in float stack
13193   ins_encode( pre_call_resets,
13194               FFree_Float_Stack_All,
13195               Java_To_Runtime( meth ),
13196               post_call_FPU );
13197   ins_pipe( pipe_slow );
13198 %}
13199 
13200 // Call runtime without safepoint
13201 instruct CallLeafDirect(method meth) %{
13202   match(CallLeaf);
13203   effect(USE meth);
13204 
13205   ins_cost(300);
13206   format %{ "CALL_LEAF,runtime " %}
13207   opcode(0xE8); /* E8 cd */
13208   ins_encode( pre_call_resets,
13209               FFree_Float_Stack_All,
13210               Java_To_Runtime( meth ),
13211               Verify_FPU_For_Leaf, post_call_FPU );
13212   ins_pipe( pipe_slow );
13213 %}
13214 
13215 instruct CallLeafNoFPDirect(method meth) %{
13216   match(CallLeafNoFP);
13217   effect(USE meth);
13218 
13219   ins_cost(300);
13220   format %{ "CALL_LEAF_NOFP,runtime " %}
13221   opcode(0xE8); /* E8 cd */
13222   ins_encode(pre_call_resets, Java_To_Runtime(meth));
13223   ins_pipe( pipe_slow );
13224 %}
13225 
13226 
13227 // Return Instruction
13228 // Remove the return address & jump to it.
13229 instruct Ret() %{
13230   match(Return);
13231   format %{ "RET" %}
13232   opcode(0xC3);
13233   ins_encode(OpcP);
13234   ins_pipe( pipe_jmp );
13235 %}
13236 
13237 // Tail Call; Jump from runtime stub to Java code.
13238 // Also known as an 'interprocedural jump'.
13239 // Target of jump will eventually return to caller.
13240 // TailJump below removes the return address.
13241 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13242   match(TailCall jump_target method_oop );
13243   ins_cost(300);
13244   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13245   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13246   ins_encode( OpcP, RegOpc(jump_target) );
13247   ins_pipe( pipe_jmp );
13248 %}
13249 
13250 
13251 // Tail Jump; remove the return address; jump to target.
13252 // TailCall above leaves the return address around.
13253 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13254   match( TailJump jump_target ex_oop );
13255   ins_cost(300);
13256   format %{ "POP    EDX\t# pop return address into dummy\n\t"
13257             "JMP    $jump_target " %}
13258   opcode(0xFF, 0x4);  /* Opcode FF /4 */
13259   ins_encode( enc_pop_rdx,
13260               OpcP, RegOpc(jump_target) );
13261   ins_pipe( pipe_jmp );
13262 %}
13263 
13264 // Create exception oop: created by stack-crawling runtime code.
13265 // Created exception is now available to this handler, and is setup
13266 // just prior to jumping to this handler.  No code emitted.
13267 instruct CreateException( eAXRegP ex_oop )
13268 %{
13269   match(Set ex_oop (CreateEx));
13270 
13271   size(0);
13272   // use the following format syntax
13273   format %{ "# exception oop is in EAX; no code emitted" %}
13274   ins_encode();
13275   ins_pipe( empty );
13276 %}
13277 
13278 
13279 // Rethrow exception:
13280 // The exception oop will come in the first argument position.
13281 // Then JUMP (not call) to the rethrow stub code.
13282 instruct RethrowException()
13283 %{
13284   match(Rethrow);
13285 
13286   // use the following format syntax
13287   format %{ "JMP    rethrow_stub" %}
13288   ins_encode(enc_rethrow);
13289   ins_pipe( pipe_jmp );
13290 %}
13291 
13292 // inlined locking and unlocking
13293 
13294 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13295   predicate(Compile::current()->use_rtm());
13296   match(Set cr (FastLock object box));
13297   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13298   ins_cost(300);
13299   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13300   ins_encode %{
13301     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13302                  $scr$$Register, $cx1$$Register, $cx2$$Register,
13303                  _counters, _rtm_counters, _stack_rtm_counters,
13304                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13305                  true, ra_->C->profile_rtm());
13306   %}
13307   ins_pipe(pipe_slow);
13308 %}
13309 
13310 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13311   predicate(!Compile::current()->use_rtm());
13312   match(Set cr (FastLock object box));
13313   effect(TEMP tmp, TEMP scr, USE_KILL box);
13314   ins_cost(300);
13315   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13316   ins_encode %{
13317     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13318                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13319   %}
13320   ins_pipe(pipe_slow);
13321 %}
13322 
13323 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13324   match(Set cr (FastUnlock object box));
13325   effect(TEMP tmp, USE_KILL box);
13326   ins_cost(300);
13327   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13328   ins_encode %{
13329     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13330   %}
13331   ins_pipe(pipe_slow);
13332 %}
13333 
13334 
13335 
13336 // ============================================================================
13337 // Safepoint Instruction
13338 instruct safePoint_poll(eFlagsReg cr) %{
13339   match(SafePoint);
13340   effect(KILL cr);
13341 
  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0, however, we put
  // additional pressure on cache index 0 of the D$.  Because of alignment
  // (just like the situation at hand) the lower indices tend to see more
  // traffic.  It would be better to change the polling address to offset 0
  // of the last cache line in the polling page.
13349 
13350   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13351   ins_cost(125);
  size(6);
13353   ins_encode( Safepoint_Poll() );
13354   ins_pipe( ialu_reg_mem );
13355 %}
13356 
13357 
13358 // ============================================================================
13359 // This name is KNOWN by the ADLC and cannot be changed.
13360 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this node.
13362 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13363   match(Set dst (ThreadLocal));
13364   effect(DEF dst, KILL cr);
13365 
13366   format %{ "MOV    $dst, Thread::current()" %}
13367   ins_encode %{
13368     Register dstReg = as_Register($dst$$reg);
13369     __ get_thread(dstReg);
13370   %}
13371   ins_pipe( ialu_reg_fat );
13372 %}
13373 
13374 
13375 
13376 //----------PEEPHOLE RULES-----------------------------------------------------
13377 // These must follow all instruction definitions as they use the names
13378 // defined in the instructions definitions.
13379 //
13380 // peepmatch ( root_instr_name [preceding_instruction]* );
13381 //
13382 // peepconstraint %{
13383 // (instruction_number.operand_name relational_op instruction_number.operand_name
13384 //  [, ...] );
13385 // // instruction numbers are zero-based using left to right order in peepmatch
13386 //
13387 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13388 // // provide an instruction_number.operand_name for each operand that appears
13389 // // in the replacement instruction's match rule
13390 //
13391 // ---------VM FLAGS---------------------------------------------------------
13392 //
13393 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13394 //
13395 // Each peephole rule is given an identifying number starting with zero and
13396 // increasing by one in the order seen by the parser.  An individual peephole
13397 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13398 // on the command-line.
13399 //
13400 // ---------CURRENT LIMITATIONS----------------------------------------------
13401 //
13402 // Only match adjacent instructions in same basic block
13403 // Only equality constraints
13404 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13405 // Only one replacement instruction
13406 //
13407 // ---------EXAMPLE----------------------------------------------------------
13408 //
13409 // // pertinent parts of existing instructions in architecture description
13410 // instruct movI(rRegI dst, rRegI src) %{
13411 //   match(Set dst (CopyI src));
13412 // %}
13413 //
13414 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13415 //   match(Set dst (AddI dst src));
13416 //   effect(KILL cr);
13417 // %}
13418 //
13419 // // Change (inc mov) to lea
13420 // peephole %{
//   // increment preceded by register-register move
13422 //   peepmatch ( incI_eReg movI );
13423 //   // require that the destination register of the increment
13424 //   // match the destination register of the move
13425 //   peepconstraint ( 0.dst == 1.dst );
13426 //   // construct a replacement instruction that sets
13427 //   // the destination to ( move's source register + one )
13428 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13429 // %}
13430 //
13431 // Implementation no longer uses movX instructions since
13432 // machine-independent system no longer uses CopyX nodes.
13433 //
13434 // peephole %{
13435 //   peepmatch ( incI_eReg movI );
13436 //   peepconstraint ( 0.dst == 1.dst );
13437 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13438 // %}
13439 //
13440 // peephole %{
13441 //   peepmatch ( decI_eReg movI );
13442 //   peepconstraint ( 0.dst == 1.dst );
13443 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13444 // %}
13445 //
13446 // peephole %{
13447 //   peepmatch ( addI_eReg_imm movI );
13448 //   peepconstraint ( 0.dst == 1.dst );
13449 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13450 // %}
13451 //
13452 // peephole %{
13453 //   peepmatch ( addP_eReg_imm movP );
13454 //   peepconstraint ( 0.dst == 1.dst );
13455 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13456 // %}
13457 
13458 // // Change load of spilled value to only a spill
13459 // instruct storeI(memory mem, rRegI src) %{
13460 //   match(Set mem (StoreI mem src));
13461 // %}
13462 //
13463 // instruct loadI(rRegI dst, memory mem) %{
13464 //   match(Set dst (LoadI mem));
13465 // %}
13466 //
13467 peephole %{
13468   peepmatch ( loadI storeI );
13469   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13470   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13471 %}
13472 
13473 //----------SMARTSPILL RULES---------------------------------------------------
13474 // These must follow all instruction definitions as they use the names
13475 // defined in the instructions definitions.