//
// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.
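//
// For example (an illustrative reading of the format described above), the
// EBX definition below,
//   reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
// says: save-on-call for compiled Java code, save-on-entry under the C
// calling convention, spilled as an int (LoadI/StoreI), hardware encoding 3.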

// General Registers
// EBX, ESI, and EDI were previously set as save-on-entry for Java code.
// SOE was turned off in Java code due to frequent use of uncommon-traps.
// Now that the allocator is better, ESI and EDI are turned on as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// OK, so here's the trick: FPR1 is really st(0), except in the midst
// of emission of assembly for a machnode.  During the emission the fpu stack
// is pushed, making FPR1 == st(1) temporarily.  However, at any safepoint
// the stack will not have this extra element, so FPR1 == st(0) from the
// oopMap viewpoint.  This same weirdness with numbering forces the
// instruction encoding to play games with the register
// encode to correct for this 0/1 issue.  See MachSpillCopyNode::implementation,
// where it does flt->flt moves, for an example.
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
//
// Empty fill registers, which are never used, but supply alignment to xmm regs
//
reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H,
                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();

// Class for all registers
reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for all registers (excluding EBP)
reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg and int_reg_no_ebp.
reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});

// Class of "X" registers
reg_class int_x_reg(EBX, ECX, EDX, EAX);

// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);

// Class for general registers excluding ECX
reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers excluding ECX (and EBP)
reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});

// Class for general registers excluding EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);

// Class for general registers excluding EAX and EBX.
reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
// Class for general registers excluding EAX and EBX (and EBP)
reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});

// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);

// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);

// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);

// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);

// Class of EDI (for synchronization)
reg_class edi_reg(EDI);

// Class of ESI (for synchronization)
reg_class esi_reg(ESI);

// Singleton class for stack pointer
reg_class sp_reg(ESP);

// Singleton class for instruction pointer
// reg_class ip_reg(EIP);

// Class of integer register pairs
reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI).
reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg and long_reg_no_ebp.
reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});

// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );

// Not AX or DX, used in divides
reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (and neither EBP), used in divides
reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                      FPR7L,FPR7H );

reg_class fp_flt_reg0( FPR1L );
reg_class fp_dbl_reg0( FPR1L,FPR1H );
reg_class fp_dbl_reg1( FPR2L,FPR2H );
reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_x86_32.cpp
extern bool is_operand_hi32_zero(Node* n);
%}

source %{
#define   RELOC_IMM32    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

#define __ _masm.

// How to find the high register of a Long pair, given the low register
#define   HIGH_FROM_LOW(x) ((x)+2)
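// For example, for the long pair EDX:EAX the low half EAX has encoding 0 and
// HIGH_FROM_LOW(0) == 2 == EDX; likewise EBX(3) from ECX(1) and EDI(7) from
// EBP(5), matching the Long pairs listed with the allocation classes above.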

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.

// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
  operand[0] = lo;
  operand[1] = hi;
  return operand;
}

// Buffer for 128-bit masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
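// A rough illustration of why the extra 128 bits of slack suffice: the jlong
// array is at least 8-byte aligned, so each requested slot &fp_signmask_pool[i*2]
// sits either on a 16-byte boundary or 8 bytes past one, and the (adr)&(~0xF)
// rounding in double_quadword() moves it down by at most 8 bytes -- which the
// unused pair at index 0 absorbs, so every mask still lies inside the pool.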

// Offset hacking within calls.
static int pre_call_resets_size() {
  int size = 0;
  Compile* C = Compile::current();
  if (C->in_24_bit_fp_mode()) {
    size += 6; // fldcw
  }
  if (C->max_vector_size() > 16) {
    size += 3; // vzeroupper
  }
  return size;
}

// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
int MachCallStaticJavaNode::ret_addr_offset() {
  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
}

int MachCallDynamicJavaNode::ret_addr_offset() {
  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
}
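// (For reference: the 5 bytes above are the CALL opcode plus its 32-bit
// displacement; the 10 bytes of the dynamic case additionally cover the
// 5-byte MOV that loads the inline-cache oop -- see the matching "skip MOV
// instruction" accounting in CallDynamicJavaDirectNode::compute_padding below.)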

static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
}

// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't.
bool SafePointNode::needs_polling_address_input() {
  return false;
}

//
// Compute padding required for nodes which need alignment
//

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}

// The address of the call instruction needs to be 4-byte aligned to
// ensure that it does not span a cache line so that it can be patched.
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
  current_offset += pre_call_resets_size();  // skip fldcw, if any
  current_offset += 5;      // skip MOV instruction
  current_offset += 1;      // skip call opcode byte
  return round_to(current_offset, alignment_required()) - current_offset;
}
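// Worked example (assuming alignment_required() is 4, per the comments above):
// if the dynamic call node would start at offset 7 with no pre-call resets,
// then 7 + 5 (MOV) + 1 (call opcode) = 13 and round_to(13, 4) = 16, so 3 bytes
// of padding are emitted and the call's 4-byte displacement begins on a
// 4-byte boundary, keeping it patchable.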

// EMIT_RM()
void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
  cbuf.insts()->emit_int8(c);
}
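// The byte layout is mod(2 bits):reg(3):r/m(3).  For example, the epilog's
// emit_rm(cbuf, 0x3, 0x00, ESP_enc) produces 0xC4, the ModRM byte of
// "ADD ESP, imm" (register-direct mode, /0 opcode extension, r/m = ESP).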

// EMIT_CC()
void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
  unsigned char c = (unsigned char)( f1 | f2 );
  cbuf.insts()->emit_int8(c);
}

// EMIT_OPCODE()
void emit_opcode(CodeBuffer &cbuf, int code) {
  cbuf.insts()->emit_int8((unsigned char) code);
}

// EMIT_OPCODE() w/ relocation information
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}

// EMIT_D8()
void emit_d8(CodeBuffer &cbuf, int d8) {
  cbuf.insts()->emit_int8((unsigned char) d8);
}

// EMIT_D16()
void emit_d16(CodeBuffer &cbuf, int d16) {
  cbuf.insts()->emit_int16(d16);
}

// EMIT_D32()
void emit_d32(CodeBuffer &cbuf, int d32) {
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from relocInfo::relocType
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.insts_mark(), reloc, format);
  cbuf.insts()->emit_int32(d32);
}

// emit 32 bit value and construct relocation entry from RelocationHolder
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
  }
#endif
  cbuf.relocate(cbuf.insts_mark(), rspec, format);
  cbuf.insts()->emit_int32(d32);
}

// Access stack slot for load or store
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}
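// For example (illustrative only), store_to_stackslot(cbuf, 0xDB, 0x0, 8)
// emits DB 44 24 08, i.e. FILD DWORD PTR [ESP+8]: opcode, ModRM with an 8-bit
// displacement mode, the ESP SIB byte, and the displacement itself.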

   // rRegI ereg, memory mem) %{    // emit_reg_mem
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( disp_reloc != relocInfo::none ) {
            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && (disp_reloc == relocInfo::none) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( disp_reloc != relocInfo::none ) {
          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}
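// Illustrative call: encode_RegMem(cbuf, EAX_enc, ECX_enc, 0x4 /*no index*/,
// 0, 16, relocInfo::none) takes the no-SIB path above and emits 41 10 -- the
// ModRM byte (mode 0x1, reg EAX, base ECX) plus an 8-bit displacement of 16,
// i.e. the [ECX+16] operand of whatever opcode was emitted just before it.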


void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
  if( dst_encoding == src_encoding ) {
    // reg-reg copy, use an empty encoding
  } else {
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
  }
}

void emit_cmpfp_fixup(MacroAssembler& _masm) {
  Label exit;
  __ jccb(Assembler::noParity, exit);
  __ pushf();
  //
  // comiss/ucomiss instructions set ZF,PF,CF flags and
  // zero OF,AF,SF for NaN values.
  // Fixup flags by zeroing ZF,PF so that compare of NaN
  // values returns 'less than' result (CF is set).
  // Leave the rest of flags unchanged.
  //
  //    7 6 5 4 3 2 1 0
  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
  //    0 0 1 0 1 0 1 1   (0x2B)
  //
  __ andl(Address(rsp, 0), 0xffffff2b);
  __ popf();
  __ bind(exit);
}

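// Materialize a three-way floating-point compare result in 'dst':
// -1 if unordered or less, 0 if equal, +1 if greater (this is simply a
// reading of the code below, noted here for reference).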
void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
  Label done;
  __ movl(dst, -1);
  __ jcc(Assembler::parity, done);
  __ jcc(Assembler::below, done);
  __ setb(Assembler::notEqual, dst);
  __ movzbl(dst, dst);
  __ bind(done);
}


//=============================================================================
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;

int Compile::ConstantTable::calculate_table_base_offset() const {
  return 0;  // absolute addressing, no offset
}

bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  // Empty encoding
}

uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
  return 0;
}

#ifndef PRODUCT
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  st->print("# MachConstantBaseNode (empty encoding)");
}
#endif


//=============================================================================
#ifndef PRODUCT
void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  Compile* C = ra_->C;

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove wordSize for return addr which is already pushed.
  framesize -= wordSize;

  if (C->need_stack_bang(bangsize)) {
    framesize -= wordSize;
    st->print("# stack bang (%d bytes)", bangsize);
    st->print("\n\t");
    st->print("PUSH   EBP\t# Save EBP");
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
    }
    if (framesize) {
      st->print("\n\t");
      st->print("SUB    ESP, #%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB    ESP, #%d\t# Create frame",framesize);
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
    if (PreserveFramePointer) {
      st->print("\n\t");
      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
      if (framesize > 0) {
        st->print("\n\t");
        st->print("ADD    EBP, #%d", framesize);
      }
    }
  }

  if (VerifyStackAtCalls) {
    st->print("\n\t");
    framesize -= wordSize;
    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
  }

  if( C->in_24_bit_fp_mode() ) {
    st->print("\n\t");
    st->print("FLDCW  \t# load 24 bit fpu control word");
  }
  if (UseSSE >= 2 && VerifyFPU) {
    st->print("\n\t");
    st->print("# verify FPU stack (must be clean on entry)");
  }

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    st->print("\n\t");
    st->print("# stack alignment check");
  }
#endif
  st->cr();
}
#endif


void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  int framesize = C->frame_size_in_bytes();
  int bangsize = C->bang_size_in_bytes();

  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());

  C->set_frame_complete(cbuf.insts_size());

  if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}

int MachPrologNode::reloc() const {
  return 0; // a large enough number
}

//=============================================================================
#ifndef PRODUCT
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
  framesize -= 2*wordSize;

  if (C->max_vector_size() > 16) {
    st->print("VZEROUPPER");
    st->cr(); st->print("\t");
  }
  if (C->in_24_bit_fp_mode()) {
    st->print("FLDCW  standard control word");
    st->cr(); st->print("\t");
  }
  if (framesize) {
    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL   EBP"); st->print("\t");
  if (do_polling() && C->is_method_compilation()) {
    st->print("TEST   PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif

void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  MacroAssembler _masm(&cbuf);

  if (C->max_vector_size() > 16) {
    // Clear upper bits of YMM registers when current compiled code uses
    // wide vectors to avoid AVX <-> SSE transition penalty during call.
    _masm.vzeroupper();
  }
  // If method set FPU control word, restore to standard control word
  if (C->in_24_bit_fp_mode()) {
    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if (framesize >= 128) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  } else if (framesize) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc);

  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
    __ reserved_stack_check();
  }

  if (do_polling() && C->is_method_compilation()) {
    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}

uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if (C->max_vector_size() > 16) size += 3; // vzeroupper
  if (do_polling() && C->is_method_compilation()) size += 6;

  int framesize = C->frame_size_in_bytes();
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
  framesize -= 2*wordSize;

  size++; // popl rbp

  if (framesize >= 128) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  size += 64; // added to support ReservedStackAccess
  return size;
}

int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}

const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

int MachEpilogNode::safepoint_offset() const { return 0; }

//=============================================================================

enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}

static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}
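// For example, loading EAX from [ESP+4] goes through the helper above as
// opcode 0x8B plus encode_RegMem(...), emitting 8B 44 24 04 -- three bytes of
// opcode/ModRM/SIB plus a one-byte displacement, matching size+3+offset_size.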

// Helper for XMM registers.  Extra opcode bits, limited syntax.
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                         int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  int in_size_in_bits = Assembler::EVEX_32bit;
  int evex_encoding = 0;
  if (reg_lo+1 == reg_hi) {
    in_size_in_bits = Assembler::EVEX_64bit;
    evex_encoding = Assembler::VEX_W;
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) {
        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    } else {
      if (is_load) {
        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
      } else {
        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
      }
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (reg_lo+1 == reg_hi) { // double move?
      if (is_load) st->print("%s %s,[ESP + #%d]",
                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                              Matcher::regName[reg_lo], offset);
      else         st->print("MOVSD  [ESP + #%d],%s",
                              offset, Matcher::regName[reg_lo]);
    } else {
      if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
                              Matcher::regName[reg_lo], offset);
      else         st->print("MOVSS  [ESP + #%d],%s",
                              offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  bool is_single_byte = false;
  if ((UseAVX > 2) && (offset != 0)) {
    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
  }
  int offset_size = 0;
  if (UseAVX > 2 ) {
    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
  } else {
    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  }
  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
  return size+5+offset_size;
}


static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    } else {
      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
                as_XMMRegister(Matcher::_regEncode[src_lo]));
    }
#ifndef PRODUCT
  } else if (!do_size) {
    if (size != 0) st->print("\n\t");
    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    } else {
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
    }
#endif
  }
  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
  int sz = (UseAVX > 2) ? 6 : 4;
  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
  return size + sz;
}

static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
             as_Register(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}


static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                                 int src_hi, int dst_hi, int size, outputStream* st ) {
  // 32-bit
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
             as_XMMRegister(Matcher::_regEncode[src_lo]));
#ifndef PRODUCT
  } else if (!do_size) {
    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
#endif
  }
  return (UseAVX> 2) ? 6 : 4;
}

static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}

static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD    %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
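  // (Note: EBX_num / EDX_num are used here only for their hardware encodings,
  // 3 and 2, which become the ModRM /reg opcode extension in impl_helper():
  // DD /3 is FSTP m64 and DD /2 is FST m64, with D9 /3 and D9 /2 as the
  // 32-bit forms.)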
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}

// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);

static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
                                     int dst_offset, uint ireg, outputStream* st) {
  int calc_size = 0;
  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
  switch (ireg) {
  case Op_VecS:
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    break;
  case Op_VecD:
    calc_size = 3+src_offset_size + 3+dst_offset_size;
    src_offset += 4;
    dst_offset += 4;
    src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
    dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
    calc_size += 3+src_offset_size + 3+dst_offset_size;
    break;
  case Op_VecX:
  case Op_VecY:
  case Op_VecZ:
    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
    break;
  default:
    ShouldNotReachHere();
  }
  if (cbuf) {
    MacroAssembler _masm(cbuf);
    int offset = __ offset();
    switch (ireg) {
    case Op_VecS:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      break;
    case Op_VecD:
      __ pushl(Address(rsp, src_offset));
      __ popl (Address(rsp, dst_offset));
      __ pushl(Address(rsp, src_offset+4));
      __ popl (Address(rsp, dst_offset+4));
      break;
    case Op_VecX:
      __ movdqu(Address(rsp, -16), xmm0);
      __ movdqu(xmm0, Address(rsp, src_offset));
      __ movdqu(Address(rsp, dst_offset), xmm0);
      __ movdqu(xmm0, Address(rsp, -16));
      break;
    case Op_VecY:
      __ vmovdqu(Address(rsp, -32), xmm0);
      __ vmovdqu(xmm0, Address(rsp, src_offset));
      __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
      __ evmovdqul(Address(rsp, -64), xmm0, 2);
      __ evmovdqul(xmm0, Address(rsp, src_offset), 2);
      __ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
      __ evmovdqul(xmm0, Address(rsp, -64), 2);
      break;
    default:
      ShouldNotReachHere();
    }
    int size = __ offset() - offset;
    assert(size == calc_size, "incorrect size calculation");
    return size;
#ifndef PRODUCT
  } else if (!do_size) {
    switch (ireg) {
    case Op_VecS:
      st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]",
                src_offset, dst_offset);
      break;
    case Op_VecD:
      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popq    [rsp + #%d]\n\t"
                "pushl   [rsp + #%d]\n\t"
                "popq    [rsp + #%d]",
                src_offset, dst_offset, src_offset+4, dst_offset+4);
      break;
     case Op_VecX:
      st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
                "movdqu  xmm0, [rsp + #%d]\n\t"
                "movdqu  [rsp + #%d], xmm0\n\t"
                "movdqu  xmm0, [rsp - #16]",
                src_offset, dst_offset);
      break;
    case Op_VecY:
      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
                "vmovdqu xmm0, [rsp + #%d]\n\t"
                "vmovdqu [rsp + #%d], xmm0\n\t"
                "vmovdqu xmm0, [rsp - #64]",
                src_offset, dst_offset);
      break;
    default:
      ShouldNotReachHere();
    }
#endif
  }
  return calc_size;
}

uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.
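      // Roughly: before the FLD below, FPRn sits at st(n-1), so the FLD uses
      // the encode minus 1; the FLD itself pushes the stack, after which FPRn
      // sits at st(n), so the FSTP uses the encode unchanged.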

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
     } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
     }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
#endif
    }
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  Unimplemented();
  return 0; // Mute compiler
}

#ifndef PRODUCT
void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
  implementation( NULL, ra_, false, st );
}
#endif

void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}


//=============================================================================
#ifndef PRODUCT
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_reg_first(this);
  st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
}
#endif

void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  int reg = ra_->get_encode(this);
  if( offset >= 128 ) {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x2, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d32(cbuf, offset);
  }
  else {
    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
    emit_rm(cbuf, 0x1, reg, 0x04);
    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
    emit_d8(cbuf, offset);
  }
}

1341 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
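  // The LEA emitted above is 7 bytes with a 32-bit displacement
  // (opcode + ModRM + SIB + disp32) and 4 bytes with an 8-bit displacement.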
1342   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1343   if( offset >= 128 ) {
1344     return 7;
1345   }
1346   else {
1347     return 4;
1348   }
1349 }
1350 
1351 //=============================================================================
1352 #ifndef PRODUCT
1353 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1354   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1355   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1356   st->print_cr("\tNOP");
1357   st->print_cr("\tNOP");
1358   if( !OptoBreakpoint )
1359     st->print_cr("\tNOP");
1360 }
1361 #endif
1362 
1363 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1364   MacroAssembler masm(&cbuf);
1365 #ifdef ASSERT
1366   uint insts_size = cbuf.insts_size();
1367 #endif
1368   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1369   masm.jump_cc(Assembler::notEqual,
1370                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1371   /* WARNING these NOPs are critical so that verified entry point is properly
1372      aligned for patching by NativeJump::patch_verified_entry() */
1373   int nops_cnt = 2;
1374   if( !OptoBreakpoint ) // Leave space for int3
1375      nops_cnt += 1;
1376   masm.nop(nops_cnt);
1377 
1378   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1379 }
1380 
1381 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
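  // CMP EAX,[ECX+4] is 3 bytes and the JNE rel32 to the IC miss stub is 6,
  // plus 2 NOPs when OptoBreakpoint will add its own int3, otherwise 3.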
1382   return OptoBreakpoint ? 11 : 12;
1383 }
1384 
1385 
1386 //=============================================================================
1387 
1388 int Matcher::regnum_to_fpu_offset(int regnum) {
1389   return regnum - 32; // The FP registers are in the second chunk
1390 }
1391 
// This is UltraSparc-specific; true just means we have a fast l2f conversion.
1393 const bool Matcher::convL2FSupported(void) {
1394   return true;
1395 }
1396 
1397 // Is this branch offset short enough that a short branch can be used?
1398 //
1399 // NOTE: If the platform does not provide any short branch variants, then
1400 //       this method should return false for offset 0.
1401 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1405   offset -= br_size;
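  // (e.g. a 2-byte short Jcc branching to its own fall-through arrives here
  //  with offset == br_size and becomes displacement 0 after the adjustment)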
1406 
1407   // the short version of jmpConUCF2 contains multiple branches,
1408   // making the reach slightly less
1409   if (rule == jmpConUCF2_rule)
1410     return (-126 <= offset && offset <= 125);
1411   return (-128 <= offset && offset <= 127);
1412 }
1413 
1414 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1416   return false;
1417 }
1418 
1419 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1420 const bool Matcher::init_array_count_is_in_bytes = false;
1421 
1422 // Threshold size for cleararray.
1423 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1424 
1425 // Needs 2 CMOV's for longs.
1426 const int Matcher::long_cmove_cost() { return 1; }
1427 
1428 // No CMOVF/CMOVD with SSE/SSE2
1429 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1430 
1431 // Does the CPU require late expand (see block.cpp for description of late expand)?
1432 const bool Matcher::require_postalloc_expand = false;
1433 
1434 // Should the Matcher clone shifts on addressing modes, expecting them to
1435 // be subsumed into complex addressing expressions or compute them into
1436 // registers?  True for Intel but false for most RISCs
1437 const bool Matcher::clone_shift_expressions = true;
1438 
1439 // Do we need to mask the count passed to shift instructions or does
1440 // the cpu only look at the lower 5/6 bits anyway?
1441 const bool Matcher::need_masked_shift_count = false;
1442 
1443 bool Matcher::narrow_oop_use_complex_address() {
1444   ShouldNotCallThis();
1445   return true;
1446 }
1447 
1448 bool Matcher::narrow_klass_use_complex_address() {
1449   ShouldNotCallThis();
1450   return true;
1451 }
1452 
1453 
1454 // Is it better to copy float constants, or load them directly from memory?
1455 // Intel can load a float constant from a direct address, requiring no
1456 // extra registers.  Most RISCs will have to materialize an address into a
1457 // register first, so they would do better to copy the constant from stack.
1458 const bool Matcher::rematerialize_float_constants = true;
1459 
1460 // If CPU can load and store mis-aligned doubles directly then no fixup is
1461 // needed.  Else we split the double into 2 integer pieces and move it
1462 // piece-by-piece.  Only happens when passing doubles into C code as the
1463 // Java calling convention forces doubles to be aligned.
1464 const bool Matcher::misaligned_doubles_ok = true;
1465 
1466 
1467 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1468   // Get the memory operand from the node
1469   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1470   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1471   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1472   uint opcnt     = 1;                 // First operand
1473   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1474   while( idx >= skipped+num_edges ) {
1475     skipped += num_edges;
1476     opcnt++;                          // Bump operand count
1477     assert( opcnt < numopnds, "Accessing non-existent operand" );
1478     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1479   }
1480 
1481   MachOper *memory = node->_opnds[opcnt];
1482   MachOper *new_memory = NULL;
1483   switch (memory->opcode()) {
1484   case DIRECT:
1485   case INDOFFSET32X:
1486     // No transformation necessary.
1487     return;
1488   case INDIRECT:
1489     new_memory = new indirect_win95_safeOper( );
1490     break;
1491   case INDOFFSET8:
1492     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1493     break;
1494   case INDOFFSET32:
1495     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1496     break;
1497   case INDINDEXOFFSET:
1498     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1499     break;
1500   case INDINDEXSCALE:
1501     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1502     break;
1503   case INDINDEXSCALEOFFSET:
1504     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1505     break;
1506   case LOAD_LONG_INDIRECT:
1507   case LOAD_LONG_INDOFFSET32:
1508     // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1509     return;
1510   default:
1511     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1512     return;
1513   }
1514   node->_opnds[opcnt] = new_memory;
1515 }
1516 
1517 // Advertise here if the CPU requires explicit rounding operations
1518 // to implement the UseStrictFP mode.
1519 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1520 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On x32 they are stored with conversion only when the FPU is used for floats.
1523 bool Matcher::float_in_double() { return (UseSSE == 0); }
1524 
1525 // Do ints take an entire long register or just half?
1526 const bool Matcher::int_in_long = false;
1527 
1528 // Return whether or not this register is ever used as an argument.  This
1529 // function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1532 bool Matcher::can_be_java_arg( int reg ) {
1533   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1534   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1535   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1536   return false;
1537 }
1538 
1539 bool Matcher::is_spillable_arg( int reg ) {
1540   return can_be_java_arg(reg);
1541 }
1542 
1543 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses a multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded so that negating it still yields a
  // correct positive 32-bit value).
1549   return VM_Version::has_fast_idiv() &&
1550          (divisor == (int)divisor && divisor != min_jint);
1551 }
1552 
1553 // Register for DIVI projection of divmodI
1554 RegMask Matcher::divI_proj_mask() {
1555   return EAX_REG_mask();
1556 }
1557 
1558 // Register for MODI projection of divmodI
1559 RegMask Matcher::modI_proj_mask() {
1560   return EDX_REG_mask();
1561 }
1562 
1563 // Register for DIVL projection of divmodL
1564 RegMask Matcher::divL_proj_mask() {
1565   ShouldNotReachHere();
1566   return RegMask();
1567 }
1568 
1569 // Register for MODL projection of divmodL
1570 RegMask Matcher::modL_proj_mask() {
1571   ShouldNotReachHere();
1572   return RegMask();
1573 }
1574 
1575 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1576   return NO_REG_mask();
1577 }
1578 
// Returns true if the high 32 bits of the value are known to be zero.
1580 bool is_operand_hi32_zero(Node* n) {
1581   int opc = n->Opcode();
1582   if (opc == Op_AndL) {
1583     Node* o2 = n->in(2);
1584     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1585       return true;
1586     }
1587   }
1588   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1589     return true;
1590   }
1591   return false;
1592 }
1593 
1594 %}
1595 
1596 //----------ENCODING BLOCK-----------------------------------------------------
1597 // This block specifies the encoding classes used by the compiler to output
1598 // byte streams.  Encoding classes generate functions which are called by
1599 // Machine Instruction Nodes in order to generate the bit encoding of the
1600 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
1602 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1603 // operand to generate a function which returns its register number when
1604 // queried.   CONST_INTER causes an operand to generate a function which
1605 // returns the value of the constant when queried.  MEMORY_INTER causes an
1606 // operand to generate four functions which return the Base Register, the
1607 // Index Register, the Scale Value, and the Offset Value of the operand when
1608 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1610 // associated with each basic boolean condition for a conditional instruction.
1611 // Instructions specify two basic values for encoding.  They use the
1612 // ins_encode keyword to specify their encoding class (which must be one of
1613 // the class names specified in the encoding block), and they use the
1614 // opcode keyword to specify, in order, their primary, secondary, and
1615 // tertiary opcode.  Only the opcode sections which a particular instruction
1616 // needs for encoding need to be specified.
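// For example (illustrative only, not a rule defined in this file): an
// instruct that declared "opcode(0x8B);" and "ins_encode(OpcP, RegMem(dst, mem));"
// would emit the MOV r32,r/m32 opcode byte via OpcP below, followed by the
// ModRM/SIB/displacement bytes for the memory operand via RegMem.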
1617 encode %{
1618   // Build emit functions for each basic byte or larger field in the intel
1619   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1620   // code in the enc_class source block.  Emit functions will live in the
1621   // main source block for now.  In future, we can generalize this by
1622   // adding a syntax that specifies the sizes of fields in an order,
1623   // so that the adlc can build the emit functions automagically
1624 
1625   // Emit primary opcode
1626   enc_class OpcP %{
1627     emit_opcode(cbuf, $primary);
1628   %}
1629 
1630   // Emit secondary opcode
1631   enc_class OpcS %{
1632     emit_opcode(cbuf, $secondary);
1633   %}
1634 
1635   // Emit opcode directly
1636   enc_class Opcode(immI d8) %{
1637     emit_opcode(cbuf, $d8$$constant);
1638   %}
1639 
1640   enc_class SizePrefix %{
1641     emit_opcode(cbuf,0x66);
1642   %}
1643 
1644   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1645     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1646   %}
1647 
1648   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1649     emit_opcode(cbuf,$opcode$$constant);
1650     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1651   %}
1652 
1653   enc_class mov_r32_imm0( rRegI dst ) %{
1654     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1655     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1656   %}
1657 
1658   enc_class cdq_enc %{
1659     // Full implementation of Java idiv and irem; checks for
1660     // special case as described in JVM spec., p.243 & p.271.
1661     //
1662     //         normal case                           special case
1663     //
    // input : rax: dividend                          min_int
    //         reg: divisor                           -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
    //
    //  Code sequence:
1671     //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,0FFFFFFFFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        ecx
1680     //                  done:
1681     //
1682     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1683     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp eax,80000000h
1685     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1686     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1687     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,0FFFFFFFFh
1690     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1691     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1692     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1693     // normal_case:
1694     emit_opcode(cbuf,0x99);                                         // cdq
1695     // idiv (note: must be emitted by the user of this rule)
1696     // normal:
1697   %}
1698 
1699   // Dense encoding for older common ops
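  // (these pack the register number into the low 3 bits of the opcode byte,
  //  e.g. 0x50+reg is PUSH r32 and 0xB8+reg is MOV r32,imm32 as used by
  //  mov_r32_imm0 above)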
1700   enc_class Opc_plus(immI opcode, rRegI reg) %{
1701     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1702   %}
1703 
1704 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
1706   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1707     // Check for 8-bit immediate, and set sign extend bit in opcode
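    // (for the ALU-immediate group this is 0x81 /r imm32 vs. 0x83 /r imm8;
    //  OR-ing in 0x02 selects the sign-extended imm8 form)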
1708     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1709       emit_opcode(cbuf, $primary | 0x02);
1710     }
1711     else {                          // If 32-bit immediate
1712       emit_opcode(cbuf, $primary);
1713     }
1714   %}
1715 
1716   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1717     // Emit primary opcode and set sign-extend bit
1718     // Check for 8-bit immediate, and set sign extend bit in opcode
1719     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
1721     else {                          // If 32-bit immediate
1722       emit_opcode(cbuf, $primary);
1723     }
1724     // Emit r/m byte with secondary opcode, after primary opcode.
1725     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1726   %}
1727 
1728   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1729     // Check for 8-bit immediate, and set sign extend bit in opcode
1730     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1731       $$$emit8$imm$$constant;
1732     }
1733     else {                          // If 32-bit immediate
1734       // Output immediate
1735       $$$emit32$imm$$constant;
1736     }
1737   %}
1738 
1739   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1740     // Emit primary opcode and set sign-extend bit
1741     // Check for 8-bit immediate, and set sign extend bit in opcode
1742     int con = (int)$imm$$constant; // Throw away top bits
1743     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1744     // Emit r/m byte with secondary opcode, after primary opcode.
1745     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1746     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1747     else                               emit_d32(cbuf,con);
1748   %}
1749 
1750   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1751     // Emit primary opcode and set sign-extend bit
1752     // Check for 8-bit immediate, and set sign extend bit in opcode
1753     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1754     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1755     // Emit r/m byte with tertiary opcode, after primary opcode.
1756     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1757     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1758     else                               emit_d32(cbuf,con);
1759   %}
1760 
1761   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1762     emit_cc(cbuf, $secondary, $dst$$reg );
1763   %}
1764 
1765   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1766     int destlo = $dst$$reg;
1767     int desthi = HIGH_FROM_LOW(destlo);
1768     // bswap lo
1769     emit_opcode(cbuf, 0x0F);
1770     emit_cc(cbuf, 0xC8, destlo);
1771     // bswap hi
1772     emit_opcode(cbuf, 0x0F);
1773     emit_cc(cbuf, 0xC8, desthi);
1774     // xchg lo and hi
1775     emit_opcode(cbuf, 0x87);
1776     emit_rm(cbuf, 0x3, destlo, desthi);
1777   %}
1778 
1779   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1780     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1781   %}
1782 
1783   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1784     $$$emit8$primary;
1785     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1786   %}
1787 
1788   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1789     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1790     emit_d8(cbuf, op >> 8 );
1791     emit_d8(cbuf, op & 255);
1792   %}
1793 
1794   // emulate a CMOV with a conditional branch around a MOV
1795   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1796     // Invert sense of branch from sense of CMOV
1797     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1798     emit_d8( cbuf, $brOffs$$constant );
1799   %}
1800 
1801   enc_class enc_PartialSubtypeCheck( ) %{
1802     Register Redi = as_Register(EDI_enc); // result register
1803     Register Reax = as_Register(EAX_enc); // super class
1804     Register Recx = as_Register(ECX_enc); // killed
1805     Register Resi = as_Register(ESI_enc); // sub class
1806     Label miss;
1807 
1808     MacroAssembler _masm(&cbuf);
1809     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1810                                      NULL, &miss,
1811                                      /*set_cond_codes:*/ true);
1812     if ($primary) {
1813       __ xorptr(Redi, Redi);
1814     }
1815     __ bind(miss);
1816   %}
1817 
1818   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1819     MacroAssembler masm(&cbuf);
1820     int start = masm.offset();
1821     if (UseSSE >= 2) {
1822       if (VerifyFPU) {
1823         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1824       }
1825     } else {
1826       // External c_calling_convention expects the FPU stack to be 'clean'.
1827       // Compiled code leaves it dirty.  Do cleanup now.
1828       masm.empty_FPU_stack();
1829     }
1830     if (sizeof_FFree_Float_Stack_All == -1) {
1831       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1832     } else {
1833       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1834     }
1835   %}
1836 
1837   enc_class Verify_FPU_For_Leaf %{
1838     if( VerifyFPU ) {
1839       MacroAssembler masm(&cbuf);
1840       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1841     }
1842   %}
1843 
1844   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1845     // This is the instruction starting address for relocation info.
1846     cbuf.set_insts_mark();
1847     $$$emit8$primary;
1848     // CALL directly to the runtime
1849     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1850                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1851 
1852     if (UseSSE >= 2) {
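      // Note: the 32-bit C calling convention returns float/double results
      // in x87 ST(0); when compiled code uses SSE registers the value is
      // bounced through the stack into XMM0 below.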
1853       MacroAssembler _masm(&cbuf);
1854       BasicType rt = tf()->return_type();
1855 
1856       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1857         // A C runtime call where the return value is unused.  In SSE2+
1858         // mode the result needs to be removed from the FPU stack.  It's
1859         // likely that this function call could be removed by the
1860         // optimizer if the C function is a pure function.
1861         __ ffree(0);
1862       } else if (rt == T_FLOAT) {
1863         __ lea(rsp, Address(rsp, -4));
1864         __ fstp_s(Address(rsp, 0));
1865         __ movflt(xmm0, Address(rsp, 0));
1866         __ lea(rsp, Address(rsp,  4));
1867       } else if (rt == T_DOUBLE) {
1868         __ lea(rsp, Address(rsp, -8));
1869         __ fstp_d(Address(rsp, 0));
1870         __ movdbl(xmm0, Address(rsp, 0));
1871         __ lea(rsp, Address(rsp,  8));
1872       }
1873     }
1874   %}
1875 
1876 
1877   enc_class pre_call_resets %{
1878     // If method sets FPU control word restore it here
1879     debug_only(int off0 = cbuf.insts_size());
1880     if (ra_->C->in_24_bit_fp_mode()) {
1881       MacroAssembler _masm(&cbuf);
1882       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1883     }
1884     if (ra_->C->max_vector_size() > 16) {
1885       // Clear upper bits of YMM registers when current compiled code uses
1886       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1887       MacroAssembler _masm(&cbuf);
1888       __ vzeroupper();
1889     }
1890     debug_only(int off1 = cbuf.insts_size());
1891     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1892   %}
1893 
1894   enc_class post_call_FPU %{
1895     // If method sets FPU control word do it here also
1896     if (Compile::current()->in_24_bit_fp_mode()) {
1897       MacroAssembler masm(&cbuf);
1898       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1899     }
1900   %}
1901 
1902   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1903     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1904     // who we intended to call.
1905     cbuf.set_insts_mark();
1906     $$$emit8$primary;
1907 
1908     if (!_method) {
1909       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1910                      runtime_call_Relocation::spec(),
1911                      RELOC_IMM32);
1912     } else {
1913       int method_index = resolved_method_index(cbuf);
1914       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1915                                                   : static_call_Relocation::spec(method_index);
1916       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1917                      rspec, RELOC_DISP32);
1918       // Emit stubs for static call.
1919       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1920       if (stub == NULL) {
1921         ciEnv::current()->record_failure("CodeCache is full");
1922         return;
1923       }
1924     }
1925   %}
1926 
1927   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1928     MacroAssembler _masm(&cbuf);
1929     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1930   %}
1931 
1932   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1933     int disp = in_bytes(Method::from_compiled_offset());
1934     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1935 
1936     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1937     cbuf.set_insts_mark();
1938     $$$emit8$primary;
1939     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1940     emit_d8(cbuf, disp);             // Displacement
1941 
1942   %}
1943 
1944 //   Following encoding is no longer used, but may be restored if calling
1945 //   convention changes significantly.
1946 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1947 //
1948 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1949 //     // int ic_reg     = Matcher::inline_cache_reg();
1950 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1951 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1952 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1953 //
1954 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1955 //     // // so we load it immediately before the call
1956 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1957 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1958 //
1959 //     // xor rbp,ebp
1960 //     emit_opcode(cbuf, 0x33);
1961 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1962 //
1963 //     // CALL to interpreter.
1964 //     cbuf.set_insts_mark();
1965 //     $$$emit8$primary;
1966 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1967 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1968 //   %}
1969 
1970   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1971     $$$emit8$primary;
1972     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1973     $$$emit8$shift$$constant;
1974   %}
1975 
1976   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1977     // Load immediate does not have a zero or sign extended version
1978     // for 8-bit immediates
1979     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1980     $$$emit32$src$$constant;
1981   %}
1982 
1983   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1984     // Load immediate does not have a zero or sign extended version
1985     // for 8-bit immediates
1986     emit_opcode(cbuf, $primary + $dst$$reg);
1987     $$$emit32$src$$constant;
1988   %}
1989 
1990   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1991     // Load immediate does not have a zero or sign extended version
1992     // for 8-bit immediates
1993     int dst_enc = $dst$$reg;
1994     int src_con = $src$$constant & 0x0FFFFFFFFL;
1995     if (src_con == 0) {
1996       // xor dst, dst
1997       emit_opcode(cbuf, 0x33);
1998       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1999     } else {
2000       emit_opcode(cbuf, $primary + dst_enc);
2001       emit_d32(cbuf, src_con);
2002     }
2003   %}
2004 
2005   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2006     // Load immediate does not have a zero or sign extended version
2007     // for 8-bit immediates
2008     int dst_enc = $dst$$reg + 2;
2009     int src_con = ((julong)($src$$constant)) >> 32;
2010     if (src_con == 0) {
2011       // xor dst, dst
2012       emit_opcode(cbuf, 0x33);
2013       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2014     } else {
2015       emit_opcode(cbuf, $primary + dst_enc);
2016       emit_d32(cbuf, src_con);
2017     }
2018   %}
2019 
2020 
2021   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2022   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2023     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2024   %}
2025 
2026   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2027     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2028   %}
2029 
2030   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2031     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2032   %}
2033 
2034   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2035     $$$emit8$primary;
2036     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2037   %}
2038 
2039   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2040     $$$emit8$secondary;
2041     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2042   %}
2043 
2044   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2045     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2046   %}
2047 
2048   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2049     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2050   %}
2051 
2052   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2053     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2054   %}
2055 
2056   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2057     // Output immediate
2058     $$$emit32$src$$constant;
2059   %}
2060 
2061   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2062     // Output Float immediate bits
2063     jfloat jf = $src$$constant;
2064     int    jf_as_bits = jint_cast( jf );
2065     emit_d32(cbuf, jf_as_bits);
2066   %}
2067 
2068   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2069     // Output Float immediate bits
2070     jfloat jf = $src$$constant;
2071     int    jf_as_bits = jint_cast( jf );
2072     emit_d32(cbuf, jf_as_bits);
2073   %}
2074 
2075   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2076     // Output immediate
2077     $$$emit16$src$$constant;
2078   %}
2079 
2080   enc_class Con_d32(immI src) %{
2081     emit_d32(cbuf,$src$$constant);
2082   %}
2083 
2084   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2085     // Output immediate memory reference
2086     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2087     emit_d32(cbuf, 0x00);
2088   %}
2089 
2090   enc_class lock_prefix( ) %{
2091     if( os::is_MP() )
2092       emit_opcode(cbuf,0xF0);         // [Lock]
2093   %}
2094 
2095   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
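  //       For reference: CMPXCHG8B m64 compares EDX:EAX with the 64-bit
  //       memory operand; if equal it stores ECX:EBX there and sets ZF,
  //       otherwise it loads the memory value into EDX:EAX and clears ZF.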
2100   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2101 
    // XCHG  ebx,ecx
2103     emit_opcode(cbuf,0x87);
2104     emit_opcode(cbuf,0xD9);
2105     // [Lock]
2106     if( os::is_MP() )
2107       emit_opcode(cbuf,0xF0);
2108     // CMPXCHG8 [Eptr]
2109     emit_opcode(cbuf,0x0F);
2110     emit_opcode(cbuf,0xC7);
2111     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  ebx,ecx
2113     emit_opcode(cbuf,0x87);
2114     emit_opcode(cbuf,0xD9);
2115   %}
2116 
2117   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2118     // [Lock]
2119     if( os::is_MP() )
2120       emit_opcode(cbuf,0xF0);
2121 
2122     // CMPXCHG [Eptr]
2123     emit_opcode(cbuf,0x0F);
2124     emit_opcode(cbuf,0xB1);
2125     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2126   %}
2127 
2128   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2129     int res_encoding = $res$$reg;
2130 
2131     // MOV  res,0
2132     emit_opcode( cbuf, 0xB8 + res_encoding);
2133     emit_d32( cbuf, 0 );
2134     // JNE,s  fail
2135     emit_opcode(cbuf,0x75);
2136     emit_d8(cbuf, 5 );
2137     // MOV  res,1
2138     emit_opcode( cbuf, 0xB8 + res_encoding);
2139     emit_d32( cbuf, 1 );
2140     // fail:
2141   %}
2142 
2143   enc_class set_instruction_start( ) %{
2144     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2145   %}
2146 
2147   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2148     int reg_encoding = $ereg$$reg;
2149     int base  = $mem$$base;
2150     int index = $mem$$index;
2151     int scale = $mem$$scale;
2152     int displace = $mem$$disp;
2153     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2154     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2155   %}
2156 
2157   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2158     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2159     int base  = $mem$$base;
2160     int index = $mem$$index;
2161     int scale = $mem$$scale;
2162     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2163     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2164     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2165   %}
2166 
2167   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
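    // $tertiary selects the double shift: 0x0F 0xA4 is SHLD r/m32,r32,imm8
    // and 0x0F 0xAC is SHRD r/m32,r32,imm8, so the same rule moves bits
    // across the word boundary for long shifts by 1..31 and then finishes
    // the other half with the plain $primary/$secondary shift below.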
2168     int r1, r2;
2169     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2170     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2171     emit_opcode(cbuf,0x0F);
2172     emit_opcode(cbuf,$tertiary);
2173     emit_rm(cbuf, 0x3, r1, r2);
2174     emit_d8(cbuf,$cnt$$constant);
2175     emit_d8(cbuf,$primary);
2176     emit_rm(cbuf, 0x3, $secondary, r1);
2177     emit_d8(cbuf,$cnt$$constant);
2178   %}
2179 
2180   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2181     emit_opcode( cbuf, 0x8B ); // Move
2182     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2183     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2184       emit_d8(cbuf,$primary);
2185       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2186       emit_d8(cbuf,$cnt$$constant-32);
2187     }
2188     emit_d8(cbuf,$primary);
2189     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2190     emit_d8(cbuf,31);
2191   %}
2192 
2193   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2194     int r1, r2;
2195     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2196     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2197 
2198     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2199     emit_rm(cbuf, 0x3, r1, r2);
2200     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2201       emit_opcode(cbuf,$primary);
2202       emit_rm(cbuf, 0x3, $secondary, r1);
2203       emit_d8(cbuf,$cnt$$constant-32);
2204     }
2205     emit_opcode(cbuf,0x33);  // XOR r2,r2
2206     emit_rm(cbuf, 0x3, r2, r2);
2207   %}
2208 
2209   // Clone of RegMem but accepts an extra parameter to access each
2210   // half of a double in memory; it never needs relocation info.
2211   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2212     emit_opcode(cbuf,$opcode$$constant);
2213     int reg_encoding = $rm_reg$$reg;
2214     int base     = $mem$$base;
2215     int index    = $mem$$index;
2216     int scale    = $mem$$scale;
2217     int displace = $mem$$disp + $disp_for_half$$constant;
2218     relocInfo::relocType disp_reloc = relocInfo::none;
2219     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2220   %}
2221 
2222   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2223   //
2224   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2225   // and it never needs relocation information.
2226   // Frequently used to move data between FPU's Stack Top and memory.
2227   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2228     int rm_byte_opcode = $rm_opcode$$constant;
2229     int base     = $mem$$base;
2230     int index    = $mem$$index;
2231     int scale    = $mem$$scale;
2232     int displace = $mem$$disp;
2233     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2234     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2235   %}
2236 
2237   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2238     int rm_byte_opcode = $rm_opcode$$constant;
2239     int base     = $mem$$base;
2240     int index    = $mem$$index;
2241     int scale    = $mem$$scale;
2242     int displace = $mem$$disp;
2243     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2244     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2245   %}
2246 
2247   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2248     int reg_encoding = $dst$$reg;
2249     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2250     int index        = 0x04;            // 0x04 indicates no index
2251     int scale        = 0x00;            // 0x00 indicates no scale
2252     int displace     = $src1$$constant; // 0x00 indicates no displacement
2253     relocInfo::relocType disp_reloc = relocInfo::none;
2254     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2255   %}
2256 
2257   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2258     // Compare dst,src
2259     emit_opcode(cbuf,0x3B);
2260     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2261     // jmp dst < src around move
2262     emit_opcode(cbuf,0x7C);
2263     emit_d8(cbuf,2);
2264     // move dst,src
2265     emit_opcode(cbuf,0x8B);
2266     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2267   %}
2268 
2269   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2270     // Compare dst,src
2271     emit_opcode(cbuf,0x3B);
2272     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2273     // jmp dst > src around move
2274     emit_opcode(cbuf,0x7F);
2275     emit_d8(cbuf,2);
2276     // move dst,src
2277     emit_opcode(cbuf,0x8B);
2278     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2279   %}
2280 
2281   enc_class enc_FPR_store(memory mem, regDPR src) %{
2282     // If src is FPR1, we can just FST to store it.
2283     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2284     int reg_encoding = 0x2; // Just store
2285     int base  = $mem$$base;
2286     int index = $mem$$index;
2287     int scale = $mem$$scale;
2288     int displace = $mem$$disp;
2289     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2290     if( $src$$reg != FPR1L_enc ) {
2291       reg_encoding = 0x3;  // Store & pop
2292       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2293       emit_d8( cbuf, 0xC0-1+$src$$reg );
2294     }
2295     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2296     emit_opcode(cbuf,$primary);
2297     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2298   %}
2299 
2300   enc_class neg_reg(rRegI dst) %{
2301     // NEG $dst
2302     emit_opcode(cbuf,0xF7);
2303     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2304   %}
2305 
2306   enc_class setLT_reg(eCXRegI dst) %{
2307     // SETLT $dst
2308     emit_opcode(cbuf,0x0F);
2309     emit_opcode(cbuf,0x9C);
2310     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2311   %}
2312 
2313   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2314     int tmpReg = $tmp$$reg;
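    // After the SUB below, SBB tmp,tmp turns the borrow flag into 0 or -1,
    // so the AND/ADD pair performs a branch-free conditional add of $y.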
2315 
2316     // SUB $p,$q
2317     emit_opcode(cbuf,0x2B);
2318     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2319     // SBB $tmp,$tmp
2320     emit_opcode(cbuf,0x1B);
2321     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2322     // AND $tmp,$y
2323     emit_opcode(cbuf,0x23);
2324     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2325     // ADD $p,$tmp
2326     emit_opcode(cbuf,0x03);
2327     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2328   %}
2329 
2330   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2331     // TEST shift,32
2332     emit_opcode(cbuf,0xF7);
2333     emit_rm(cbuf, 0x3, 0, ECX_enc);
2334     emit_d32(cbuf,0x20);
2335     // JEQ,s small
2336     emit_opcode(cbuf, 0x74);
2337     emit_d8(cbuf, 0x04);
2338     // MOV    $dst.hi,$dst.lo
2339     emit_opcode( cbuf, 0x8B );
2340     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2341     // CLR    $dst.lo
2342     emit_opcode(cbuf, 0x33);
2343     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2344 // small:
2345     // SHLD   $dst.hi,$dst.lo,$shift
2346     emit_opcode(cbuf,0x0F);
2347     emit_opcode(cbuf,0xA5);
2348     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL    $dst.lo,$shift
2350     emit_opcode(cbuf,0xD3);
2351     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2352   %}
2353 
2354   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2355     // TEST shift,32
2356     emit_opcode(cbuf,0xF7);
2357     emit_rm(cbuf, 0x3, 0, ECX_enc);
2358     emit_d32(cbuf,0x20);
2359     // JEQ,s small
2360     emit_opcode(cbuf, 0x74);
2361     emit_d8(cbuf, 0x04);
2362     // MOV    $dst.lo,$dst.hi
2363     emit_opcode( cbuf, 0x8B );
2364     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2365     // CLR    $dst.hi
2366     emit_opcode(cbuf, 0x33);
2367     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2368 // small:
2369     // SHRD   $dst.lo,$dst.hi,$shift
2370     emit_opcode(cbuf,0x0F);
2371     emit_opcode(cbuf,0xAD);
2372     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
2374     emit_opcode(cbuf,0xD3);
2375     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2376   %}
2377 
2378   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2379     // TEST shift,32
2380     emit_opcode(cbuf,0xF7);
2381     emit_rm(cbuf, 0x3, 0, ECX_enc);
2382     emit_d32(cbuf,0x20);
2383     // JEQ,s small
2384     emit_opcode(cbuf, 0x74);
2385     emit_d8(cbuf, 0x05);
2386     // MOV    $dst.lo,$dst.hi
2387     emit_opcode( cbuf, 0x8B );
2388     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2389     // SAR    $dst.hi,31
2390     emit_opcode(cbuf, 0xC1);
2391     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2392     emit_d8(cbuf, 0x1F );
2393 // small:
2394     // SHRD   $dst.lo,$dst.hi,$shift
2395     emit_opcode(cbuf,0x0F);
2396     emit_opcode(cbuf,0xAD);
2397     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
2399     emit_opcode(cbuf,0xD3);
2400     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2401   %}
2402 
2403 
2404   // ----------------- Encodings for floating point unit -----------------
2405   // May leave result in FPU-TOS or FPU reg depending on opcodes
2406   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2407     $$$emit8$primary;
2408     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2409   %}
2410 
2411   // Pop argument in FPR0 with FSTP ST(0)
2412   enc_class PopFPU() %{
2413     emit_opcode( cbuf, 0xDD );
2414     emit_d8( cbuf, 0xD8 );
2415   %}
2416 
2417   // !!!!! equivalent to Pop_Reg_F
2418   enc_class Pop_Reg_DPR( regDPR dst ) %{
2419     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2420     emit_d8( cbuf, 0xD8+$dst$$reg );
2421   %}
2422 
2423   enc_class Push_Reg_DPR( regDPR dst ) %{
2424     emit_opcode( cbuf, 0xD9 );
2425     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2426   %}
2427 
2428   enc_class strictfp_bias1( regDPR dst ) %{
2429     emit_opcode( cbuf, 0xDB );           // FLD m80real
2430     emit_opcode( cbuf, 0x2D );
2431     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2432     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2433     emit_opcode( cbuf, 0xC8+$dst$$reg );
2434   %}
2435 
2436   enc_class strictfp_bias2( regDPR dst ) %{
2437     emit_opcode( cbuf, 0xDB );           // FLD m80real
2438     emit_opcode( cbuf, 0x2D );
2439     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2440     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2441     emit_opcode( cbuf, 0xC8+$dst$$reg );
2442   %}
2443 
2444   // Special case for moving an integer register to a stack slot.
2445   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2446     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2447   %}
2448 
2449   // Special case for moving a register to a stack slot.
2450   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2451     // Opcode already emitted
2452     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2453     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2454     emit_d32(cbuf, $dst$$disp);   // Displacement
2455   %}
2456 
2457   // Push the integer in stackSlot 'src' onto FP-stack
2458   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2459     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2460   %}
2461 
2462   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2463   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2464     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2465   %}
2466 
2467   // Same as Pop_Mem_F except for opcode
2468   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2469   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2470     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2471   %}
2472 
2473   enc_class Pop_Reg_FPR( regFPR dst ) %{
2474     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2475     emit_d8( cbuf, 0xD8+$dst$$reg );
2476   %}
2477 
2478   enc_class Push_Reg_FPR( regFPR dst ) %{
2479     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2480     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2481   %}
2482 
2483   // Push FPU's float to a stack-slot, and pop FPU-stack
2484   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2485     int pop = 0x02;
2486     if ($src$$reg != FPR1L_enc) {
2487       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2488       emit_d8( cbuf, 0xC0-1+$src$$reg );
2489       pop = 0x03;
2490     }
2491     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2492   %}
2493 
2494   // Push FPU's double to a stack-slot, and pop FPU-stack
2495   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2496     int pop = 0x02;
2497     if ($src$$reg != FPR1L_enc) {
2498       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2499       emit_d8( cbuf, 0xC0-1+$src$$reg );
2500       pop = 0x03;
2501     }
2502     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2503   %}
2504 
2505   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2506   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2507     int pop = 0xD0 - 1; // -1 since we skip FLD
2508     if ($src$$reg != FPR1L_enc) {
2509       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2510       emit_d8( cbuf, 0xC0-1+$src$$reg );
2511       pop = 0xD8;
2512     }
2513     emit_opcode( cbuf, 0xDD );
2514     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2515   %}
2516 
2517 
2518   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2519     // load dst in FPR0
2520     emit_opcode( cbuf, 0xD9 );
2521     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2522     if ($src$$reg != FPR1L_enc) {
2523       // fincstp
2524       emit_opcode (cbuf, 0xD9);
2525       emit_opcode (cbuf, 0xF7);
2526       // swap src with FPR1:
2527       // FXCH FPR1 with src
2528       emit_opcode(cbuf, 0xD9);
2529       emit_d8(cbuf, 0xC8-1+$src$$reg );
2530       // fdecstp
2531       emit_opcode (cbuf, 0xD9);
2532       emit_opcode (cbuf, 0xF6);
2533     }
2534   %}
2535 
2536   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2537     MacroAssembler _masm(&cbuf);
2538     __ subptr(rsp, 8);
2539     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2540     __ fld_d(Address(rsp, 0));
2541     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2542     __ fld_d(Address(rsp, 0));
2543   %}
2544 
2545   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2546     MacroAssembler _masm(&cbuf);
2547     __ subptr(rsp, 4);
2548     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2549     __ fld_s(Address(rsp, 0));
2550     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2551     __ fld_s(Address(rsp, 0));
2552   %}
2553 
2554   enc_class Push_ResultD(regD dst) %{
2555     MacroAssembler _masm(&cbuf);
2556     __ fstp_d(Address(rsp, 0));
2557     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2558     __ addptr(rsp, 8);
2559   %}
2560 
2561   enc_class Push_ResultF(regF dst, immI d8) %{
2562     MacroAssembler _masm(&cbuf);
2563     __ fstp_s(Address(rsp, 0));
2564     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2565     __ addptr(rsp, $d8$$constant);
2566   %}
2567 
2568   enc_class Push_SrcD(regD src) %{
2569     MacroAssembler _masm(&cbuf);
2570     __ subptr(rsp, 8);
2571     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2572     __ fld_d(Address(rsp, 0));
2573   %}
2574 
2575   enc_class push_stack_temp_qword() %{
2576     MacroAssembler _masm(&cbuf);
2577     __ subptr(rsp, 8);
2578   %}
2579 
2580   enc_class pop_stack_temp_qword() %{
2581     MacroAssembler _masm(&cbuf);
2582     __ addptr(rsp, 8);
2583   %}
2584 
2585   enc_class push_xmm_to_fpr1(regD src) %{
2586     MacroAssembler _masm(&cbuf);
2587     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2588     __ fld_d(Address(rsp, 0));
2589   %}
2590 
2591   enc_class Push_Result_Mod_DPR( regDPR src) %{
2592     if ($src$$reg != FPR1L_enc) {
2593       // fincstp
2594       emit_opcode (cbuf, 0xD9);
2595       emit_opcode (cbuf, 0xF7);
2596       // FXCH FPR1 with src
2597       emit_opcode(cbuf, 0xD9);
2598       emit_d8(cbuf, 0xC8-1+$src$$reg );
2599       // fdecstp
2600       emit_opcode (cbuf, 0xD9);
2601       emit_opcode (cbuf, 0xF6);
2602     }
2603     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2604     // // FSTP   FPR$dst$$reg
2605     // emit_opcode( cbuf, 0xDD );
2606     // emit_d8( cbuf, 0xD8+$dst$$reg );
2607   %}
2608 
2609   enc_class fnstsw_sahf_skip_parity() %{
2610     // fnstsw ax
2611     emit_opcode( cbuf, 0xDF );
2612     emit_opcode( cbuf, 0xE0 );
2613     // sahf
2614     emit_opcode( cbuf, 0x9E );
2615     // jnp  ::skip
2616     emit_opcode( cbuf, 0x7B );
2617     emit_opcode( cbuf, 0x05 );
2618   %}
2619 
2620   enc_class emitModDPR() %{
2621     // fprem must be iterative
2622     // :: loop
2623     // fprem
2624     emit_opcode( cbuf, 0xD9 );
2625     emit_opcode( cbuf, 0xF8 );
2626     // wait
2627     emit_opcode( cbuf, 0x9b );
2628     // fnstsw ax
2629     emit_opcode( cbuf, 0xDF );
2630     emit_opcode( cbuf, 0xE0 );
2631     // sahf
2632     emit_opcode( cbuf, 0x9E );
2633     // jp  ::loop
2634     emit_opcode( cbuf, 0x0F );
2635     emit_opcode( cbuf, 0x8A );
2636     emit_opcode( cbuf, 0xF4 );
2637     emit_opcode( cbuf, 0xFF );
2638     emit_opcode( cbuf, 0xFF );
2639     emit_opcode( cbuf, 0xFF );
2640   %}
2641 
2642   enc_class fpu_flags() %{
2643     // fnstsw_ax
2644     emit_opcode( cbuf, 0xDF);
2645     emit_opcode( cbuf, 0xE0);
2646     // test ax,0x0400
2647     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2648     emit_opcode( cbuf, 0xA9 );
2649     emit_d16   ( cbuf, 0x0400 );
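    // (bit 10 = 0x0400 of the FPU status word is C2; after a compare it is
    //  set only when the operands were unordered, i.e. at least one NaN)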
2650     // // // This sequence works, but stalls for 12-16 cycles on PPro
2651     // // test rax,0x0400
2652     // emit_opcode( cbuf, 0xA9 );
2653     // emit_d32   ( cbuf, 0x00000400 );
2654     //
2655     // jz exit (no unordered comparison)
2656     emit_opcode( cbuf, 0x74 );
2657     emit_d8    ( cbuf, 0x02 );
2658     // mov ah,1 - treat as LT case (set carry flag)
2659     emit_opcode( cbuf, 0xB4 );
2660     emit_d8    ( cbuf, 0x01 );
2661     // sahf
2662     emit_opcode( cbuf, 0x9E);
2663   %}
2664 
2665   enc_class cmpF_P6_fixup() %{
2666     // Fixup the integer flags in case comparison involved a NaN
2667     //
2668     // JNP exit (no unordered comparison, P-flag is set by NaN)
2669     emit_opcode( cbuf, 0x7B );
2670     emit_d8    ( cbuf, 0x03 );
2671     // MOV AH,1 - treat as LT case (set carry flag)
2672     emit_opcode( cbuf, 0xB4 );
2673     emit_d8    ( cbuf, 0x01 );
2674     // SAHF
2675     emit_opcode( cbuf, 0x9E);
2676     // NOP     // target for branch to avoid branch to branch
2677     emit_opcode( cbuf, 0x90);
2678   %}
2679 
2680 //     fnstsw_ax();
2681 //     sahf();
2682 //     movl(dst, nan_result);
2683 //     jcc(Assembler::parity, exit);
2684 //     movl(dst, less_result);
2685 //     jcc(Assembler::below, exit);
2686 //     movl(dst, equal_result);
2687 //     jcc(Assembler::equal, exit);
2688 //     movl(dst, greater_result);
2689 
2690 // less_result     =  1;
2691 // greater_result  = -1;
2692 // equal_result    = 0;
2693 // nan_result      = -1;
2694 
2695   enc_class CmpF_Result(rRegI dst) %{
2696     // fnstsw_ax();
2697     emit_opcode( cbuf, 0xDF);
2698     emit_opcode( cbuf, 0xE0);
2699     // sahf
2700     emit_opcode( cbuf, 0x9E);
2701     // movl(dst, nan_result);
2702     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2703     emit_d32( cbuf, -1 );
2704     // jcc(Assembler::parity, exit);
2705     emit_opcode( cbuf, 0x7A );
2706     emit_d8    ( cbuf, 0x13 );
2707     // movl(dst, less_result);
2708     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2709     emit_d32( cbuf, -1 );
2710     // jcc(Assembler::below, exit);
2711     emit_opcode( cbuf, 0x72 );
2712     emit_d8    ( cbuf, 0x0C );
2713     // movl(dst, equal_result);
2714     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2715     emit_d32( cbuf, 0 );
2716     // jcc(Assembler::equal, exit);
2717     emit_opcode( cbuf, 0x74 );
2718     emit_d8    ( cbuf, 0x05 );
2719     // movl(dst, greater_result);
2720     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2721     emit_d32( cbuf, 1 );
2722   %}
2723 
2724 
2725   // Compare the longs and set flags
2726   // BROKEN!  Do Not use as-is
2727   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2728     // CMP    $src1.hi,$src2.hi
2729     emit_opcode( cbuf, 0x3B );
2730     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2731     // JNE,s  done
2732     emit_opcode(cbuf,0x75);
2733     emit_d8(cbuf, 2 );
2734     // CMP    $src1.lo,$src2.lo
2735     emit_opcode( cbuf, 0x3B );
2736     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2737 // done:
2738   %}
2739 
2740   enc_class convert_int_long( regL dst, rRegI src ) %{
2741     // mov $dst.lo,$src
2742     int dst_encoding = $dst$$reg;
2743     int src_encoding = $src$$reg;
2744     encode_Copy( cbuf, dst_encoding  , src_encoding );
2745     // mov $dst.hi,$src
2746     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2747     // sar $dst.hi,31
2748     emit_opcode( cbuf, 0xC1 );
2749     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2750     emit_d8(cbuf, 0x1F );
2751   %}
2752 
2753   enc_class convert_long_double( eRegL src ) %{
2754     // push $src.hi
2755     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2756     // push $src.lo
2757     emit_opcode(cbuf, 0x50+$src$$reg  );
2758     // fild 64-bits at [SP]
2759     emit_opcode(cbuf,0xdf);
2760     emit_d8(cbuf, 0x6C);
2761     emit_d8(cbuf, 0x24);
2762     emit_d8(cbuf, 0x00);
2763     // pop stack
2764     emit_opcode(cbuf, 0x83); // add  SP, #8
2765     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2766     emit_d8(cbuf, 0x8);
2767   %}
2768 
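       // Multiply and take the high bits: the one-operand IMUL leaves the full
       // 64-bit product in EDX:EAX, and an arithmetic shift of EDX by ($cnt-32)
       // leaves (product >> $cnt) in EDX (the shift is skipped when $cnt is 32).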
2769   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2770     // IMUL   EDX:EAX,$src1
2771     emit_opcode( cbuf, 0xF7 );
2772     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2773     // SAR    EDX,$cnt-32
2774     int shift_count = ((int)$cnt$$constant) - 32;
2775     if (shift_count > 0) {
2776       emit_opcode(cbuf, 0xC1);
2777       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2778       emit_d8(cbuf, shift_count);
2779     }
2780   %}
2781 
2782   // this version doesn't have add sp, 8
2783   enc_class convert_long_double2( eRegL src ) %{
2784     // push $src.hi
2785     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2786     // push $src.lo
2787     emit_opcode(cbuf, 0x50+$src$$reg  );
2788     // fild 64-bits at [SP]
2789     emit_opcode(cbuf,0xdf);
2790     emit_d8(cbuf, 0x6C);
2791     emit_d8(cbuf, 0x24);
2792     emit_d8(cbuf, 0x00);
2793   %}
2794 
2795   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2796     // Basic idea: long = (long)int * (long)int
2797     // IMUL EDX:EAX, src
2798     emit_opcode( cbuf, 0xF7 );
2799     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2800   %}
2801 
2802   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2803     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2804     // MUL EDX:EAX, src
2805     emit_opcode( cbuf, 0xF7 );
2806     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2807   %}
2808 
2809   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2810     // Basic idea: lo(result) = lo(x_lo * y_lo)
2811     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2812     // MOV    $tmp,$src.lo
2813     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2814     // IMUL   $tmp,EDX
2815     emit_opcode( cbuf, 0x0F );
2816     emit_opcode( cbuf, 0xAF );
2817     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2818     // MOV    EDX,$src.hi
2819     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2820     // IMUL   EDX,EAX
2821     emit_opcode( cbuf, 0x0F );
2822     emit_opcode( cbuf, 0xAF );
2823     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2824     // ADD    $tmp,EDX
2825     emit_opcode( cbuf, 0x03 );
2826     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2827     // MUL   EDX:EAX,$src.lo
2828     emit_opcode( cbuf, 0xF7 );
2829     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2830     // ADD    EDX,$tmp
2831     emit_opcode( cbuf, 0x03 );
2832     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2833   %}
2834 
2835   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2836     // Basic idea: lo(result) = lo(src * y_lo)
2837     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2838     // IMUL   $tmp,EDX,$src
2839     emit_opcode( cbuf, 0x6B );
2840     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2841     emit_d8( cbuf, (int)$src$$constant );
2842     // MOV    EDX,$src
2843     emit_opcode(cbuf, 0xB8 + EDX_enc);
2844     emit_d32( cbuf, (int)$src$$constant );
2845     // MUL   EDX:EAX,EDX
2846     emit_opcode( cbuf, 0xF7 );
2847     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2848     // ADD    EDX,$tmp
2849     emit_opcode( cbuf, 0x03 );
2850     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2851   %}
2852 
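       // 64-bit divide: push both operands and call SharedRuntime::ldiv, then
       // pop the four argument words (16 bytes) off the stack.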
2853   enc_class long_div( eRegL src1, eRegL src2 ) %{
2854     // PUSH src1.hi
2855     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2856     // PUSH src1.lo
2857     emit_opcode(cbuf,               0x50+$src1$$reg  );
2858     // PUSH src2.hi
2859     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2860     // PUSH src2.lo
2861     emit_opcode(cbuf,               0x50+$src2$$reg  );
2862     // CALL directly to the runtime
2863     cbuf.set_insts_mark();
2864     emit_opcode(cbuf,0xE8);       // Call into runtime
2865     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2866     // Restore stack
2867     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2868     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2869     emit_d8(cbuf, 4*4);
2870   %}
2871 
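       // 64-bit remainder: same calling sequence as long_div, but targets
       // SharedRuntime::lrem.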
2872   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2873     // PUSH src1.hi
2874     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2875     // PUSH src1.lo
2876     emit_opcode(cbuf,               0x50+$src1$$reg  );
2877     // PUSH src2.hi
2878     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2879     // PUSH src2.lo
2880     emit_opcode(cbuf,               0x50+$src2$$reg  );
2881     // CALL directly to the runtime
2882     cbuf.set_insts_mark();
2883     emit_opcode(cbuf,0xE8);       // Call into runtime
2884     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2885     // Restore stack
2886     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2887     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2888     emit_d8(cbuf, 4*4);
2889   %}
2890 
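       // Test a long against zero: OR the two halves into $tmp so the zero flag
       // is set exactly when the whole 64-bit value is zero.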
2891   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2892     // MOV   $tmp,$src.lo
2893     emit_opcode(cbuf, 0x8B);
2894     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2895     // OR    $tmp,$src.hi
2896     emit_opcode(cbuf, 0x0B);
2897     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2898   %}
2899 
2900   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2901     // CMP    $src1.lo,$src2.lo
2902     emit_opcode( cbuf, 0x3B );
2903     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2904     // JNE,s  skip
2905     emit_cc(cbuf, 0x70, 0x5);
2906     emit_d8(cbuf,2);
2907     // CMP    $src1.hi,$src2.hi
2908     emit_opcode( cbuf, 0x3B );
2909     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2910   %}
2911 
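       // Signed long compare via subtract-with-borrow: CMP the low halves to set
       // the carry, then SBB the high halves into $tmp; the resulting flags
       // reflect the full 64-bit signed comparison.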
2912   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2913     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2914     emit_opcode( cbuf, 0x3B );
2915     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2916     // MOV    $tmp,$src1.hi
2917     emit_opcode( cbuf, 0x8B );
2918     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2919     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2920     emit_opcode( cbuf, 0x1B );
2921     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2922   %}
2923 
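       // Compare a long against zero: compute 0 - $src as a 64-bit subtract
       // (XOR, CMP, SBB) so the flags reflect the signed comparison of zero
       // against $src.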
2924   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2925     // XOR    $tmp,$tmp
2926     emit_opcode(cbuf,0x33);  // XOR
2927     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2928     // CMP    $tmp,$src.lo
2929     emit_opcode( cbuf, 0x3B );
2930     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2931     // SBB    $tmp,$src.hi
2932     emit_opcode( cbuf, 0x1B );
2933     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2934   %}
2935 
2936  // Sniff, sniff... smells like Gnu Superoptimizer
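       // Negate a 64-bit value held in a register pair: negate both halves, then
       // subtract the borrow out of the low half from the high half.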
2937   enc_class neg_long( eRegL dst ) %{
2938     emit_opcode(cbuf,0xF7);    // NEG hi
2939     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2940     emit_opcode(cbuf,0xF7);    // NEG lo
2941     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2942     emit_opcode(cbuf,0x83);    // SBB hi,0
2943     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2944     emit_d8    (cbuf,0 );
2945   %}
2946 
2947   enc_class enc_pop_rdx() %{
2948     emit_opcode(cbuf,0x5A);    // POP EDX
2949   %}
2950 
2951   enc_class enc_rethrow() %{
2952     cbuf.set_insts_mark();
2953     emit_opcode(cbuf, 0xE9);        // jmp    entry
2954     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2955                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2956   %}
2957 
2958 
2959   // Convert a double to an int.  Java semantics require we do complex
2960   // manglelations in the corner cases.  So we set the rounding mode to
2961   // 'zero', store the darned double down as an int, and reset the
2962   // rounding mode to 'nearest'.  The hardware throws an exception which
2963   // patches up the correct value directly to the stack.
2964   enc_class DPR2I_encoding( regDPR src ) %{
2965     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2966     // exceptions here, so that a NAN or other corner-case value will
2967     // throw an exception (but normal values get converted at full speed).
2968     // However, I2C adapters and other float-stack manglers leave pending
2969     // invalid-op exceptions hanging.  We would have to clear them before
2970     // enabling them and that is more expensive than just testing for the
2971     // invalid value Intel stores down in the corner cases.
2972     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2973     emit_opcode(cbuf,0x2D);
2974     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2975     // Allocate a word
2976     emit_opcode(cbuf,0x83);            // SUB ESP,4
2977     emit_opcode(cbuf,0xEC);
2978     emit_d8(cbuf,0x04);
2979     // Encoding assumes a double has been pushed into FPR0.
2980     // Store down the double as an int, popping the FPU stack
2981     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2982     emit_opcode(cbuf,0x1C);
2983     emit_d8(cbuf,0x24);
2984     // Restore the rounding mode; mask the exception
2985     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2986     emit_opcode(cbuf,0x2D);
2987     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2988         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2989         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2990 
2991     // Load the converted int; adjust CPU stack
2992     emit_opcode(cbuf,0x58);       // POP EAX
2993     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2994     emit_d32   (cbuf,0x80000000); //         0x80000000
2995     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2996     emit_d8    (cbuf,0x07);       // Size of slow_call
2997     // Push src onto stack slow-path
2998     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
2999     emit_d8    (cbuf,0xC0-1+$src$$reg );
3000     // CALL directly to the runtime
3001     cbuf.set_insts_mark();
3002     emit_opcode(cbuf,0xE8);       // Call into runtime
3003     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3004     // Carry on here...
3005   %}
3006 
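       // Same scheme as DPR2I_encoding above, but converting to a 64-bit long:
       // FISTP stores 8 bytes, EDX:EAX is popped and checked against the long
       // "invalid" pattern 0x8000000000000000, and the slow path calls
       // StubRoutines::d2l_wrapper().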
3007   enc_class DPR2L_encoding( regDPR src ) %{
3008     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3009     emit_opcode(cbuf,0x2D);
3010     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3011     // Allocate a word
3012     emit_opcode(cbuf,0x83);            // SUB ESP,8
3013     emit_opcode(cbuf,0xEC);
3014     emit_d8(cbuf,0x08);
3015     // Encoding assumes a double has been pushed into FPR0.
3016     // Store down the double as a long, popping the FPU stack
3017     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3018     emit_opcode(cbuf,0x3C);
3019     emit_d8(cbuf,0x24);
3020     // Restore the rounding mode; mask the exception
3021     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3022     emit_opcode(cbuf,0x2D);
3023     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3024         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3025         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3026 
3027     // Load the converted int; adjust CPU stack
3028     emit_opcode(cbuf,0x58);       // POP EAX
3029     emit_opcode(cbuf,0x5A);       // POP EDX
3030     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3031     emit_d8    (cbuf,0xFA);       // rdx
3032     emit_d32   (cbuf,0x80000000); //         0x80000000
3033     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3034     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3035     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3036     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3037     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3038     emit_d8    (cbuf,0x07);       // Size of slow_call
3039     // Push src onto stack slow-path
3040     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3041     emit_d8    (cbuf,0xC0-1+$src$$reg );
3042     // CALL directly to the runtime
3043     cbuf.set_insts_mark();
3044     emit_opcode(cbuf,0xE8);       // Call into runtime
3045     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3046     // Carry on here...
3047   %}
3048 
3049   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3050     // Operand was loaded from memory into fp ST (stack top)
3051     // FMUL   ST,$src  /* D8 C8+i */
3052     emit_opcode(cbuf, 0xD8);
3053     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3054   %}
3055 
3056   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3057     // FADDP  ST,src2  /* D8 C0+i */
3058     emit_opcode(cbuf, 0xD8);
3059     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3060     // could use FADDP  src2,fpST  /* DE C0+i */
3061   %}
3062 
3063   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3064     // FADDP  src2,ST  /* DE C0+i */
3065     emit_opcode(cbuf, 0xDE);
3066     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3067   %}
3068 
3069   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3070     // Operand has been loaded into fp ST (stack top)
3071     // FSUB   ST,$src1
3072     emit_opcode(cbuf, 0xD8);
3073     emit_opcode(cbuf, 0xE0 + $src1$$reg);
3074 
3075     // FDIV
3076     emit_opcode(cbuf, 0xD8);
3077     emit_opcode(cbuf, 0xF0 + $src2$$reg);
3078   %}
3079 
3080   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3081     // Operand was loaded from memory into fp ST (stack top)
3082     // FADD   ST,$src  /* D8 C0+i */
3083     emit_opcode(cbuf, 0xD8);
3084     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3085 
3086     // FMUL  ST,src2  /* D8 C8+i */
3087     emit_opcode(cbuf, 0xD8);
3088     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3089   %}
3090 
3091 
3092   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3093     // Operand was loaded from memory into fp ST (stack top)
3094     // FADD   ST,$src  /* D8 C0+i */
3095     emit_opcode(cbuf, 0xD8);
3096     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3097 
3098     // FMULP  src2,ST  /* DE C8+i */
3099     emit_opcode(cbuf, 0xDE);
3100     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3101   %}
3102 
3103   // Atomically load the volatile long
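       // Uses an FILD from memory followed by an FISTP to the stack slot so the
       // 64-bit read is performed as a single memory access.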
3104   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3105     emit_opcode(cbuf,0xDF);
3106     int rm_byte_opcode = 0x05;
3107     int base     = $mem$$base;
3108     int index    = $mem$$index;
3109     int scale    = $mem$$scale;
3110     int displace = $mem$$disp;
3111     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3112     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3113     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3114   %}
3115 
3116   // Volatile Store Long.  Must be atomic, so move it into
3117   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3118   // target address before the store (for null-ptr checks)
3119   // so the memory operand is used twice in the encoding.
3120   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3121     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3122     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3123     emit_opcode(cbuf,0xDF);
3124     int rm_byte_opcode = 0x07;
3125     int base     = $mem$$base;
3126     int index    = $mem$$index;
3127     int scale    = $mem$$scale;
3128     int displace = $mem$$disp;
3129     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3130     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3131   %}
3132 
3133   // Safepoint Poll.  This polls the safepoint page, and causes an
3134   // exception if it is not readable. Unfortunately, it kills the condition code
3135 // in the process.
3136 // We currently use TESTL [spp],EDI
3137   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3138 
3139   enc_class Safepoint_Poll() %{
3140     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3141     emit_opcode(cbuf,0x85);
3142     emit_rm (cbuf, 0x0, 0x7, 0x5);
3143     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3144   %}
3145 %}
3146 
3147 
3148 //----------FRAME--------------------------------------------------------------
3149 // Definition of frame structure and management information.
3150 //
3151 //  S T A C K   L A Y O U T    Allocators stack-slot number
3152 //                             |   (to get allocators register number
3153 //  G  Owned by    |        |  v    add OptoReg::stack0())
3154 //  r   CALLER     |        |
3155 //  o     |        +--------+      pad to even-align allocators stack-slot
3156 //  w     V        |  pad0  |        numbers; owned by CALLER
3157 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3158 //  h     ^        |   in   |  5
3159 //        |        |  args  |  4   Holes in incoming args owned by SELF
3160 //  |     |        |        |  3
3161 //  |     |        +--------+
3162 //  V     |        | old out|      Empty on Intel, window on Sparc
3163 //        |    old |preserve|      Must be even aligned.
3164 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3165 //        |        |   in   |  3   area for Intel ret address
3166 //     Owned by    |preserve|      Empty on Sparc.
3167 //       SELF      +--------+
3168 //        |        |  pad2  |  2   pad to align old SP
3169 //        |        +--------+  1
3170 //        |        | locks  |  0
3171 //        |        +--------+----> OptoReg::stack0(), even aligned
3172 //        |        |  pad1  | 11   pad to align new SP
3173 //        |        +--------+
3174 //        |        |        | 10
3175 //        |        | spills |  9   spills
3176 //        V        |        |  8   (pad0 slot for callee)
3177 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3178 //        ^        |  out   |  7
3179 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3180 //     Owned by    +--------+
3181 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3182 //        |    new |preserve|      Must be even-aligned.
3183 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3184 //        |        |        |
3185 //
3186 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3187 //         known from SELF's arguments and the Java calling convention.
3188 //         Region 6-7 is determined per call site.
3189 // Note 2: If the calling convention leaves holes in the incoming argument
3190 //         area, those holes are owned by SELF.  Holes in the outgoing area
3191 //         are owned by the CALLEE.  Holes should not be necessary in the
3192 //         incoming area, as the Java calling convention is completely under
3193 //         the control of the AD file.  Doubles can be sorted and packed to
3194 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3195 //         varargs C calling conventions.
3196 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3197 //         even aligned with pad0 as needed.
3198 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3199 //         region 6-11 is even aligned; it may be padded out more so that
3200 //         the region from SP to FP meets the minimum stack alignment.
3201 
3202 frame %{
3203   // What direction does stack grow in (assumed to be same for C & Java)
3204   stack_direction(TOWARDS_LOW);
3205 
3206   // These three registers define part of the calling convention
3207   // between compiled code and the interpreter.
3208   inline_cache_reg(EAX);                // Inline Cache Register
3209   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3210 
3211   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3212   cisc_spilling_operand_name(indOffset32);
3213 
3214   // Number of stack slots consumed by locking an object
3215   sync_stack_slots(1);
3216 
3217   // Compiled code's Frame Pointer
3218   frame_pointer(ESP);
3219   // Interpreter stores its frame pointer in a register which is
3220   // stored to the stack by I2CAdaptors.
3221   // I2CAdaptors convert from interpreted java to compiled java.
3222   interpreter_frame_pointer(EBP);
3223 
3224   // Stack alignment requirement
3225   // Alignment size in bytes (128-bit -> 16 bytes)
3226   stack_alignment(StackAlignmentInBytes);
3227 
3228   // Number of stack slots between incoming argument block and the start of
3229   // a new frame.  The PROLOG must add this many slots to the stack.  The
3230   // EPILOG must remove this many slots.  Intel needs one slot for
3231   // return address and one for rbp, (must save rbp)
3232   in_preserve_stack_slots(2+VerifyStackAtCalls);
3233 
3234   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3235   // for calls to C.  Supports the var-args backing area for register parms.
3236   varargs_C_out_slots_killed(0);
3237 
3238   // The after-PROLOG location of the return address.  Location of
3239   // return address specifies a type (REG or STACK) and a number
3240   // representing the register number (i.e. - use a register name) or
3241   // stack slot.
3242   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3243   // Otherwise, it is above the locks and verification slot and alignment word
3244   return_addr(STACK - 1 +
3245               round_to((Compile::current()->in_preserve_stack_slots() +
3246                         Compile::current()->fixed_slots()),
3247                        stack_alignment_in_slots()));
3248 
3249   // Body of function which returns an integer array locating
3250   // arguments either in registers or in stack slots.  Passed an array
3251   // of ideal registers called "sig" and a "length" count.  Stack-slot
3252   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3253   // arguments for a CALLEE.  Incoming stack arguments are
3254   // automatically biased by the preserve_stack_slots field above.
3255   calling_convention %{
3256     // No difference between incoming/outgoing, just pass false
3257     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3258   %}
3259 
3260 
3261   // Body of function which returns an integer array locating
3262   // arguments either in registers or in stack slots.  Passed an array
3263   // of ideal registers called "sig" and a "length" count.  Stack-slot
3264   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3265   // arguments for a CALLEE.  Incoming stack arguments are
3266   // automatically biased by the preserve_stack_slots field above.
3267   c_calling_convention %{
3268     // This is obviously always outgoing
3269     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3270   %}
3271 
3272   // Location of C & interpreter return values
3273   c_return_value %{
3274     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3275     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3276     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3277 
3278     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3279     // that C functions return float and double results in XMM0.
3280     if( ideal_reg == Op_RegD && UseSSE>=2 )
3281       return OptoRegPair(XMM0b_num,XMM0_num);
3282     if( ideal_reg == Op_RegF && UseSSE>=2 )
3283       return OptoRegPair(OptoReg::Bad,XMM0_num);
3284 
3285     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3286   %}
3287 
3288   // Location of return values
3289   return_value %{
3290     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3291     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3292     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3293     if( ideal_reg == Op_RegD && UseSSE>=2 )
3294       return OptoRegPair(XMM0b_num,XMM0_num);
3295     if( ideal_reg == Op_RegF && UseSSE>=1 )
3296       return OptoRegPair(OptoReg::Bad,XMM0_num);
3297     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3298   %}
3299 
3300 %}
3301 
3302 //----------ATTRIBUTES---------------------------------------------------------
3303 //----------Operand Attributes-------------------------------------------------
3304 op_attrib op_cost(0);        // Required cost attribute
3305 
3306 //----------Instruction Attributes---------------------------------------------
3307 ins_attrib ins_cost(100);       // Required cost attribute
3308 ins_attrib ins_size(8);         // Required size attribute (in bits)
3309 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3310                                 // non-matching short branch variant of some
3311                                 // long branch?
3312 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3313                                 // specifies the alignment that some part of the instruction (not
3314                                 // necessarily the start) requires.  If > 1, a compute_padding()
3315                                 // function must be provided for the instruction
3316 
3317 //----------OPERANDS-----------------------------------------------------------
3318 // Operand definitions must precede instruction definitions for correct parsing
3319 // in the ADLC because operands constitute user defined types which are used in
3320 // instruction definitions.
3321 
3322 //----------Simple Operands----------------------------------------------------
3323 // Immediate Operands
3324 // Integer Immediate
3325 operand immI() %{
3326   match(ConI);
3327 
3328   op_cost(10);
3329   format %{ %}
3330   interface(CONST_INTER);
3331 %}
3332 
3333 // Constant for test vs zero
3334 operand immI0() %{
3335   predicate(n->get_int() == 0);
3336   match(ConI);
3337 
3338   op_cost(0);
3339   format %{ %}
3340   interface(CONST_INTER);
3341 %}
3342 
3343 // Constant for increment
3344 operand immI1() %{
3345   predicate(n->get_int() == 1);
3346   match(ConI);
3347 
3348   op_cost(0);
3349   format %{ %}
3350   interface(CONST_INTER);
3351 %}
3352 
3353 // Constant for decrement
3354 operand immI_M1() %{
3355   predicate(n->get_int() == -1);
3356   match(ConI);
3357 
3358   op_cost(0);
3359   format %{ %}
3360   interface(CONST_INTER);
3361 %}
3362 
3363 // Valid scale values for addressing modes
3364 operand immI2() %{
3365   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3366   match(ConI);
3367 
3368   format %{ %}
3369   interface(CONST_INTER);
3370 %}
3371 
3372 operand immI8() %{
3373   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3374   match(ConI);
3375 
3376   op_cost(5);
3377   format %{ %}
3378   interface(CONST_INTER);
3379 %}
3380 
3381 operand immI16() %{
3382   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3383   match(ConI);
3384 
3385   op_cost(10);
3386   format %{ %}
3387   interface(CONST_INTER);
3388 %}
3389 
3390 // Int Immediate non-negative
3391 operand immU31()
3392 %{
3393   predicate(n->get_int() >= 0);
3394   match(ConI);
3395 
3396   op_cost(0);
3397   format %{ %}
3398   interface(CONST_INTER);
3399 %}
3400 
3401 // Constant for long shifts
3402 operand immI_32() %{
3403   predicate( n->get_int() == 32 );
3404   match(ConI);
3405 
3406   op_cost(0);
3407   format %{ %}
3408   interface(CONST_INTER);
3409 %}
3410 
3411 operand immI_1_31() %{
3412   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3413   match(ConI);
3414 
3415   op_cost(0);
3416   format %{ %}
3417   interface(CONST_INTER);
3418 %}
3419 
3420 operand immI_32_63() %{
3421   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3422   match(ConI);
3423   op_cost(0);
3424 
3425   format %{ %}
3426   interface(CONST_INTER);
3427 %}
3428 
3429 operand immI_1() %{
3430   predicate( n->get_int() == 1 );
3431   match(ConI);
3432 
3433   op_cost(0);
3434   format %{ %}
3435   interface(CONST_INTER);
3436 %}
3437 
3438 operand immI_2() %{
3439   predicate( n->get_int() == 2 );
3440   match(ConI);
3441 
3442   op_cost(0);
3443   format %{ %}
3444   interface(CONST_INTER);
3445 %}
3446 
3447 operand immI_3() %{
3448   predicate( n->get_int() == 3 );
3449   match(ConI);
3450 
3451   op_cost(0);
3452   format %{ %}
3453   interface(CONST_INTER);
3454 %}
3455 
3456 // Pointer Immediate
3457 operand immP() %{
3458   match(ConP);
3459 
3460   op_cost(10);
3461   format %{ %}
3462   interface(CONST_INTER);
3463 %}
3464 
3465 // NULL Pointer Immediate
3466 operand immP0() %{
3467   predicate( n->get_ptr() == 0 );
3468   match(ConP);
3469   op_cost(0);
3470 
3471   format %{ %}
3472   interface(CONST_INTER);
3473 %}
3474 
3475 // Long Immediate
3476 operand immL() %{
3477   match(ConL);
3478 
3479   op_cost(20);
3480   format %{ %}
3481   interface(CONST_INTER);
3482 %}
3483 
3484 // Long Immediate zero
3485 operand immL0() %{
3486   predicate( n->get_long() == 0L );
3487   match(ConL);
3488   op_cost(0);
3489 
3490   format %{ %}
3491   interface(CONST_INTER);
3492 %}
3493 
3494 // Long Immediate minus one
3495 operand immL_M1() %{
3496   predicate( n->get_long() == -1L );
3497   match(ConL);
3498   op_cost(0);
3499 
3500   format %{ %}
3501   interface(CONST_INTER);
3502 %}
3503 
3504 // Long immediate from 0 to 127.
3505 // Used for a shorter form of long mul by 10.
3506 operand immL_127() %{
3507   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3508   match(ConL);
3509   op_cost(0);
3510 
3511   format %{ %}
3512   interface(CONST_INTER);
3513 %}
3514 
3515 // Long Immediate: low 32-bit mask
3516 operand immL_32bits() %{
3517   predicate(n->get_long() == 0xFFFFFFFFL);
3518   match(ConL);
3519   op_cost(0);
3520 
3521   format %{ %}
3522   interface(CONST_INTER);
3523 %}
3524 
3525 // Long Immediate: fits in signed 32 bits
3526 operand immL32() %{
3527   predicate(n->get_long() == (int)(n->get_long()));
3528   match(ConL);
3529   op_cost(20);
3530 
3531   format %{ %}
3532   interface(CONST_INTER);
3533 %}
3534 
3535 //Double Immediate zero
3536 operand immDPR0() %{
3537   // Do additional (and counter-intuitive) test against NaN to work around VC++
3538   // bug that generates code such that NaNs compare equal to 0.0
3539   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3540   match(ConD);
3541 
3542   op_cost(5);
3543   format %{ %}
3544   interface(CONST_INTER);
3545 %}
3546 
3547 // Double Immediate one
3548 operand immDPR1() %{
3549   predicate( UseSSE<=1 && n->getd() == 1.0 );
3550   match(ConD);
3551 
3552   op_cost(5);
3553   format %{ %}
3554   interface(CONST_INTER);
3555 %}
3556 
3557 // Double Immediate
3558 operand immDPR() %{
3559   predicate(UseSSE<=1);
3560   match(ConD);
3561 
3562   op_cost(5);
3563   format %{ %}
3564   interface(CONST_INTER);
3565 %}
3566 
3567 operand immD() %{
3568   predicate(UseSSE>=2);
3569   match(ConD);
3570 
3571   op_cost(5);
3572   format %{ %}
3573   interface(CONST_INTER);
3574 %}
3575 
3576 // Double Immediate zero
3577 operand immD0() %{
3578   // Do additional (and counter-intuitive) test against NaN to work around VC++
3579   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3580   // compare equal to -0.0.
3581   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3582   match(ConD);
3583 
3584   format %{ %}
3585   interface(CONST_INTER);
3586 %}
3587 
3588 // Float Immediate zero
3589 operand immFPR0() %{
3590   predicate(UseSSE == 0 && n->getf() == 0.0F);
3591   match(ConF);
3592 
3593   op_cost(5);
3594   format %{ %}
3595   interface(CONST_INTER);
3596 %}
3597 
3598 // Float Immediate one
3599 operand immFPR1() %{
3600   predicate(UseSSE == 0 && n->getf() == 1.0F);
3601   match(ConF);
3602 
3603   op_cost(5);
3604   format %{ %}
3605   interface(CONST_INTER);
3606 %}
3607 
3608 // Float Immediate
3609 operand immFPR() %{
3610   predicate( UseSSE == 0 );
3611   match(ConF);
3612 
3613   op_cost(5);
3614   format %{ %}
3615   interface(CONST_INTER);
3616 %}
3617 
3618 // Float Immediate
3619 operand immF() %{
3620   predicate(UseSSE >= 1);
3621   match(ConF);
3622 
3623   op_cost(5);
3624   format %{ %}
3625   interface(CONST_INTER);
3626 %}
3627 
3628 // Float Immediate zero.  Zero and not -0.0
3629 operand immF0() %{
3630   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3631   match(ConF);
3632 
3633   op_cost(5);
3634   format %{ %}
3635   interface(CONST_INTER);
3636 %}
3637 
3638 // Immediates for special shifts (sign extend)
3639 
3640 // Constants for increment
3641 operand immI_16() %{
3642   predicate( n->get_int() == 16 );
3643   match(ConI);
3644 
3645   format %{ %}
3646   interface(CONST_INTER);
3647 %}
3648 
3649 operand immI_24() %{
3650   predicate( n->get_int() == 24 );
3651   match(ConI);
3652 
3653   format %{ %}
3654   interface(CONST_INTER);
3655 %}
3656 
3657 // Constant for byte-wide masking
3658 operand immI_255() %{
3659   predicate( n->get_int() == 255 );
3660   match(ConI);
3661 
3662   format %{ %}
3663   interface(CONST_INTER);
3664 %}
3665 
3666 // Constant for short-wide masking
3667 operand immI_65535() %{
3668   predicate(n->get_int() == 65535);
3669   match(ConI);
3670 
3671   format %{ %}
3672   interface(CONST_INTER);
3673 %}
3674 
3675 // Register Operands
3676 // Integer Register
3677 operand rRegI() %{
3678   constraint(ALLOC_IN_RC(int_reg));
3679   match(RegI);
3680   match(xRegI);
3681   match(eAXRegI);
3682   match(eBXRegI);
3683   match(eCXRegI);
3684   match(eDXRegI);
3685   match(eDIRegI);
3686   match(eSIRegI);
3687 
3688   format %{ %}
3689   interface(REG_INTER);
3690 %}
3691 
3692 // Subset of Integer Register
3693 operand xRegI(rRegI reg) %{
3694   constraint(ALLOC_IN_RC(int_x_reg));
3695   match(reg);
3696   match(eAXRegI);
3697   match(eBXRegI);
3698   match(eCXRegI);
3699   match(eDXRegI);
3700 
3701   format %{ %}
3702   interface(REG_INTER);
3703 %}
3704 
3705 // Special Registers
3706 operand eAXRegI(xRegI reg) %{
3707   constraint(ALLOC_IN_RC(eax_reg));
3708   match(reg);
3709   match(rRegI);
3710 
3711   format %{ "EAX" %}
3712   interface(REG_INTER);
3713 %}
3714 
3715 // Special Registers
3716 operand eBXRegI(xRegI reg) %{
3717   constraint(ALLOC_IN_RC(ebx_reg));
3718   match(reg);
3719   match(rRegI);
3720 
3721   format %{ "EBX" %}
3722   interface(REG_INTER);
3723 %}
3724 
3725 operand eCXRegI(xRegI reg) %{
3726   constraint(ALLOC_IN_RC(ecx_reg));
3727   match(reg);
3728   match(rRegI);
3729 
3730   format %{ "ECX" %}
3731   interface(REG_INTER);
3732 %}
3733 
3734 operand eDXRegI(xRegI reg) %{
3735   constraint(ALLOC_IN_RC(edx_reg));
3736   match(reg);
3737   match(rRegI);
3738 
3739   format %{ "EDX" %}
3740   interface(REG_INTER);
3741 %}
3742 
3743 operand eDIRegI(xRegI reg) %{
3744   constraint(ALLOC_IN_RC(edi_reg));
3745   match(reg);
3746   match(rRegI);
3747 
3748   format %{ "EDI" %}
3749   interface(REG_INTER);
3750 %}
3751 
3752 operand naxRegI() %{
3753   constraint(ALLOC_IN_RC(nax_reg));
3754   match(RegI);
3755   match(eCXRegI);
3756   match(eDXRegI);
3757   match(eSIRegI);
3758   match(eDIRegI);
3759 
3760   format %{ %}
3761   interface(REG_INTER);
3762 %}
3763 
3764 operand nadxRegI() %{
3765   constraint(ALLOC_IN_RC(nadx_reg));
3766   match(RegI);
3767   match(eBXRegI);
3768   match(eCXRegI);
3769   match(eSIRegI);
3770   match(eDIRegI);
3771 
3772   format %{ %}
3773   interface(REG_INTER);
3774 %}
3775 
3776 operand ncxRegI() %{
3777   constraint(ALLOC_IN_RC(ncx_reg));
3778   match(RegI);
3779   match(eAXRegI);
3780   match(eDXRegI);
3781   match(eSIRegI);
3782   match(eDIRegI);
3783 
3784   format %{ %}
3785   interface(REG_INTER);
3786 %}
3787 
3788 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3789 // //
3790 operand eSIRegI(xRegI reg) %{
3791    constraint(ALLOC_IN_RC(esi_reg));
3792    match(reg);
3793    match(rRegI);
3794 
3795    format %{ "ESI" %}
3796    interface(REG_INTER);
3797 %}
3798 
3799 // Pointer Register
3800 operand anyRegP() %{
3801   constraint(ALLOC_IN_RC(any_reg));
3802   match(RegP);
3803   match(eAXRegP);
3804   match(eBXRegP);
3805   match(eCXRegP);
3806   match(eDIRegP);
3807   match(eRegP);
3808 
3809   format %{ %}
3810   interface(REG_INTER);
3811 %}
3812 
3813 operand eRegP() %{
3814   constraint(ALLOC_IN_RC(int_reg));
3815   match(RegP);
3816   match(eAXRegP);
3817   match(eBXRegP);
3818   match(eCXRegP);
3819   match(eDIRegP);
3820 
3821   format %{ %}
3822   interface(REG_INTER);
3823 %}
3824 
3825 // On windows95, EBP is not safe to use for implicit null tests.
3826 operand eRegP_no_EBP() %{
3827   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3828   match(RegP);
3829   match(eAXRegP);
3830   match(eBXRegP);
3831   match(eCXRegP);
3832   match(eDIRegP);
3833 
3834   op_cost(100);
3835   format %{ %}
3836   interface(REG_INTER);
3837 %}
3838 
3839 operand naxRegP() %{
3840   constraint(ALLOC_IN_RC(nax_reg));
3841   match(RegP);
3842   match(eBXRegP);
3843   match(eDXRegP);
3844   match(eCXRegP);
3845   match(eSIRegP);
3846   match(eDIRegP);
3847 
3848   format %{ %}
3849   interface(REG_INTER);
3850 %}
3851 
3852 operand nabxRegP() %{
3853   constraint(ALLOC_IN_RC(nabx_reg));
3854   match(RegP);
3855   match(eCXRegP);
3856   match(eDXRegP);
3857   match(eSIRegP);
3858   match(eDIRegP);
3859 
3860   format %{ %}
3861   interface(REG_INTER);
3862 %}
3863 
3864 operand pRegP() %{
3865   constraint(ALLOC_IN_RC(p_reg));
3866   match(RegP);
3867   match(eBXRegP);
3868   match(eDXRegP);
3869   match(eSIRegP);
3870   match(eDIRegP);
3871 
3872   format %{ %}
3873   interface(REG_INTER);
3874 %}
3875 
3876 // Special Registers
3877 // Return a pointer value
3878 operand eAXRegP(eRegP reg) %{
3879   constraint(ALLOC_IN_RC(eax_reg));
3880   match(reg);
3881   format %{ "EAX" %}
3882   interface(REG_INTER);
3883 %}
3884 
3885 // Used in AtomicAdd
3886 operand eBXRegP(eRegP reg) %{
3887   constraint(ALLOC_IN_RC(ebx_reg));
3888   match(reg);
3889   format %{ "EBX" %}
3890   interface(REG_INTER);
3891 %}
3892 
3893 // Tail-call (interprocedural jump) to interpreter
3894 operand eCXRegP(eRegP reg) %{
3895   constraint(ALLOC_IN_RC(ecx_reg));
3896   match(reg);
3897   format %{ "ECX" %}
3898   interface(REG_INTER);
3899 %}
3900 
3901 operand eSIRegP(eRegP reg) %{
3902   constraint(ALLOC_IN_RC(esi_reg));
3903   match(reg);
3904   format %{ "ESI" %}
3905   interface(REG_INTER);
3906 %}
3907 
3908 // Used in rep stosw
3909 operand eDIRegP(eRegP reg) %{
3910   constraint(ALLOC_IN_RC(edi_reg));
3911   match(reg);
3912   format %{ "EDI" %}
3913   interface(REG_INTER);
3914 %}
3915 
3916 operand eRegL() %{
3917   constraint(ALLOC_IN_RC(long_reg));
3918   match(RegL);
3919   match(eADXRegL);
3920 
3921   format %{ %}
3922   interface(REG_INTER);
3923 %}
3924 
3925 operand eADXRegL( eRegL reg ) %{
3926   constraint(ALLOC_IN_RC(eadx_reg));
3927   match(reg);
3928 
3929   format %{ "EDX:EAX" %}
3930   interface(REG_INTER);
3931 %}
3932 
3933 operand eBCXRegL( eRegL reg ) %{
3934   constraint(ALLOC_IN_RC(ebcx_reg));
3935   match(reg);
3936 
3937   format %{ "EBX:ECX" %}
3938   interface(REG_INTER);
3939 %}
3940 
3941 // Special case for integer high multiply
3942 operand eADXRegL_low_only() %{
3943   constraint(ALLOC_IN_RC(eadx_reg));
3944   match(RegL);
3945 
3946   format %{ "EAX" %}
3947   interface(REG_INTER);
3948 %}
3949 
3950 // Flags register, used as output of compare instructions
3951 operand eFlagsReg() %{
3952   constraint(ALLOC_IN_RC(int_flags));
3953   match(RegFlags);
3954 
3955   format %{ "EFLAGS" %}
3956   interface(REG_INTER);
3957 %}
3958 
3959 // Flags register, used as output of FLOATING POINT compare instructions
3960 operand eFlagsRegU() %{
3961   constraint(ALLOC_IN_RC(int_flags));
3962   match(RegFlags);
3963 
3964   format %{ "EFLAGS_U" %}
3965   interface(REG_INTER);
3966 %}
3967 
3968 operand eFlagsRegUCF() %{
3969   constraint(ALLOC_IN_RC(int_flags));
3970   match(RegFlags);
3971   predicate(false);
3972 
3973   format %{ "EFLAGS_U_CF" %}
3974   interface(REG_INTER);
3975 %}
3976 
3977 // Condition Code Register used by long compare
3978 operand flagsReg_long_LTGE() %{
3979   constraint(ALLOC_IN_RC(int_flags));
3980   match(RegFlags);
3981   format %{ "FLAGS_LTGE" %}
3982   interface(REG_INTER);
3983 %}
3984 operand flagsReg_long_EQNE() %{
3985   constraint(ALLOC_IN_RC(int_flags));
3986   match(RegFlags);
3987   format %{ "FLAGS_EQNE" %}
3988   interface(REG_INTER);
3989 %}
3990 operand flagsReg_long_LEGT() %{
3991   constraint(ALLOC_IN_RC(int_flags));
3992   match(RegFlags);
3993   format %{ "FLAGS_LEGT" %}
3994   interface(REG_INTER);
3995 %}
3996 
3997 // Float register operands
3998 operand regDPR() %{
3999   predicate( UseSSE < 2 );
4000   constraint(ALLOC_IN_RC(fp_dbl_reg));
4001   match(RegD);
4002   match(regDPR1);
4003   match(regDPR2);
4004   format %{ %}
4005   interface(REG_INTER);
4006 %}
4007 
4008 operand regDPR1(regDPR reg) %{
4009   predicate( UseSSE < 2 );
4010   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4011   match(reg);
4012   format %{ "FPR1" %}
4013   interface(REG_INTER);
4014 %}
4015 
4016 operand regDPR2(regDPR reg) %{
4017   predicate( UseSSE < 2 );
4018   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4019   match(reg);
4020   format %{ "FPR2" %}
4021   interface(REG_INTER);
4022 %}
4023 
4024 operand regnotDPR1(regDPR reg) %{
4025   predicate( UseSSE < 2 );
4026   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4027   match(reg);
4028   format %{ %}
4029   interface(REG_INTER);
4030 %}
4031 
4032 // Float register operands
4033 operand regFPR() %{
4034   predicate( UseSSE < 2 );
4035   constraint(ALLOC_IN_RC(fp_flt_reg));
4036   match(RegF);
4037   match(regFPR1);
4038   format %{ %}
4039   interface(REG_INTER);
4040 %}
4041 
4042 // Float register operands
4043 operand regFPR1(regFPR reg) %{
4044   predicate( UseSSE < 2 );
4045   constraint(ALLOC_IN_RC(fp_flt_reg0));
4046   match(reg);
4047   format %{ "FPR1" %}
4048   interface(REG_INTER);
4049 %}
4050 
4051 // XMM Float register operands
4052 operand regF() %{
4053   predicate( UseSSE>=1 );
4054   constraint(ALLOC_IN_RC(float_reg_legacy));
4055   match(RegF);
4056   format %{ %}
4057   interface(REG_INTER);
4058 %}
4059 
4060 // XMM Double register operands
4061 operand regD() %{
4062   predicate( UseSSE>=2 );
4063   constraint(ALLOC_IN_RC(double_reg_legacy));
4064   match(RegD);
4065   format %{ %}
4066   interface(REG_INTER);
4067 %}
4068 
4069 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
4070 // runtime code generation via reg_class_dynamic.
4071 operand vecS() %{
4072   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4073   match(VecS);
4074 
4075   format %{ %}
4076   interface(REG_INTER);
4077 %}
4078 
4079 operand vecD() %{
4080   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4081   match(VecD);
4082 
4083   format %{ %}
4084   interface(REG_INTER);
4085 %}
4086 
4087 operand vecX() %{
4088   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4089   match(VecX);
4090 
4091   format %{ %}
4092   interface(REG_INTER);
4093 %}
4094 
4095 operand vecY() %{
4096   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4097   match(VecY);
4098 
4099   format %{ %}
4100   interface(REG_INTER);
4101 %}
4102 
4103 //----------Memory Operands----------------------------------------------------
4104 // Direct Memory Operand
4105 operand direct(immP addr) %{
4106   match(addr);
4107 
4108   format %{ "[$addr]" %}
4109   interface(MEMORY_INTER) %{
4110     base(0xFFFFFFFF);
4111     index(0x4);
4112     scale(0x0);
4113     disp($addr);
4114   %}
4115 %}
4116 
4117 // Indirect Memory Operand
4118 operand indirect(eRegP reg) %{
4119   constraint(ALLOC_IN_RC(int_reg));
4120   match(reg);
4121 
4122   format %{ "[$reg]" %}
4123   interface(MEMORY_INTER) %{
4124     base($reg);
4125     index(0x4);
4126     scale(0x0);
4127     disp(0x0);
4128   %}
4129 %}
4130 
4131 // Indirect Memory Plus Short Offset Operand
4132 operand indOffset8(eRegP reg, immI8 off) %{
4133   match(AddP reg off);
4134 
4135   format %{ "[$reg + $off]" %}
4136   interface(MEMORY_INTER) %{
4137     base($reg);
4138     index(0x4);
4139     scale(0x0);
4140     disp($off);
4141   %}
4142 %}
4143 
4144 // Indirect Memory Plus Long Offset Operand
4145 operand indOffset32(eRegP reg, immI off) %{
4146   match(AddP reg off);
4147 
4148   format %{ "[$reg + $off]" %}
4149   interface(MEMORY_INTER) %{
4150     base($reg);
4151     index(0x4);
4152     scale(0x0);
4153     disp($off);
4154   %}
4155 %}
4156 
4157 // Indirect Memory Plus Long Offset Operand
4158 operand indOffset32X(rRegI reg, immP off) %{
4159   match(AddP off reg);
4160 
4161   format %{ "[$reg + $off]" %}
4162   interface(MEMORY_INTER) %{
4163     base($reg);
4164     index(0x4);
4165     scale(0x0);
4166     disp($off);
4167   %}
4168 %}
4169 
4170 // Indirect Memory Plus Index Register Plus Offset Operand
4171 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4172   match(AddP (AddP reg ireg) off);
4173 
4174   op_cost(10);
4175   format %{"[$reg + $off + $ireg]" %}
4176   interface(MEMORY_INTER) %{
4177     base($reg);
4178     index($ireg);
4179     scale(0x0);
4180     disp($off);
4181   %}
4182 %}
4183 
4184 // Indirect Memory Plus Index Register Plus Offset Operand
4185 operand indIndex(eRegP reg, rRegI ireg) %{
4186   match(AddP reg ireg);
4187 
4188   op_cost(10);
4189   format %{"[$reg + $ireg]" %}
4190   interface(MEMORY_INTER) %{
4191     base($reg);
4192     index($ireg);
4193     scale(0x0);
4194     disp(0x0);
4195   %}
4196 %}
4197 
4198 // // -------------------------------------------------------------------------
4199 // // 486 architecture doesn't support "scale * index + offset" without a base
4200 // // -------------------------------------------------------------------------
4201 // // Scaled Memory Operands
4202 // // Indirect Memory Times Scale Plus Offset Operand
4203 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4204 //   match(AddP off (LShiftI ireg scale));
4205 //
4206 //   op_cost(10);
4207 //   format %{"[$off + $ireg << $scale]" %}
4208 //   interface(MEMORY_INTER) %{
4209 //     base(0x4);
4210 //     index($ireg);
4211 //     scale($scale);
4212 //     disp($off);
4213 //   %}
4214 // %}
4215 
4216 // Indirect Memory Times Scale Plus Index Register
4217 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4218   match(AddP reg (LShiftI ireg scale));
4219 
4220   op_cost(10);
4221   format %{"[$reg + $ireg << $scale]" %}
4222   interface(MEMORY_INTER) %{
4223     base($reg);
4224     index($ireg);
4225     scale($scale);
4226     disp(0x0);
4227   %}
4228 %}
4229 
4230 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4231 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4232   match(AddP (AddP reg (LShiftI ireg scale)) off);
4233 
4234   op_cost(10);
4235   format %{"[$reg + $off + $ireg << $scale]" %}
4236   interface(MEMORY_INTER) %{
4237     base($reg);
4238     index($ireg);
4239     scale($scale);
4240     disp($off);
4241   %}
4242 %}
4243 
4244 //----------Load Long Memory Operands------------------------------------------
4245 // The load-long idiom will use its address expression again after loading
4246 // the first word of the long.  If the load-long destination overlaps with
4247 // registers used in the addressing expression, the 2nd half will be loaded
4248 // from a clobbered address.  Fix this by requiring that load-long use
4249 // address registers that do not overlap with the load-long target.
4250 
4251 // load-long support
4252 operand load_long_RegP() %{
4253   constraint(ALLOC_IN_RC(esi_reg));
4254   match(RegP);
4255   match(eSIRegP);
4256   op_cost(100);
4257   format %{  %}
4258   interface(REG_INTER);
4259 %}
4260 
4261 // Indirect Memory Operand Long
4262 operand load_long_indirect(load_long_RegP reg) %{
4263   constraint(ALLOC_IN_RC(esi_reg));
4264   match(reg);
4265 
4266   format %{ "[$reg]" %}
4267   interface(MEMORY_INTER) %{
4268     base($reg);
4269     index(0x4);
4270     scale(0x0);
4271     disp(0x0);
4272   %}
4273 %}
4274 
4275 // Indirect Memory Plus Long Offset Operand
4276 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4277   match(AddP reg off);
4278 
4279   format %{ "[$reg + $off]" %}
4280   interface(MEMORY_INTER) %{
4281     base($reg);
4282     index(0x4);
4283     scale(0x0);
4284     disp($off);
4285   %}
4286 %}
4287 
4288 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4289 
4290 
4291 //----------Special Memory Operands--------------------------------------------
4292 // Stack Slot Operand - This operand is used for loading and storing temporary
4293 //                      values on the stack where a match requires a value to
4294 //                      flow through memory.
4295 operand stackSlotP(sRegP reg) %{
4296   constraint(ALLOC_IN_RC(stack_slots));
4297   // No match rule because this operand is only generated in matching
4298   format %{ "[$reg]" %}
4299   interface(MEMORY_INTER) %{
4300     base(0x4);   // ESP
4301     index(0x4);  // No Index
4302     scale(0x0);  // No Scale
4303     disp($reg);  // Stack Offset
4304   %}
4305 %}
4306 
4307 operand stackSlotI(sRegI reg) %{
4308   constraint(ALLOC_IN_RC(stack_slots));
4309   // No match rule because this operand is only generated in matching
4310   format %{ "[$reg]" %}
4311   interface(MEMORY_INTER) %{
4312     base(0x4);   // ESP
4313     index(0x4);  // No Index
4314     scale(0x0);  // No Scale
4315     disp($reg);  // Stack Offset
4316   %}
4317 %}
4318 
4319 operand stackSlotF(sRegF reg) %{
4320   constraint(ALLOC_IN_RC(stack_slots));
4321   // No match rule because this operand is only generated in matching
4322   format %{ "[$reg]" %}
4323   interface(MEMORY_INTER) %{
4324     base(0x4);   // ESP
4325     index(0x4);  // No Index
4326     scale(0x0);  // No Scale
4327     disp($reg);  // Stack Offset
4328   %}
4329 %}
4330 
4331 operand stackSlotD(sRegD reg) %{
4332   constraint(ALLOC_IN_RC(stack_slots));
4333   // No match rule because this operand is only generated in matching
4334   format %{ "[$reg]" %}
4335   interface(MEMORY_INTER) %{
4336     base(0x4);   // ESP
4337     index(0x4);  // No Index
4338     scale(0x0);  // No Scale
4339     disp($reg);  // Stack Offset
4340   %}
4341 %}
4342 
4343 operand stackSlotL(sRegL reg) %{
4344   constraint(ALLOC_IN_RC(stack_slots));
4345   // No match rule because this operand is only generated in matching
4346   format %{ "[$reg]" %}
4347   interface(MEMORY_INTER) %{
4348     base(0x4);   // ESP
4349     index(0x4);  // No Index
4350     scale(0x0);  // No Scale
4351     disp($reg);  // Stack Offset
4352   %}
4353 %}
4354 
4355 //----------Memory Operands - Win95 Implicit Null Variants----------------
4356 // Indirect Memory Operand
4357 operand indirect_win95_safe(eRegP_no_EBP reg)
4358 %{
4359   constraint(ALLOC_IN_RC(int_reg));
4360   match(reg);
4361 
4362   op_cost(100);
4363   format %{ "[$reg]" %}
4364   interface(MEMORY_INTER) %{
4365     base($reg);
4366     index(0x4);
4367     scale(0x0);
4368     disp(0x0);
4369   %}
4370 %}
4371 
4372 // Indirect Memory Plus Short Offset Operand
4373 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4374 %{
4375   match(AddP reg off);
4376 
4377   op_cost(100);
4378   format %{ "[$reg + $off]" %}
4379   interface(MEMORY_INTER) %{
4380     base($reg);
4381     index(0x4);
4382     scale(0x0);
4383     disp($off);
4384   %}
4385 %}
4386 
4387 // Indirect Memory Plus Long Offset Operand
4388 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4389 %{
4390   match(AddP reg off);
4391 
4392   op_cost(100);
4393   format %{ "[$reg + $off]" %}
4394   interface(MEMORY_INTER) %{
4395     base($reg);
4396     index(0x4);
4397     scale(0x0);
4398     disp($off);
4399   %}
4400 %}
4401 
4402 // Indirect Memory Plus Index Register Plus Offset Operand
4403 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4404 %{
4405   match(AddP (AddP reg ireg) off);
4406 
4407   op_cost(100);
4408   format %{"[$reg + $off + $ireg]" %}
4409   interface(MEMORY_INTER) %{
4410     base($reg);
4411     index($ireg);
4412     scale(0x0);
4413     disp($off);
4414   %}
4415 %}
4416 
4417 // Indirect Memory Times Scale Plus Index Register
4418 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4419 %{
4420   match(AddP reg (LShiftI ireg scale));
4421 
4422   op_cost(100);
4423   format %{"[$reg + $ireg << $scale]" %}
4424   interface(MEMORY_INTER) %{
4425     base($reg);
4426     index($ireg);
4427     scale($scale);
4428     disp(0x0);
4429   %}
4430 %}
4431 
4432 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4433 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4434 %{
4435   match(AddP (AddP reg (LShiftI ireg scale)) off);
4436 
4437   op_cost(100);
4438   format %{"[$reg + $off + $ireg << $scale]" %}
4439   interface(MEMORY_INTER) %{
4440     base($reg);
4441     index($ireg);
4442     scale($scale);
4443     disp($off);
4444   %}
4445 %}
4446 
4447 //----------Conditional Branch Operands----------------------------------------
4448 // Comparison Op  - This is the operation of the comparison, and is limited to
4449 //                  the following set of codes:
4450 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4451 //
4452 // Other attributes of the comparison, such as unsignedness, are specified
4453 // by the comparison instruction that sets a condition code flags register.
4454 // That result is represented by a flags operand whose subtype is appropriate
4455 // to the unsignedness (etc.) of the comparison.
4456 //
4457 // Later, the instruction which matches both the Comparison Op (a Bool) and
4458 // the flags (produced by the Cmp) specifies the coding of the comparison op
4459 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4460 
4461 // Comparison Code
4462 operand cmpOp() %{
4463   match(Bool);
4464 
4465   format %{ "" %}
4466   interface(COND_INTER) %{
4467     equal(0x4, "e");
4468     not_equal(0x5, "ne");
4469     less(0xC, "l");
4470     greater_equal(0xD, "ge");
4471     less_equal(0xE, "le");
4472     greater(0xF, "g");
4473     overflow(0x0, "o");
4474     no_overflow(0x1, "no");
4475   %}
4476 %}
4477 
4478 // Comparison Code, unsigned compare.  Used by FP also, with
4479 // C2 (unordered) turned into GT or LT already.  The other bits
4480 // C0 and C3 are turned into Carry & Zero flags.
4481 operand cmpOpU() %{
4482   match(Bool);
4483 
4484   format %{ "" %}
4485   interface(COND_INTER) %{
4486     equal(0x4, "e");
4487     not_equal(0x5, "ne");
4488     less(0x2, "b");
4489     greater_equal(0x3, "nb");
4490     less_equal(0x6, "be");
4491     greater(0x7, "nbe");
4492     overflow(0x0, "o");
4493     no_overflow(0x1, "no");
4494   %}
4495 %}
4496 
4497 // Floating comparisons that don't require any fixup for the unordered case
4498 operand cmpOpUCF() %{
4499   match(Bool);
4500   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4501             n->as_Bool()->_test._test == BoolTest::ge ||
4502             n->as_Bool()->_test._test == BoolTest::le ||
4503             n->as_Bool()->_test._test == BoolTest::gt);
4504   format %{ "" %}
4505   interface(COND_INTER) %{
4506     equal(0x4, "e");
4507     not_equal(0x5, "ne");
4508     less(0x2, "b");
4509     greater_equal(0x3, "nb");
4510     less_equal(0x6, "be");
4511     greater(0x7, "nbe");
4512     overflow(0x0, "o");
4513     no_overflow(0x1, "no");
4514   %}
4515 %}
4516 
4517 
4518 // Floating comparisons that can be fixed up with extra conditional jumps
4519 operand cmpOpUCF2() %{
4520   match(Bool);
4521   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4522             n->as_Bool()->_test._test == BoolTest::eq);
4523   format %{ "" %}
4524   interface(COND_INTER) %{
4525     equal(0x4, "e");
4526     not_equal(0x5, "ne");
4527     less(0x2, "b");
4528     greater_equal(0x3, "nb");
4529     less_equal(0x6, "be");
4530     greater(0x7, "nbe");
4531     overflow(0x0, "o");
4532     no_overflow(0x1, "no");
4533   %}
4534 %}
4535 
4536 // Comparison Code for FP conditional move
4537 operand cmpOp_fcmov() %{
4538   match(Bool);
4539 
4540   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4541             n->as_Bool()->_test._test != BoolTest::no_overflow);
4542   format %{ "" %}
4543   interface(COND_INTER) %{
4544     equal        (0x0C8);
4545     not_equal    (0x1C8);
4546     less         (0x0C0);
4547     greater_equal(0x1C0);
4548     less_equal   (0x0D0);
4549     greater      (0x1D0);
4550     overflow(0x0, "o"); // not really supported by the instruction
4551     no_overflow(0x1, "no"); // not really supported by the instruction
4552   %}
4553 %}
4554 
4555 // Comparison Code used in long compares
4556 operand cmpOp_commute() %{
4557   match(Bool);
4558 
4559   format %{ "" %}
4560   interface(COND_INTER) %{
4561     equal(0x4, "e");
4562     not_equal(0x5, "ne");
4563     less(0xF, "g");
4564     greater_equal(0xE, "le");
4565     less_equal(0xD, "ge");
4566     greater(0xC, "l");
4567     overflow(0x0, "o");
4568     no_overflow(0x1, "no");
4569   %}
4570 %}
4571 
4572 //----------OPERAND CLASSES----------------------------------------------------
4573 // Operand Classes are groups of operands that are used to simplify
4574 // instruction definitions by not requiring the AD writer to specify separate
4575 // instructions for every form of operand when the instruction accepts
4576 // multiple operand types with the same basic encoding and format.  The classic
4577 // case of this is memory operands.
4578 
4579 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4580                indIndex, indIndexScale, indIndexScaleOffset);
4581 
4582 // Long memory operations are encoded in 2 instructions and a +4 offset.
4583 // This means some kind of offset is always required and you cannot use
4584 // an oop as the offset (done when working on static globals).
4585 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4586                     indIndex, indIndexScale, indIndexScaleOffset);
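
// Illustrative usage sketch (assumed name; compare the real loadI later in the
// INSTRUCTIONS section): because loads take the "memory" opclass as their
// address operand, a single definition matches every addressing form listed
// above (direct, indirect, and the indexed/scaled/offset variants) without a
// separate rule per form:
//
//   instruct loadI_sketch(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));
//     format %{ "MOV    $dst,$mem\t# int" %}
//     ins_encode %{ __ movl($dst$$Register, $mem$$Address); %}
//     ins_pipe(ialu_reg_mem);
//   %}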
4587 
4588 
4589 //----------PIPELINE-----------------------------------------------------------
4590 // Rules which define the behavior of the target architecture's pipeline.
4591 pipeline %{
4592 
4593 //----------ATTRIBUTES---------------------------------------------------------
4594 attributes %{
4595   variable_size_instructions;        // Variable-size instructions
4596   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4597   instruction_unit_size = 1;         // An instruction unit is 1 byte long
4598   instruction_fetch_unit_size = 16;  // The processor fetches one line
4599   instruction_fetch_units = 1;       // of 16 bytes
4600 
4601   // List of nop instructions
4602   nops( MachNop );
4603 %}
4604 
4605 //----------RESOURCES----------------------------------------------------------
4606 // Resources are the functional units available to the machine
4607 
4608 // Generic P2/P3 pipeline
4609 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4610 // 3 instructions decoded per cycle.
4611 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4612 // 2 ALU op, only ALU0 handles mul/div instructions.
4613 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4614            MS0, MS1, MEM = MS0 | MS1,
4615            BR, FPU,
4616            ALU0, ALU1, ALU = ALU0 | ALU1 );
4617 
4618 //----------PIPELINE DESCRIPTION-----------------------------------------------
4619 // Pipeline Description specifies the stages in the machine's pipeline
4620 
4621 // Generic P2/P3 pipeline
4622 pipe_desc(S0, S1, S2, S3, S4, S5);
4623 
4624 //----------PIPELINE CLASSES---------------------------------------------------
4625 // Pipeline Classes describe the stages in which input and output are
4626 // referenced by the hardware pipeline.
4627 
4628 // Naming convention: ialu or fpu
4629 // Then: _reg
4630 // Then: _reg if there is a 2nd register
4631 // Then: _long if it's a pair of instructions implementing a long
4632 // Then: _fat if it requires the big decoder
4633 //   Or: _mem if it requires the big decoder and a memory unit.
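//
// For example (reading aid only): ialu_reg_mem below reads as "integer ALU
// operation, register destination, memory source"; per the convention it
// claims the big decoder (D0) plus a memory unit (MEM) in addition to an ALU.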
4634 
4635 // Integer ALU reg operation
4636 pipe_class ialu_reg(rRegI dst) %{
4637     single_instruction;
4638     dst    : S4(write);
4639     dst    : S3(read);
4640     DECODE : S0;        // any decoder
4641     ALU    : S3;        // any alu
4642 %}
4643 
4644 // Long ALU reg operation
4645 pipe_class ialu_reg_long(eRegL dst) %{
4646     instruction_count(2);
4647     dst    : S4(write);
4648     dst    : S3(read);
4649     DECODE : S0(2);     // any 2 decoders
4650     ALU    : S3(2);     // both alus
4651 %}
4652 
4653 // Integer ALU reg operation using big decoder
4654 pipe_class ialu_reg_fat(rRegI dst) %{
4655     single_instruction;
4656     dst    : S4(write);
4657     dst    : S3(read);
4658     D0     : S0;        // big decoder only
4659     ALU    : S3;        // any alu
4660 %}
4661 
4662 // Long ALU reg operation using big decoder
4663 pipe_class ialu_reg_long_fat(eRegL dst) %{
4664     instruction_count(2);
4665     dst    : S4(write);
4666     dst    : S3(read);
4667     D0     : S0(2);     // big decoder only; twice
4668     ALU    : S3(2);     // any 2 alus
4669 %}
4670 
4671 // Integer ALU reg-reg operation
4672 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4673     single_instruction;
4674     dst    : S4(write);
4675     src    : S3(read);
4676     DECODE : S0;        // any decoder
4677     ALU    : S3;        // any alu
4678 %}
4679 
4680 // Long ALU reg-reg operation
4681 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4682     instruction_count(2);
4683     dst    : S4(write);
4684     src    : S3(read);
4685     DECODE : S0(2);     // any 2 decoders
4686     ALU    : S3(2);     // both alus
4687 %}
4688 
4689 // Integer ALU reg-reg operation using big decoder
4690 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4691     single_instruction;
4692     dst    : S4(write);
4693     src    : S3(read);
4694     D0     : S0;        // big decoder only
4695     ALU    : S3;        // any alu
4696 %}
4697 
4698 // Long ALU reg-reg operation using big decoder
4699 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4700     instruction_count(2);
4701     dst    : S4(write);
4702     src    : S3(read);
4703     D0     : S0(2);     // big decoder only; twice
4704     ALU    : S3(2);     // both alus
4705 %}
4706 
4707 // Integer ALU reg-mem operation
4708 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4709     single_instruction;
4710     dst    : S5(write);
4711     mem    : S3(read);
4712     D0     : S0;        // big decoder only
4713     ALU    : S4;        // any alu
4714     MEM    : S3;        // any mem
4715 %}
4716 
4717 // Long ALU reg-mem operation
4718 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4719     instruction_count(2);
4720     dst    : S5(write);
4721     mem    : S3(read);
4722     D0     : S0(2);     // big decoder only; twice
4723     ALU    : S4(2);     // any 2 alus
4724     MEM    : S3(2);     // both mems
4725 %}
4726 
4727 // Integer mem operation (prefetch)
4728 pipe_class ialu_mem(memory mem)
4729 %{
4730     single_instruction;
4731     mem    : S3(read);
4732     D0     : S0;        // big decoder only
4733     MEM    : S3;        // any mem
4734 %}
4735 
4736 // Integer Store to Memory
4737 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4738     single_instruction;
4739     mem    : S3(read);
4740     src    : S5(read);
4741     D0     : S0;        // big decoder only
4742     ALU    : S4;        // any alu
4743     MEM    : S3;
4744 %}
4745 
4746 // Long Store to Memory
4747 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4748     instruction_count(2);
4749     mem    : S3(read);
4750     src    : S5(read);
4751     D0     : S0(2);     // big decoder only; twice
4752     ALU    : S4(2);     // any 2 alus
4753     MEM    : S3(2);     // Both mems
4754 %}
4755 
4756 // Integer Store to Memory
4757 pipe_class ialu_mem_imm(memory mem) %{
4758     single_instruction;
4759     mem    : S3(read);
4760     D0     : S0;        // big decoder only
4761     ALU    : S4;        // any alu
4762     MEM    : S3;
4763 %}
4764 
4765 // Integer ALU0 reg-reg operation
4766 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4767     single_instruction;
4768     dst    : S4(write);
4769     src    : S3(read);
4770     D0     : S0;        // Big decoder only
4771     ALU0   : S3;        // only alu0
4772 %}
4773 
4774 // Integer ALU0 reg-mem operation
4775 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4776     single_instruction;
4777     dst    : S5(write);
4778     mem    : S3(read);
4779     D0     : S0;        // big decoder only
4780     ALU0   : S4;        // ALU0 only
4781     MEM    : S3;        // any mem
4782 %}
4783 
4784 // Integer ALU reg-reg operation
4785 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4786     single_instruction;
4787     cr     : S4(write);
4788     src1   : S3(read);
4789     src2   : S3(read);
4790     DECODE : S0;        // any decoder
4791     ALU    : S3;        // any alu
4792 %}
4793 
4794 // Integer ALU reg-imm operation
4795 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4796     single_instruction;
4797     cr     : S4(write);
4798     src1   : S3(read);
4799     DECODE : S0;        // any decoder
4800     ALU    : S3;        // any alu
4801 %}
4802 
4803 // Integer ALU reg-mem operation
4804 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4805     single_instruction;
4806     cr     : S4(write);
4807     src1   : S3(read);
4808     src2   : S3(read);
4809     D0     : S0;        // big decoder only
4810     ALU    : S4;        // any alu
4811     MEM    : S3;
4812 %}
4813 
4814 // Conditional move reg-reg
4815 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4816     instruction_count(4);
4817     y      : S4(read);
4818     q      : S3(read);
4819     p      : S3(read);
4820     DECODE : S0(4);     // any decoder
4821 %}
4822 
4823 // Conditional move reg-reg
4824 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4825     single_instruction;
4826     dst    : S4(write);
4827     src    : S3(read);
4828     cr     : S3(read);
4829     DECODE : S0;        // any decoder
4830 %}
4831 
4832 // Conditional move reg-mem
4833 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4834     single_instruction;
4835     dst    : S4(write);
4836     src    : S3(read);
4837     cr     : S3(read);
4838     DECODE : S0;        // any decoder
4839     MEM    : S3;
4840 %}
4841 
4842 // Conditional move reg-reg long
4843 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4844     single_instruction;
4845     dst    : S4(write);
4846     src    : S3(read);
4847     cr     : S3(read);
4848     DECODE : S0(2);     // any 2 decoders
4849 %}
4850 
4851 // Conditional move double reg-reg
4852 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4853     single_instruction;
4854     dst    : S4(write);
4855     src    : S3(read);
4856     cr     : S3(read);
4857     DECODE : S0;        // any decoder
4858 %}
4859 
4860 // Float reg-reg operation
4861 pipe_class fpu_reg(regDPR dst) %{
4862     instruction_count(2);
4863     dst    : S3(read);
4864     DECODE : S0(2);     // any 2 decoders
4865     FPU    : S3;
4866 %}
4867 
4868 // Float reg-reg operation
4869 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4870     instruction_count(2);
4871     dst    : S4(write);
4872     src    : S3(read);
4873     DECODE : S0(2);     // any 2 decoders
4874     FPU    : S3;
4875 %}
4876 
4877 // Float reg-reg operation
4878 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4879     instruction_count(3);
4880     dst    : S4(write);
4881     src1   : S3(read);
4882     src2   : S3(read);
4883     DECODE : S0(3);     // any 3 decoders
4884     FPU    : S3(2);
4885 %}
4886 
4887 // Float reg-reg operation
4888 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4889     instruction_count(4);
4890     dst    : S4(write);
4891     src1   : S3(read);
4892     src2   : S3(read);
4893     src3   : S3(read);
4894     DECODE : S0(4);     // any 4 decoders
4895     FPU    : S3(2);
4896 %}
4897 
4898 // Float reg-reg operation
4899 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4900     instruction_count(4);
4901     dst    : S4(write);
4902     src1   : S3(read);
4903     src2   : S3(read);
4904     src3   : S3(read);
4905     DECODE : S1(3);     // any 3 decoders
4906     D0     : S0;        // Big decoder only
4907     FPU    : S3(2);
4908     MEM    : S3;
4909 %}
4910 
4911 // Float reg-mem operation
4912 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4913     instruction_count(2);
4914     dst    : S5(write);
4915     mem    : S3(read);
4916     D0     : S0;        // big decoder only
4917     DECODE : S1;        // any decoder for FPU POP
4918     FPU    : S4;
4919     MEM    : S3;        // any mem
4920 %}
4921 
4922 // Float reg-mem operation
4923 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4924     instruction_count(3);
4925     dst    : S5(write);
4926     src1   : S3(read);
4927     mem    : S3(read);
4928     D0     : S0;        // big decoder only
4929     DECODE : S1(2);     // any decoder for FPU POP
4930     FPU    : S4;
4931     MEM    : S3;        // any mem
4932 %}
4933 
4934 // Float mem-reg operation
4935 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4936     instruction_count(2);
4937     src    : S5(read);
4938     mem    : S3(read);
4939     DECODE : S0;        // any decoder for FPU PUSH
4940     D0     : S1;        // big decoder only
4941     FPU    : S4;
4942     MEM    : S3;        // any mem
4943 %}
4944 
4945 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4946     instruction_count(3);
4947     src1   : S3(read);
4948     src2   : S3(read);
4949     mem    : S3(read);
4950     DECODE : S0(2);     // any decoder for FPU PUSH
4951     D0     : S1;        // big decoder only
4952     FPU    : S4;
4953     MEM    : S3;        // any mem
4954 %}
4955 
4956 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4957     instruction_count(3);
4958     src1   : S3(read);
4959     src2   : S3(read);
4960     mem    : S4(read);
4961     DECODE : S0;        // any decoder for FPU PUSH
4962     D0     : S0(2);     // big decoder only
4963     FPU    : S4;
4964     MEM    : S3(2);     // any mem
4965 %}
4966 
4967 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4968     instruction_count(2);
4969     src1   : S3(read);
4970     dst    : S4(read);
4971     D0     : S0(2);     // big decoder only
4972     MEM    : S3(2);     // any mem
4973 %}
4974 
4975 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4976     instruction_count(3);
4977     src1   : S3(read);
4978     src2   : S3(read);
4979     dst    : S4(read);
4980     D0     : S0(3);     // big decoder only
4981     FPU    : S4;
4982     MEM    : S3(3);     // any mem
4983 %}
4984 
4985 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4986     instruction_count(3);
4987     src1   : S4(read);
4988     mem    : S4(read);
4989     DECODE : S0;        // any decoder for FPU PUSH
4990     D0     : S0(2);     // big decoder only
4991     FPU    : S4;
4992     MEM    : S3(2);     // any mem
4993 %}
4994 
4995 // Float load constant
4996 pipe_class fpu_reg_con(regDPR dst) %{
4997     instruction_count(2);
4998     dst    : S5(write);
4999     D0     : S0;        // big decoder only for the load
5000     DECODE : S1;        // any decoder for FPU POP
5001     FPU    : S4;
5002     MEM    : S3;        // any mem
5003 %}
5004 
5005 // Float load constant
5006 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5007     instruction_count(3);
5008     dst    : S5(write);
5009     src    : S3(read);
5010     D0     : S0;        // big decoder only for the load
5011     DECODE : S1(2);     // any decoder for FPU POP
5012     FPU    : S4;
5013     MEM    : S3;        // any mem
5014 %}
5015 
5016 // Unconditional branch
5017 pipe_class pipe_jmp( label labl ) %{
5018     single_instruction;
5019     BR   : S3;
5020 %}
5021 
5022 // Conditional branch
5023 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5024     single_instruction;
5025     cr    : S1(read);
5026     BR    : S3;
5027 %}
5028 
5029 // Allocation idiom
5030 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5031     instruction_count(1); force_serialization;
5032     fixed_latency(6);
5033     heap_ptr : S3(read);
5034     DECODE   : S0(3);
5035     D0       : S2;
5036     MEM      : S3;
5037     ALU      : S3(2);
5038     dst      : S5(write);
5039     BR       : S5;
5040 %}
5041 
5042 // Generic big/slow expanded idiom
5043 pipe_class pipe_slow(  ) %{
5044     instruction_count(10); multiple_bundles; force_serialization;
5045     fixed_latency(100);
5046     D0  : S0(2);
5047     MEM : S3(2);
5048 %}
5049 
5050 // The real do-nothing guy
5051 pipe_class empty( ) %{
5052     instruction_count(0);
5053 %}
5054 
5055 // Define the class for the Nop node
5056 define %{
5057    MachNop = empty;
5058 %}
5059 
5060 %}
5061 
5062 //----------INSTRUCTIONS-------------------------------------------------------
5063 //
5064 // match      -- States which machine-independent subtree may be replaced
5065 //               by this instruction.
5066 // ins_cost   -- The estimated cost of this instruction is used by instruction
5067 //               selection to identify a minimum cost tree of machine
5068 //               instructions that matches a tree of machine-independent
5069 //               instructions.
5070 // format     -- A string providing the disassembly for this instruction.
5071 //               The value of an instruction's operand may be inserted
5072 //               by referring to it with a '$' prefix.
5073 // opcode     -- Three instruction opcodes may be provided.  These are referred
5074 //               to within an encode class as $primary, $secondary, and $tertiary
5075 //               respectively.  The primary opcode is commonly used to
5076 //               indicate the type of machine instruction, while secondary
5077 //               and tertiary are often used for prefix options or addressing
5078 //               modes.
5079 // ins_encode -- A list of encode classes with parameters. The encode class
5080 //               name must have been defined in an 'enc_class' specification
5081 //               in the encode section of the architecture description.
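//
// For example, the bytes_reverse_int definition just below exercises all of
// these: match() claims the ReverseBytesI subtree, format supplies the
// "BSWAP  $dst" disassembly, opcode(0x0F, 0xC8) provides $primary and
// $secondary, and the encode classes listed in ins_encode emit them as
// 0x0F 0xC8+reg, the x86 BSWAP r32 encoding.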
5082 
5083 //----------BSWAP-Instruction--------------------------------------------------
5084 instruct bytes_reverse_int(rRegI dst) %{
5085   match(Set dst (ReverseBytesI dst));
5086 
5087   format %{ "BSWAP  $dst" %}
5088   opcode(0x0F, 0xC8);
5089   ins_encode( OpcP, OpcSReg(dst) );
5090   ins_pipe( ialu_reg );
5091 %}
5092 
5093 instruct bytes_reverse_long(eRegL dst) %{
5094   match(Set dst (ReverseBytesL dst));
5095 
5096   format %{ "BSWAP  $dst.lo\n\t"
5097             "BSWAP  $dst.hi\n\t"
5098             "XCHG   $dst.lo $dst.hi" %}
5099 
5100   ins_cost(125);
5101   ins_encode( bswap_long_bytes(dst) );
5102   ins_pipe( ialu_reg_reg);
5103 %}
5104 
5105 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5106   match(Set dst (ReverseBytesUS dst));
5107   effect(KILL cr);
5108 
5109   format %{ "BSWAP  $dst\n\t"
5110             "SHR    $dst,16\n\t" %}
5111   ins_encode %{
5112     __ bswapl($dst$$Register);
5113     __ shrl($dst$$Register, 16);
5114   %}
5115   ins_pipe( ialu_reg );
5116 %}
5117 
5118 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5119   match(Set dst (ReverseBytesS dst));
5120   effect(KILL cr);
5121 
5122   format %{ "BSWAP  $dst\n\t"
5123             "SAR    $dst,16\n\t" %}
5124   ins_encode %{
5125     __ bswapl($dst$$Register);
5126     __ sarl($dst$$Register, 16);
5127   %}
5128   ins_pipe( ialu_reg );
5129 %}
5130 
5131 
5132 //---------- Zeros Count Instructions ------------------------------------------
5133 
5134 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5135   predicate(UseCountLeadingZerosInstruction);
5136   match(Set dst (CountLeadingZerosI src));
5137   effect(KILL cr);
5138 
5139   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5140   ins_encode %{
5141     __ lzcntl($dst$$Register, $src$$Register);
5142   %}
5143   ins_pipe(ialu_reg);
5144 %}
5145 
5146 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5147   predicate(!UseCountLeadingZerosInstruction);
5148   match(Set dst (CountLeadingZerosI src));
5149   effect(KILL cr);
5150 
5151   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5152             "JNZ    skip\n\t"
5153             "MOV    $dst, -1\n"
5154       "skip:\n\t"
5155             "NEG    $dst\n\t"
5156             "ADD    $dst, 31" %}
5157   ins_encode %{
5158     Register Rdst = $dst$$Register;
5159     Register Rsrc = $src$$Register;
5160     Label skip;
5161     __ bsrl(Rdst, Rsrc);
5162     __ jccb(Assembler::notZero, skip);
5163     __ movl(Rdst, -1);
5164     __ bind(skip);
5165     __ negl(Rdst);
5166     __ addl(Rdst, BitsPerInt - 1);
5167   %}
5168   ins_pipe(ialu_reg);
5169 %}
5170 
5171 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5172   predicate(UseCountLeadingZerosInstruction);
5173   match(Set dst (CountLeadingZerosL src));
5174   effect(TEMP dst, KILL cr);
5175 
5176   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5177             "JNC    done\n\t"
5178             "LZCNT  $dst, $src.lo\n\t"
5179             "ADD    $dst, 32\n"
5180       "done:" %}
5181   ins_encode %{
5182     Register Rdst = $dst$$Register;
5183     Register Rsrc = $src$$Register;
5184     Label done;
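    // Note (assumed from the shared x86_32 register helpers): HIGH_FROM_LOW
    // maps the register holding the low 32 bits of a long pair to its partner
    // holding the high 32 bits, so Rsrc is $src.lo and HIGH_FROM_LOW(Rsrc)
    // is $src.hi.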
5185     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5186     __ jccb(Assembler::carryClear, done);
5187     __ lzcntl(Rdst, Rsrc);
5188     __ addl(Rdst, BitsPerInt);
5189     __ bind(done);
5190   %}
5191   ins_pipe(ialu_reg);
5192 %}
5193 
5194 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5195   predicate(!UseCountLeadingZerosInstruction);
5196   match(Set dst (CountLeadingZerosL src));
5197   effect(TEMP dst, KILL cr);
5198 
5199   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5200             "JZ     msw_is_zero\n\t"
5201             "ADD    $dst, 32\n\t"
5202             "JMP    not_zero\n"
5203       "msw_is_zero:\n\t"
5204             "BSR    $dst, $src.lo\n\t"
5205             "JNZ    not_zero\n\t"
5206             "MOV    $dst, -1\n"
5207       "not_zero:\n\t"
5208             "NEG    $dst\n\t"
5209             "ADD    $dst, 63\n" %}
5210   ins_encode %{
5211     Register Rdst = $dst$$Register;
5212     Register Rsrc = $src$$Register;
5213     Label msw_is_zero;
5214     Label not_zero;
5215     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5216     __ jccb(Assembler::zero, msw_is_zero);
5217     __ addl(Rdst, BitsPerInt);
5218     __ jmpb(not_zero);
5219     __ bind(msw_is_zero);
5220     __ bsrl(Rdst, Rsrc);
5221     __ jccb(Assembler::notZero, not_zero);
5222     __ movl(Rdst, -1);
5223     __ bind(not_zero);
5224     __ negl(Rdst);
5225     __ addl(Rdst, BitsPerLong - 1);
5226   %}
5227   ins_pipe(ialu_reg);
5228 %}
5229 
5230 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5231   predicate(UseCountTrailingZerosInstruction);
5232   match(Set dst (CountTrailingZerosI src));
5233   effect(KILL cr);
5234 
5235   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5236   ins_encode %{
5237     __ tzcntl($dst$$Register, $src$$Register);
5238   %}
5239   ins_pipe(ialu_reg);
5240 %}
5241 
5242 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5243   predicate(!UseCountTrailingZerosInstruction);
5244   match(Set dst (CountTrailingZerosI src));
5245   effect(KILL cr);
5246 
5247   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5248             "JNZ    done\n\t"
5249             "MOV    $dst, 32\n"
5250       "done:" %}
5251   ins_encode %{
5252     Register Rdst = $dst$$Register;
5253     Label done;
5254     __ bsfl(Rdst, $src$$Register);
5255     __ jccb(Assembler::notZero, done);
5256     __ movl(Rdst, BitsPerInt);
5257     __ bind(done);
5258   %}
5259   ins_pipe(ialu_reg);
5260 %}
5261 
5262 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5263   predicate(UseCountTrailingZerosInstruction);
5264   match(Set dst (CountTrailingZerosL src));
5265   effect(TEMP dst, KILL cr);
5266 
5267   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5268             "JNC    done\n\t"
5269             "TZCNT  $dst, $src.hi\n\t"
5270             "ADD    $dst, 32\n"
5271       "done:" %}
5272   ins_encode %{
5273     Register Rdst = $dst$$Register;
5274     Register Rsrc = $src$$Register;
5275     Label done;
5276     __ tzcntl(Rdst, Rsrc);
5277     __ jccb(Assembler::carryClear, done);
5278     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5279     __ addl(Rdst, BitsPerInt);
5280     __ bind(done);
5281   %}
5282   ins_pipe(ialu_reg);
5283 %}
5284 
5285 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5286   predicate(!UseCountTrailingZerosInstruction);
5287   match(Set dst (CountTrailingZerosL src));
5288   effect(TEMP dst, KILL cr);
5289 
5290   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5291             "JNZ    done\n\t"
5292             "BSF    $dst, $src.hi\n\t"
5293             "JNZ    msw_not_zero\n\t"
5294             "MOV    $dst, 32\n"
5295       "msw_not_zero:\n\t"
5296             "ADD    $dst, 32\n"
5297       "done:" %}
5298   ins_encode %{
5299     Register Rdst = $dst$$Register;
5300     Register Rsrc = $src$$Register;
5301     Label msw_not_zero;
5302     Label done;
5303     __ bsfl(Rdst, Rsrc);
5304     __ jccb(Assembler::notZero, done);
5305     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5306     __ jccb(Assembler::notZero, msw_not_zero);
5307     __ movl(Rdst, BitsPerInt);
5308     __ bind(msw_not_zero);
5309     __ addl(Rdst, BitsPerInt);
5310     __ bind(done);
5311   %}
5312   ins_pipe(ialu_reg);
5313 %}
5314 
5315 
5316 //---------- Population Count Instructions -------------------------------------
5317 
5318 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5319   predicate(UsePopCountInstruction);
5320   match(Set dst (PopCountI src));
5321   effect(KILL cr);
5322 
5323   format %{ "POPCNT $dst, $src" %}
5324   ins_encode %{
5325     __ popcntl($dst$$Register, $src$$Register);
5326   %}
5327   ins_pipe(ialu_reg);
5328 %}
5329 
5330 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5331   predicate(UsePopCountInstruction);
5332   match(Set dst (PopCountI (LoadI mem)));
5333   effect(KILL cr);
5334 
5335   format %{ "POPCNT $dst, $mem" %}
5336   ins_encode %{
5337     __ popcntl($dst$$Register, $mem$$Address);
5338   %}
5339   ins_pipe(ialu_reg);
5340 %}
5341 
5342 // Note: Long.bitCount(long) returns an int.
5343 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5344   predicate(UsePopCountInstruction);
5345   match(Set dst (PopCountL src));
5346   effect(KILL cr, TEMP tmp, TEMP dst);
5347 
5348   format %{ "POPCNT $dst, $src.lo\n\t"
5349             "POPCNT $tmp, $src.hi\n\t"
5350             "ADD    $dst, $tmp" %}
5351   ins_encode %{
5352     __ popcntl($dst$$Register, $src$$Register);
5353     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5354     __ addl($dst$$Register, $tmp$$Register);
5355   %}
5356   ins_pipe(ialu_reg);
5357 %}
5358 
5359 // Note: Long.bitCount(long) returns an int.
5360 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5361   predicate(UsePopCountInstruction);
5362   match(Set dst (PopCountL (LoadL mem)));
5363   effect(KILL cr, TEMP tmp, TEMP dst);
5364 
5365   format %{ "POPCNT $dst, $mem\n\t"
5366             "POPCNT $tmp, $mem+4\n\t"
5367             "ADD    $dst, $tmp" %}
5368   ins_encode %{
5369     //__ popcntl($dst$$Register, $mem$$Address$$first);
5370     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5371     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5372     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5373     __ addl($dst$$Register, $tmp$$Register);
5374   %}
5375   ins_pipe(ialu_reg);
5376 %}
5377 
5378 
5379 //----------Load/Store/Move Instructions---------------------------------------
5380 //----------Load Instructions--------------------------------------------------
5381 // Load Byte (8bit signed)
5382 instruct loadB(xRegI dst, memory mem) %{
5383   match(Set dst (LoadB mem));
5384 
5385   ins_cost(125);
5386   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5387 
5388   ins_encode %{
5389     __ movsbl($dst$$Register, $mem$$Address);
5390   %}
5391 
5392   ins_pipe(ialu_reg_mem);
5393 %}
5394 
5395 // Load Byte (8bit signed) into Long Register
5396 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5397   match(Set dst (ConvI2L (LoadB mem)));
5398   effect(KILL cr);
5399 
5400   ins_cost(375);
5401   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5402             "MOV    $dst.hi,$dst.lo\n\t"
5403             "SAR    $dst.hi,7" %}
5404 
5405   ins_encode %{
5406     __ movsbl($dst$$Register, $mem$$Address);
5407     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5408     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSBs are already sign-extended.
5409   %}
5410 
5411   ins_pipe(ialu_reg_mem);
5412 %}
5413 
5414 // Load Unsigned Byte (8bit UNsigned)
5415 instruct loadUB(xRegI dst, memory mem) %{
5416   match(Set dst (LoadUB mem));
5417 
5418   ins_cost(125);
5419   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5420 
5421   ins_encode %{
5422     __ movzbl($dst$$Register, $mem$$Address);
5423   %}
5424 
5425   ins_pipe(ialu_reg_mem);
5426 %}
5427 
5428 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5429 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5430   match(Set dst (ConvI2L (LoadUB mem)));
5431   effect(KILL cr);
5432 
5433   ins_cost(250);
5434   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5435             "XOR    $dst.hi,$dst.hi" %}
5436 
5437   ins_encode %{
5438     Register Rdst = $dst$$Register;
5439     __ movzbl(Rdst, $mem$$Address);
5440     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5441   %}
5442 
5443   ins_pipe(ialu_reg_mem);
5444 %}
5445 
5446 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5447 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5448   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5449   effect(KILL cr);
5450 
5451   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5452             "XOR    $dst.hi,$dst.hi\n\t"
5453             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5454   ins_encode %{
5455     Register Rdst = $dst$$Register;
5456     __ movzbl(Rdst, $mem$$Address);
5457     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5458     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5459   %}
5460   ins_pipe(ialu_reg_mem);
5461 %}
5462 
5463 // Load Short (16bit signed)
5464 instruct loadS(rRegI dst, memory mem) %{
5465   match(Set dst (LoadS mem));
5466 
5467   ins_cost(125);
5468   format %{ "MOVSX  $dst,$mem\t# short" %}
5469 
5470   ins_encode %{
5471     __ movswl($dst$$Register, $mem$$Address);
5472   %}
5473 
5474   ins_pipe(ialu_reg_mem);
5475 %}
5476 
5477 // Load Short (16 bit signed) to Byte (8 bit signed)
5478 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5479   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5480 
5481   ins_cost(125);
5482   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5483   ins_encode %{
5484     __ movsbl($dst$$Register, $mem$$Address);
5485   %}
5486   ins_pipe(ialu_reg_mem);
5487 %}
5488 
5489 // Load Short (16bit signed) into Long Register
5490 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5491   match(Set dst (ConvI2L (LoadS mem)));
5492   effect(KILL cr);
5493 
5494   ins_cost(375);
5495   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5496             "MOV    $dst.hi,$dst.lo\n\t"
5497             "SAR    $dst.hi,15" %}
5498 
5499   ins_encode %{
5500     __ movswl($dst$$Register, $mem$$Address);
5501     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5502     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSBs are already sign-extended.
5503   %}
5504 
5505   ins_pipe(ialu_reg_mem);
5506 %}
5507 
5508 // Load Unsigned Short/Char (16bit unsigned)
5509 instruct loadUS(rRegI dst, memory mem) %{
5510   match(Set dst (LoadUS mem));
5511 
5512   ins_cost(125);
5513   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5514 
5515   ins_encode %{
5516     __ movzwl($dst$$Register, $mem$$Address);
5517   %}
5518 
5519   ins_pipe(ialu_reg_mem);
5520 %}
5521 
5522 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5523 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5524   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5525 
5526   ins_cost(125);
5527   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5528   ins_encode %{
5529     __ movsbl($dst$$Register, $mem$$Address);
5530   %}
5531   ins_pipe(ialu_reg_mem);
5532 %}
5533 
5534 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5535 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5536   match(Set dst (ConvI2L (LoadUS mem)));
5537   effect(KILL cr);
5538 
5539   ins_cost(250);
5540   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5541             "XOR    $dst.hi,$dst.hi" %}
5542 
5543   ins_encode %{
5544     __ movzwl($dst$$Register, $mem$$Address);
5545     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5546   %}
5547 
5548   ins_pipe(ialu_reg_mem);
5549 %}
5550 
5551 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5552 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5553   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5554   effect(KILL cr);
5555 
5556   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5557             "XOR    $dst.hi,$dst.hi" %}
5558   ins_encode %{
5559     Register Rdst = $dst$$Register;
5560     __ movzbl(Rdst, $mem$$Address);
5561     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5562   %}
5563   ins_pipe(ialu_reg_mem);
5564 %}
5565 
5566 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5567 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5568   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5569   effect(KILL cr);
5570 
5571   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5572             "XOR    $dst.hi,$dst.hi\n\t"
5573             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5574   ins_encode %{
5575     Register Rdst = $dst$$Register;
5576     __ movzwl(Rdst, $mem$$Address);
5577     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5578     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5579   %}
5580   ins_pipe(ialu_reg_mem);
5581 %}
5582 
5583 // Load Integer
5584 instruct loadI(rRegI dst, memory mem) %{
5585   match(Set dst (LoadI mem));
5586 
5587   ins_cost(125);
5588   format %{ "MOV    $dst,$mem\t# int" %}
5589 
5590   ins_encode %{
5591     __ movl($dst$$Register, $mem$$Address);
5592   %}
5593 
5594   ins_pipe(ialu_reg_mem);
5595 %}
5596 
5597 // Load Integer (32 bit signed) to Byte (8 bit signed)
5598 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5599   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5600 
5601   ins_cost(125);
5602   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5603   ins_encode %{
5604     __ movsbl($dst$$Register, $mem$$Address);
5605   %}
5606   ins_pipe(ialu_reg_mem);
5607 %}
5608 
5609 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5610 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5611   match(Set dst (AndI (LoadI mem) mask));
5612 
5613   ins_cost(125);
5614   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5615   ins_encode %{
5616     __ movzbl($dst$$Register, $mem$$Address);
5617   %}
5618   ins_pipe(ialu_reg_mem);
5619 %}
5620 
5621 // Load Integer (32 bit signed) to Short (16 bit signed)
5622 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5623   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5624 
5625   ins_cost(125);
5626   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5627   ins_encode %{
5628     __ movswl($dst$$Register, $mem$$Address);
5629   %}
5630   ins_pipe(ialu_reg_mem);
5631 %}
5632 
5633 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5634 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5635   match(Set dst (AndI (LoadI mem) mask));
5636 
5637   ins_cost(125);
5638   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5639   ins_encode %{
5640     __ movzwl($dst$$Register, $mem$$Address);
5641   %}
5642   ins_pipe(ialu_reg_mem);
5643 %}
5644 
5645 // Load Integer into Long Register
5646 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5647   match(Set dst (ConvI2L (LoadI mem)));
5648   effect(KILL cr);
5649 
5650   ins_cost(375);
5651   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5652             "MOV    $dst.hi,$dst.lo\n\t"
5653             "SAR    $dst.hi,31" %}
5654 
5655   ins_encode %{
5656     __ movl($dst$$Register, $mem$$Address);
5657     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5658     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5659   %}
5660 
5661   ins_pipe(ialu_reg_mem);
5662 %}
5663 
5664 // Load Integer with mask 0xFF into Long Register
5665 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5666   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5667   effect(KILL cr);
5668 
5669   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5670             "XOR    $dst.hi,$dst.hi" %}
5671   ins_encode %{
5672     Register Rdst = $dst$$Register;
5673     __ movzbl(Rdst, $mem$$Address);
5674     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5675   %}
5676   ins_pipe(ialu_reg_mem);
5677 %}
5678 
5679 // Load Integer with mask 0xFFFF into Long Register
5680 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5681   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5682   effect(KILL cr);
5683 
5684   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5685             "XOR    $dst.hi,$dst.hi" %}
5686   ins_encode %{
5687     Register Rdst = $dst$$Register;
5688     __ movzwl(Rdst, $mem$$Address);
5689     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5690   %}
5691   ins_pipe(ialu_reg_mem);
5692 %}
5693 
5694 // Load Integer with 31-bit mask into Long Register
5695 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5696   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5697   effect(KILL cr);
5698 
5699   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5700             "XOR    $dst.hi,$dst.hi\n\t"
5701             "AND    $dst.lo,$mask" %}
5702   ins_encode %{
5703     Register Rdst = $dst$$Register;
5704     __ movl(Rdst, $mem$$Address);
5705     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5706     __ andl(Rdst, $mask$$constant);
5707   %}
5708   ins_pipe(ialu_reg_mem);
5709 %}
5710 
5711 // Load Unsigned Integer into Long Register
5712 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5713   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5714   effect(KILL cr);
5715 
5716   ins_cost(250);
5717   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5718             "XOR    $dst.hi,$dst.hi" %}
5719 
5720   ins_encode %{
5721     __ movl($dst$$Register, $mem$$Address);
5722     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5723   %}
5724 
5725   ins_pipe(ialu_reg_mem);
5726 %}
5727 
5728 // Load Long.  Cannot clobber address while loading, so restrict address
5729 // register to ESI
5730 instruct loadL(eRegL dst, load_long_memory mem) %{
5731   predicate(!((LoadLNode*)n)->require_atomic_access());
5732   match(Set dst (LoadL mem));
5733 
5734   ins_cost(250);
5735   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5736             "MOV    $dst.hi,$mem+4" %}
5737 
5738   ins_encode %{
5739     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5740     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5741     __ movl($dst$$Register, Amemlo);
5742     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5743   %}
5744 
5745   ins_pipe(ialu_reg_long_mem);
5746 %}
5747 
5748 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5749 // then store it down to the stack and reload on the int
5750 // side.
5751 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5752   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5753   match(Set dst (LoadL mem));
5754 
5755   ins_cost(200);
5756   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5757             "FISTp  $dst" %}
5758   ins_encode(enc_loadL_volatile(mem,dst));
5759   ins_pipe( fpu_reg_mem );
5760 %}
5761 
5762 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5763   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5764   match(Set dst (LoadL mem));
5765   effect(TEMP tmp);
5766   ins_cost(180);
5767   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5768             "MOVSD  $dst,$tmp" %}
5769   ins_encode %{
5770     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5771     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5772   %}
5773   ins_pipe( pipe_slow );
5774 %}
5775 
5776 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5777   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5778   match(Set dst (LoadL mem));
5779   effect(TEMP tmp);
5780   ins_cost(160);
5781   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5782             "MOVD   $dst.lo,$tmp\n\t"
5783             "PSRLQ  $tmp,32\n\t"
5784             "MOVD   $dst.hi,$tmp" %}
5785   ins_encode %{
5786     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5787     __ movdl($dst$$Register, $tmp$$XMMRegister);
5788     __ psrlq($tmp$$XMMRegister, 32);
5789     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5790   %}
5791   ins_pipe( pipe_slow );
5792 %}
5793 
5794 // Load Range
5795 instruct loadRange(rRegI dst, memory mem) %{
5796   match(Set dst (LoadRange mem));
5797 
5798   ins_cost(125);
5799   format %{ "MOV    $dst,$mem" %}
5800   opcode(0x8B);
5801   ins_encode( OpcP, RegMem(dst,mem));
5802   ins_pipe( ialu_reg_mem );
5803 %}
5804 
5805 
5806 // Load Pointer
5807 instruct loadP(eRegP dst, memory mem) %{
5808   match(Set dst (LoadP mem));
5809 
5810   ins_cost(125);
5811   format %{ "MOV    $dst,$mem" %}
5812   opcode(0x8B);
5813   ins_encode( OpcP, RegMem(dst,mem));
5814   ins_pipe( ialu_reg_mem );
5815 %}
5816 
5817 // Load Klass Pointer
5818 instruct loadKlass(eRegP dst, memory mem) %{
5819   match(Set dst (LoadKlass mem));
5820 
5821   ins_cost(125);
5822   format %{ "MOV    $dst,$mem" %}
5823   opcode(0x8B);
5824   ins_encode( OpcP, RegMem(dst,mem));
5825   ins_pipe( ialu_reg_mem );
5826 %}
5827 
5828 // Load Double
5829 instruct loadDPR(regDPR dst, memory mem) %{
5830   predicate(UseSSE<=1);
5831   match(Set dst (LoadD mem));
5832 
5833   ins_cost(150);
5834   format %{ "FLD_D  ST,$mem\n\t"
5835             "FSTP   $dst" %}
5836   opcode(0xDD);               /* DD /0 */
5837   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5838               Pop_Reg_DPR(dst) );
5839   ins_pipe( fpu_reg_mem );
5840 %}
5841 
5842 // Load Double to XMM
5843 instruct loadD(regD dst, memory mem) %{
5844   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5845   match(Set dst (LoadD mem));
5846   ins_cost(145);
5847   format %{ "MOVSD  $dst,$mem" %}
5848   ins_encode %{
5849     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5850   %}
5851   ins_pipe( pipe_slow );
5852 %}
5853 
5854 instruct loadD_partial(regD dst, memory mem) %{
5855   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5856   match(Set dst (LoadD mem));
5857   ins_cost(145);
5858   format %{ "MOVLPD $dst,$mem" %}
5859   ins_encode %{
5860     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5861   %}
5862   ins_pipe( pipe_slow );
5863 %}
5864 
5865 // Load to XMM register (single-precision floating point)
5866 // MOVSS instruction
5867 instruct loadF(regF dst, memory mem) %{
5868   predicate(UseSSE>=1);
5869   match(Set dst (LoadF mem));
5870   ins_cost(145);
5871   format %{ "MOVSS  $dst,$mem" %}
5872   ins_encode %{
5873     __ movflt ($dst$$XMMRegister, $mem$$Address);
5874   %}
5875   ins_pipe( pipe_slow );
5876 %}
5877 
5878 // Load Float
5879 instruct loadFPR(regFPR dst, memory mem) %{
5880   predicate(UseSSE==0);
5881   match(Set dst (LoadF mem));
5882 
5883   ins_cost(150);
5884   format %{ "FLD_S  ST,$mem\n\t"
5885             "FSTP   $dst" %}
5886   opcode(0xD9);               /* D9 /0 */
5887   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5888               Pop_Reg_FPR(dst) );
5889   ins_pipe( fpu_reg_mem );
5890 %}
5891 
5892 // Load Effective Address
5893 instruct leaP8(eRegP dst, indOffset8 mem) %{
5894   match(Set dst mem);
5895 
5896   ins_cost(110);
5897   format %{ "LEA    $dst,$mem" %}
5898   opcode(0x8D);
5899   ins_encode( OpcP, RegMem(dst,mem));
5900   ins_pipe( ialu_reg_reg_fat );
5901 %}
5902 
5903 instruct leaP32(eRegP dst, indOffset32 mem) %{
5904   match(Set dst mem);
5905 
5906   ins_cost(110);
5907   format %{ "LEA    $dst,$mem" %}
5908   opcode(0x8D);
5909   ins_encode( OpcP, RegMem(dst,mem));
5910   ins_pipe( ialu_reg_reg_fat );
5911 %}
5912 
5913 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5914   match(Set dst mem);
5915 
5916   ins_cost(110);
5917   format %{ "LEA    $dst,$mem" %}
5918   opcode(0x8D);
5919   ins_encode( OpcP, RegMem(dst,mem));
5920   ins_pipe( ialu_reg_reg_fat );
5921 %}
5922 
5923 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5924   match(Set dst mem);
5925 
5926   ins_cost(110);
5927   format %{ "LEA    $dst,$mem" %}
5928   opcode(0x8D);
5929   ins_encode( OpcP, RegMem(dst,mem));
5930   ins_pipe( ialu_reg_reg_fat );
5931 %}
5932 
5933 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5934   match(Set dst mem);
5935 
5936   ins_cost(110);
5937   format %{ "LEA    $dst,$mem" %}
5938   opcode(0x8D);
5939   ins_encode( OpcP, RegMem(dst,mem));
5940   ins_pipe( ialu_reg_reg_fat );
5941 %}
5942 
5943 // Load Constant
5944 instruct loadConI(rRegI dst, immI src) %{
5945   match(Set dst src);
5946 
5947   format %{ "MOV    $dst,$src" %}
5948   ins_encode( LdImmI(dst, src) );
5949   ins_pipe( ialu_reg_fat );
5950 %}
5951 
5952 // Load Constant zero
5953 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5954   match(Set dst src);
5955   effect(KILL cr);
5956 
5957   ins_cost(50);
5958   format %{ "XOR    $dst,$dst" %}
5959   opcode(0x33);  /* + rd */
5960   ins_encode( OpcP, RegReg( dst, dst ) );
5961   ins_pipe( ialu_reg );
5962 %}
5963 
5964 instruct loadConP(eRegP dst, immP src) %{
5965   match(Set dst src);
5966 
5967   format %{ "MOV    $dst,$src" %}
5968   opcode(0xB8);  /* + rd */
5969   ins_encode( LdImmP(dst, src) );
5970   ins_pipe( ialu_reg_fat );
5971 %}
5972 
5973 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5974   match(Set dst src);
5975   effect(KILL cr);
5976   ins_cost(200);
5977   format %{ "MOV    $dst.lo,$src.lo\n\t"
5978             "MOV    $dst.hi,$src.hi" %}
5979   opcode(0xB8);
5980   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5981   ins_pipe( ialu_reg_long_fat );
5982 %}
5983 
5984 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5985   match(Set dst src);
5986   effect(KILL cr);
5987   ins_cost(150);
5988   format %{ "XOR    $dst.lo,$dst.lo\n\t"
5989             "XOR    $dst.hi,$dst.hi" %}
5990   opcode(0x33,0x33);
5991   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5992   ins_pipe( ialu_reg_long );
5993 %}
5994 
5995 // The instruction usage is guarded by predicate in operand immFPR().
5996 instruct loadConFPR(regFPR dst, immFPR con) %{
5997   match(Set dst con);
5998   ins_cost(125);
5999   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6000             "FSTP   $dst" %}
6001   ins_encode %{
6002     __ fld_s($constantaddress($con));
6003     __ fstp_d($dst$$reg);
6004   %}
6005   ins_pipe(fpu_reg_con);
6006 %}
6007 
6008 // The instruction usage is guarded by predicate in operand immFPR0().
6009 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6010   match(Set dst con);
6011   ins_cost(125);
6012   format %{ "FLDZ   ST\n\t"
6013             "FSTP   $dst" %}
6014   ins_encode %{
6015     __ fldz();
6016     __ fstp_d($dst$$reg);
6017   %}
6018   ins_pipe(fpu_reg_con);
6019 %}
6020 
6021 // The instruction usage is guarded by predicate in operand immFPR1().
6022 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6023   match(Set dst con);
6024   ins_cost(125);
6025   format %{ "FLD1   ST\n\t"
6026             "FSTP   $dst" %}
6027   ins_encode %{
6028     __ fld1();
6029     __ fstp_d($dst$$reg);
6030   %}
6031   ins_pipe(fpu_reg_con);
6032 %}
6033 
6034 // The instruction usage is guarded by predicate in operand immF().
6035 instruct loadConF(regF dst, immF con) %{
6036   match(Set dst con);
6037   ins_cost(125);
6038   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6039   ins_encode %{
6040     __ movflt($dst$$XMMRegister, $constantaddress($con));
6041   %}
6042   ins_pipe(pipe_slow);
6043 %}
6044 
6045 // The instruction usage is guarded by predicate in operand immF0().
6046 instruct loadConF0(regF dst, immF0 src) %{
6047   match(Set dst src);
6048   ins_cost(100);
6049   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6050   ins_encode %{
6051     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6052   %}
6053   ins_pipe(pipe_slow);
6054 %}
6055 
6056 // The instruction usage is guarded by predicate in operand immDPR().
6057 instruct loadConDPR(regDPR dst, immDPR con) %{
6058   match(Set dst con);
6059   ins_cost(125);
6060 
6061   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6062             "FSTP   $dst" %}
6063   ins_encode %{
6064     __ fld_d($constantaddress($con));
6065     __ fstp_d($dst$$reg);
6066   %}
6067   ins_pipe(fpu_reg_con);
6068 %}
6069 
6070 // The instruction usage is guarded by predicate in operand immDPR0().
6071 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6072   match(Set dst con);
6073   ins_cost(125);
6074 
6075   format %{ "FLDZ   ST\n\t"
6076             "FSTP   $dst" %}
6077   ins_encode %{
6078     __ fldz();
6079     __ fstp_d($dst$$reg);
6080   %}
6081   ins_pipe(fpu_reg_con);
6082 %}
6083 
6084 // The instruction usage is guarded by predicate in operand immDPR1().
6085 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6086   match(Set dst con);
6087   ins_cost(125);
6088 
6089   format %{ "FLD1   ST\n\t"
6090             "FSTP   $dst" %}
6091   ins_encode %{
6092     __ fld1();
6093     __ fstp_d($dst$$reg);
6094   %}
6095   ins_pipe(fpu_reg_con);
6096 %}
6097 
6098 // The instruction usage is guarded by predicate in operand immD().
6099 instruct loadConD(regD dst, immD con) %{
6100   match(Set dst con);
6101   ins_cost(125);
6102   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6103   ins_encode %{
6104     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6105   %}
6106   ins_pipe(pipe_slow);
6107 %}
6108 
6109 // The instruction usage is guarded by predicate in operand immD0().
6110 instruct loadConD0(regD dst, immD0 src) %{
6111   match(Set dst src);
6112   ins_cost(100);
6113   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6114   ins_encode %{
6115     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6116   %}
6117   ins_pipe( pipe_slow );
6118 %}
6119 
6120 // Load Stack Slot
6121 instruct loadSSI(rRegI dst, stackSlotI src) %{
6122   match(Set dst src);
6123   ins_cost(125);
6124 
6125   format %{ "MOV    $dst,$src" %}
6126   opcode(0x8B);
6127   ins_encode( OpcP, RegMem(dst,src));
6128   ins_pipe( ialu_reg_mem );
6129 %}
6130 
6131 instruct loadSSL(eRegL dst, stackSlotL src) %{
6132   match(Set dst src);
6133 
6134   ins_cost(200);
6135   format %{ "MOV    $dst,$src.lo\n\t"
6136             "MOV    $dst+4,$src.hi" %}
6137   opcode(0x8B, 0x8B);
6138   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6139   ins_pipe( ialu_mem_long_reg );
6140 %}
6141 
6142 // Load Stack Slot
6143 instruct loadSSP(eRegP dst, stackSlotP src) %{
6144   match(Set dst src);
6145   ins_cost(125);
6146 
6147   format %{ "MOV    $dst,$src" %}
6148   opcode(0x8B);
6149   ins_encode( OpcP, RegMem(dst,src));
6150   ins_pipe( ialu_reg_mem );
6151 %}
6152 
6153 // Load Stack Slot
6154 instruct loadSSF(regFPR dst, stackSlotF src) %{
6155   match(Set dst src);
6156   ins_cost(125);
6157 
6158   format %{ "FLD_S  $src\n\t"
6159             "FSTP   $dst" %}
6160   opcode(0xD9);               /* D9 /0, FLD m32real */
6161   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6162               Pop_Reg_FPR(dst) );
6163   ins_pipe( fpu_reg_mem );
6164 %}
6165 
6166 // Load Stack Slot
6167 instruct loadSSD(regDPR dst, stackSlotD src) %{
6168   match(Set dst src);
6169   ins_cost(125);
6170 
6171   format %{ "FLD_D  $src\n\t"
6172             "FSTP   $dst" %}
6173   opcode(0xDD);               /* DD /0, FLD m64real */
6174   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6175               Pop_Reg_DPR(dst) );
6176   ins_pipe( fpu_reg_mem );
6177 %}
6178 
6179 // Prefetch instructions for allocation.
6180 // Must be safe to execute with invalid address (cannot fault).
6181 
6182 instruct prefetchAlloc0( memory mem ) %{
6183   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6184   match(PrefetchAllocation mem);
6185   ins_cost(0);
6186   size(0);
6187   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6188   ins_encode();
6189   ins_pipe(empty);
6190 %}
6191 
6192 instruct prefetchAlloc( memory mem ) %{
6193   predicate(AllocatePrefetchInstr==3);
6194   match( PrefetchAllocation mem );
6195   ins_cost(100);
6196 
6197   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6198   ins_encode %{
6199     __ prefetchw($mem$$Address);
6200   %}
6201   ins_pipe(ialu_mem);
6202 %}
6203 
6204 instruct prefetchAllocNTA( memory mem ) %{
6205   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6206   match(PrefetchAllocation mem);
6207   ins_cost(100);
6208 
6209   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6210   ins_encode %{
6211     __ prefetchnta($mem$$Address);
6212   %}
6213   ins_pipe(ialu_mem);
6214 %}
6215 
6216 instruct prefetchAllocT0( memory mem ) %{
6217   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6218   match(PrefetchAllocation mem);
6219   ins_cost(100);
6220 
6221   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6222   ins_encode %{
6223     __ prefetcht0($mem$$Address);
6224   %}
6225   ins_pipe(ialu_mem);
6226 %}
6227 
6228 instruct prefetchAllocT2( memory mem ) %{
6229   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6230   match(PrefetchAllocation mem);
6231   ins_cost(100);
6232 
6233   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6234   ins_encode %{
6235     __ prefetcht2($mem$$Address);
6236   %}
6237   ins_pipe(ialu_mem);
6238 %}
6239 
6240 //----------Store Instructions-------------------------------------------------
6241 
6242 // Store Byte
6243 instruct storeB(memory mem, xRegI src) %{
6244   match(Set mem (StoreB mem src));
6245 
6246   ins_cost(125);
6247   format %{ "MOV8   $mem,$src" %}
6248   opcode(0x88);
6249   ins_encode( OpcP, RegMem( src, mem ) );
6250   ins_pipe( ialu_mem_reg );
6251 %}
6252 
6253 // Store Char/Short
6254 instruct storeC(memory mem, rRegI src) %{
6255   match(Set mem (StoreC mem src));
6256 
6257   ins_cost(125);
6258   format %{ "MOV16  $mem,$src" %}
6259   opcode(0x89, 0x66);
6260   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6261   ins_pipe( ialu_mem_reg );
6262 %}
6263 
6264 // Store Integer
6265 instruct storeI(memory mem, rRegI src) %{
6266   match(Set mem (StoreI mem src));
6267 
6268   ins_cost(125);
6269   format %{ "MOV    $mem,$src" %}
6270   opcode(0x89);
6271   ins_encode( OpcP, RegMem( src, mem ) );
6272   ins_pipe( ialu_mem_reg );
6273 %}
6274 
6275 // Store Long
6276 instruct storeL(long_memory mem, eRegL src) %{
6277   predicate(!((StoreLNode*)n)->require_atomic_access());
6278   match(Set mem (StoreL mem src));
6279 
6280   ins_cost(200);
6281   format %{ "MOV    $mem,$src.lo\n\t"
6282             "MOV    $mem+4,$src.hi" %}
6283   opcode(0x89, 0x89);
6284   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6285   ins_pipe( ialu_mem_long_reg );
6286 %}
6287 
6288 // Store Long to Integer
6289 instruct storeL2I(memory mem, eRegL src) %{
6290   match(Set mem (StoreI mem (ConvL2I src)));
6291 
6292   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6293   ins_encode %{
6294     __ movl($mem$$Address, $src$$Register);
6295   %}
6296   ins_pipe(ialu_mem_reg);
6297 %}
6298 
6299 // Volatile Store Long.  Must be atomic, so move it into
6300 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6301 // target address before the store (for null-ptr checks)
6302 // so the memory operand is used twice in the encoding.
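// Sketch of the emitted sequence (the probe uses EAX only as a compare
// operand; its value does not matter):
//   CMP   $mem,EAX        ; read the address - the implicit null check fires here
//   FILD  qword [$src]    ; push the 64-bit value onto the x87 stack
//   FISTP qword [$mem]    ; one 8-byte store, so the long cannot be torn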
6303 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6304   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6305   match(Set mem (StoreL mem src));
6306   effect( KILL cr );
6307   ins_cost(400);
6308   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6309             "FILD   $src\n\t"
6310             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6311   opcode(0x3B);
6312   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6313   ins_pipe( fpu_reg_mem );
6314 %}
6315 
6316 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6317   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6318   match(Set mem (StoreL mem src));
6319   effect( TEMP tmp, KILL cr );
6320   ins_cost(380);
6321   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6322             "MOVSD  $tmp,$src\n\t"
6323             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6324   ins_encode %{
6325     __ cmpl(rax, $mem$$Address);
6326     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6327     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6328   %}
6329   ins_pipe( pipe_slow );
6330 %}
6331 
6332 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6333   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6334   match(Set mem (StoreL mem src));
6335   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6336   ins_cost(360);
6337   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6338             "MOVD   $tmp,$src.lo\n\t"
6339             "MOVD   $tmp2,$src.hi\n\t"
6340             "PUNPCKLDQ $tmp,$tmp2\n\t"
6341             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6342   ins_encode %{
6343     __ cmpl(rax, $mem$$Address);
6344     __ movdl($tmp$$XMMRegister, $src$$Register);
6345     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6346     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6347     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6348   %}
6349   ins_pipe( pipe_slow );
6350 %}
6351 
6352 // Store Pointer; for storing unknown oops and raw pointers
6353 instruct storeP(memory mem, anyRegP src) %{
6354   match(Set mem (StoreP mem src));
6355 
6356   ins_cost(125);
6357   format %{ "MOV    $mem,$src" %}
6358   opcode(0x89);
6359   ins_encode( OpcP, RegMem( src, mem ) );
6360   ins_pipe( ialu_mem_reg );
6361 %}
6362 
6363 // Store Integer Immediate
6364 instruct storeImmI(memory mem, immI src) %{
6365   match(Set mem (StoreI mem src));
6366 
6367   ins_cost(150);
6368   format %{ "MOV    $mem,$src" %}
6369   opcode(0xC7);               /* C7 /0 */
6370   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6371   ins_pipe( ialu_mem_imm );
6372 %}
6373 
6374 // Store Short/Char Immediate
6375 instruct storeImmI16(memory mem, immI16 src) %{
6376   predicate(UseStoreImmI16);
6377   match(Set mem (StoreC mem src));
6378 
6379   ins_cost(150);
6380   format %{ "MOV16  $mem,$src" %}
6381   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6382   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6383   ins_pipe( ialu_mem_imm );
6384 %}
6385 
6386 // Store Pointer Immediate; null pointers or constant oops that do not
6387 // need card-mark barriers.
6388 instruct storeImmP(memory mem, immP src) %{
6389   match(Set mem (StoreP mem src));
6390 
6391   ins_cost(150);
6392   format %{ "MOV    $mem,$src" %}
6393   opcode(0xC7);               /* C7 /0 */
6394   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6395   ins_pipe( ialu_mem_imm );
6396 %}
6397 
6398 // Store Byte Immediate
6399 instruct storeImmB(memory mem, immI8 src) %{
6400   match(Set mem (StoreB mem src));
6401 
6402   ins_cost(150);
6403   format %{ "MOV8   $mem,$src" %}
6404   opcode(0xC6);               /* C6 /0 */
6405   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6406   ins_pipe( ialu_mem_imm );
6407 %}
6408 
6409 // Store CMS card-mark Immediate
6410 instruct storeImmCM(memory mem, immI8 src) %{
6411   match(Set mem (StoreCM mem src));
6412 
6413   ins_cost(150);
6414   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6415   opcode(0xC6);               /* C6 /0 */
6416   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6417   ins_pipe( ialu_mem_imm );
6418 %}
6419 
6420 // Store Double
6421 instruct storeDPR( memory mem, regDPR1 src) %{
6422   predicate(UseSSE<=1);
6423   match(Set mem (StoreD mem src));
6424 
6425   ins_cost(100);
6426   format %{ "FST_D  $mem,$src" %}
6427   opcode(0xDD);       /* DD /2 */
6428   ins_encode( enc_FPR_store(mem,src) );
6429   ins_pipe( fpu_mem_reg );
6430 %}
6431 
6432 // Store double does rounding on x86
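// (The x87 stack holds values in 80-bit extended precision; FST_D/FST_S round
// to the 64-/32-bit IEEE format as part of the store, which is the effect the
// RoundDouble/RoundFloat nodes ask for.)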
6433 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6434   predicate(UseSSE<=1);
6435   match(Set mem (StoreD mem (RoundDouble src)));
6436 
6437   ins_cost(100);
6438   format %{ "FST_D  $mem,$src\t# round" %}
6439   opcode(0xDD);       /* DD /2 */
6440   ins_encode( enc_FPR_store(mem,src) );
6441   ins_pipe( fpu_mem_reg );
6442 %}
6443 
6444 // Store XMM register to memory (double-precision floating point)

6445 // MOVSD instruction
6446 instruct storeD(memory mem, regD src) %{
6447   predicate(UseSSE>=2);
6448   match(Set mem (StoreD mem src));
6449   ins_cost(95);
6450   format %{ "MOVSD  $mem,$src" %}
6451   ins_encode %{
6452     __ movdbl($mem$$Address, $src$$XMMRegister);
6453   %}
6454   ins_pipe( pipe_slow );
6455 %}
6456 
6457 // Store XMM register to memory (single-precision floating point)
6458 // MOVSS instruction
6459 instruct storeF(memory mem, regF src) %{
6460   predicate(UseSSE>=1);
6461   match(Set mem (StoreF mem src));
6462   ins_cost(95);
6463   format %{ "MOVSS  $mem,$src" %}
6464   ins_encode %{
6465     __ movflt($mem$$Address, $src$$XMMRegister);
6466   %}
6467   ins_pipe( pipe_slow );
6468 %}
6469 
6470 // Store Float
6471 instruct storeFPR( memory mem, regFPR1 src) %{
6472   predicate(UseSSE==0);
6473   match(Set mem (StoreF mem src));
6474 
6475   ins_cost(100);
6476   format %{ "FST_S  $mem,$src" %}
6477   opcode(0xD9);       /* D9 /2 */
6478   ins_encode( enc_FPR_store(mem,src) );
6479   ins_pipe( fpu_mem_reg );
6480 %}
6481 
6482 // Store Float does rounding on x86
6483 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6484   predicate(UseSSE==0);
6485   match(Set mem (StoreF mem (RoundFloat src)));
6486 
6487   ins_cost(100);
6488   format %{ "FST_S  $mem,$src\t# round" %}
6489   opcode(0xD9);       /* D9 /2 */
6490   ins_encode( enc_FPR_store(mem,src) );
6491   ins_pipe( fpu_mem_reg );
6492 %}
6493 
6494 // Store Float with Double-to-Float rounding on x86
6495 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6496   predicate(UseSSE<=1);
6497   match(Set mem (StoreF mem (ConvD2F src)));
6498 
6499   ins_cost(100);
6500   format %{ "FST_S  $mem,$src\t# D-round" %}
6501   opcode(0xD9);       /* D9 /2 */
6502   ins_encode( enc_FPR_store(mem,src) );
6503   ins_pipe( fpu_mem_reg );
6504 %}
6505 
6506 // Store immediate Float value (faster than storing from an FPU register)
6507 // The instruction usage is guarded by predicate in operand immFPR().
6508 instruct storeFPR_imm( memory mem, immFPR src) %{
6509   match(Set mem (StoreF mem src));
6510 
6511   ins_cost(50);
6512   format %{ "MOV    $mem,$src\t# store float" %}
6513   opcode(0xC7);               /* C7 /0 */
6514   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6515   ins_pipe( ialu_mem_imm );
6516 %}
6517 
6518 // Store immediate Float value (faster than storing from an XMM register)
6519 // The instruction usage is guarded by predicate in operand immF().
6520 instruct storeF_imm( memory mem, immF src) %{
6521   match(Set mem (StoreF mem src));
6522 
6523   ins_cost(50);
6524   format %{ "MOV    $mem,$src\t# store float" %}
6525   opcode(0xC7);               /* C7 /0 */
6526   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6527   ins_pipe( ialu_mem_imm );
6528 %}
6529 
6530 // Store Integer to stack slot
6531 instruct storeSSI(stackSlotI dst, rRegI src) %{
6532   match(Set dst src);
6533 
6534   ins_cost(100);
6535   format %{ "MOV    $dst,$src" %}
6536   opcode(0x89);
6537   ins_encode( OpcPRegSS( dst, src ) );
6538   ins_pipe( ialu_mem_reg );
6539 %}
6540 
6541 // Store Pointer to stack slot
6542 instruct storeSSP(stackSlotP dst, eRegP src) %{
6543   match(Set dst src);
6544 
6545   ins_cost(100);
6546   format %{ "MOV    $dst,$src" %}
6547   opcode(0x89);
6548   ins_encode( OpcPRegSS( dst, src ) );
6549   ins_pipe( ialu_mem_reg );
6550 %}
6551 
6552 // Store Long to stack slot
6553 instruct storeSSL(stackSlotL dst, eRegL src) %{
6554   match(Set dst src);
6555 
6556   ins_cost(200);
6557   format %{ "MOV    $dst,$src.lo\n\t"
6558             "MOV    $dst+4,$src.hi" %}
6559   opcode(0x89, 0x89);
6560   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6561   ins_pipe( ialu_mem_long_reg );
6562 %}
6563 
6564 //----------MemBar Instructions-----------------------------------------------
6565 // Memory barrier flavors
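// x86 has a strong (TSO-like) memory model: the only reordering visible to
// software is a store being delayed past a later load.  Acquire, release and
// store-store barriers therefore need no code at all; only the volatile
// (StoreLoad) barrier below emits an instruction.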
6566 
6567 instruct membar_acquire() %{
6568   match(MemBarAcquire);
6569   match(LoadFence);
6570   ins_cost(400);
6571 
6572   size(0);
6573   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6574   ins_encode();
6575   ins_pipe(empty);
6576 %}
6577 
6578 instruct membar_acquire_lock() %{
6579   match(MemBarAcquireLock);
6580   ins_cost(0);
6581 
6582   size(0);
6583   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6584   ins_encode( );
6585   ins_pipe(empty);
6586 %}
6587 
6588 instruct membar_release() %{
6589   match(MemBarRelease);
6590   match(StoreFence);
6591   ins_cost(400);
6592 
6593   size(0);
6594   format %{ "MEMBAR-release ! (empty encoding)" %}
6595   ins_encode( );
6596   ins_pipe(empty);
6597 %}
6598 
6599 instruct membar_release_lock() %{
6600   match(MemBarReleaseLock);
6601   ins_cost(0);
6602 
6603   size(0);
6604   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6605   ins_encode( );
6606   ins_pipe(empty);
6607 %}
6608 
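// The StoreLoad barrier is implemented as a LOCKed add of zero to the top of
// the stack rather than MFENCE: it is a data no-op, has full-fence semantics,
// and is cheaper than MFENCE on most processors.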
6609 instruct membar_volatile(eFlagsReg cr) %{
6610   match(MemBarVolatile);
6611   effect(KILL cr);
6612   ins_cost(400);
6613 
6614   format %{
6615     $$template
6616     if (os::is_MP()) {
6617       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6618     } else {
6619       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6620     }
6621   %}
6622   ins_encode %{
6623     __ membar(Assembler::StoreLoad);
6624   %}
6625   ins_pipe(pipe_slow);
6626 %}
6627 
6628 instruct unnecessary_membar_volatile() %{
6629   match(MemBarVolatile);
6630   predicate(Matcher::post_store_load_barrier(n));
6631   ins_cost(0);
6632 
6633   size(0);
6634   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6635   ins_encode( );
6636   ins_pipe(empty);
6637 %}
6638 
6639 instruct membar_storestore() %{
6640   match(MemBarStoreStore);
6641   ins_cost(0);
6642 
6643   size(0);
6644   format %{ "MEMBAR-storestore (empty encoding)" %}
6645   ins_encode( );
6646   ins_pipe(empty);
6647 %}
6648 
6649 //----------Move Instructions--------------------------------------------------
6650 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6651   match(Set dst (CastX2P src));
6652   format %{ "# X2P  $dst, $src" %}
6653   ins_encode( /*empty encoding*/ );
6654   ins_cost(0);
6655   ins_pipe(empty);
6656 %}
6657 
6658 instruct castP2X(rRegI dst, eRegP src ) %{
6659   match(Set dst (CastP2X src));
6660   ins_cost(50);
6661   format %{ "MOV    $dst, $src\t# CastP2X" %}
6662   ins_encode( enc_Copy( dst, src) );
6663   ins_pipe( ialu_reg_reg );
6664 %}
6665 
6666 //----------Conditional Move---------------------------------------------------
6667 // Conditional move
6668 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6669   predicate(!VM_Version::supports_cmov() );
6670   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6671   ins_cost(200);
6672   format %{ "J$cop,us skip\t# signed cmove\n\t"
6673             "MOV    $dst,$src\n"
6674       "skip:" %}
6675   ins_encode %{
6676     Label Lskip;
6677     // Invert sense of branch from sense of CMOV
6678     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6679     __ movl($dst$$Register, $src$$Register);
6680     __ bind(Lskip);
6681   %}
6682   ins_pipe( pipe_cmov_reg );
6683 %}
6684 
6685 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6686   predicate(!VM_Version::supports_cmov() );
6687   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6688   ins_cost(200);
6689   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6690             "MOV    $dst,$src\n"
6691       "skip:" %}
6692   ins_encode %{
6693     Label Lskip;
6694     // Invert sense of branch from sense of CMOV
6695     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6696     __ movl($dst$$Register, $src$$Register);
6697     __ bind(Lskip);
6698   %}
6699   ins_pipe( pipe_cmov_reg );
6700 %}
6701 
6702 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6703   predicate(VM_Version::supports_cmov() );
6704   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6705   ins_cost(200);
6706   format %{ "CMOV$cop $dst,$src" %}
6707   opcode(0x0F,0x40);
6708   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6709   ins_pipe( pipe_cmov_reg );
6710 %}
6711 
6712 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6713   predicate(VM_Version::supports_cmov() );
6714   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6715   ins_cost(200);
6716   format %{ "CMOV$cop $dst,$src" %}
6717   opcode(0x0F,0x40);
6718   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6719   ins_pipe( pipe_cmov_reg );
6720 %}
6721 
6722 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6723   predicate(VM_Version::supports_cmov() );
6724   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6725   ins_cost(200);
6726   expand %{
6727     cmovI_regU(cop, cr, dst, src);
6728   %}
6729 %}
6730 
6731 // Conditional move
6732 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6733   predicate(VM_Version::supports_cmov() );
6734   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6735   ins_cost(250);
6736   format %{ "CMOV$cop $dst,$src" %}
6737   opcode(0x0F,0x40);
6738   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6739   ins_pipe( pipe_cmov_mem );
6740 %}
6741 
6742 // Conditional move
6743 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6744   predicate(VM_Version::supports_cmov() );
6745   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6746   ins_cost(250);
6747   format %{ "CMOV$cop $dst,$src" %}
6748   opcode(0x0F,0x40);
6749   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6750   ins_pipe( pipe_cmov_mem );
6751 %}
6752 
6753 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6754   predicate(VM_Version::supports_cmov() );
6755   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6756   ins_cost(250);
6757   expand %{
6758     cmovI_memU(cop, cr, dst, src);
6759   %}
6760 %}
6761 
6762 // Conditional move
6763 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6764   predicate(VM_Version::supports_cmov() );
6765   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6766   ins_cost(200);
6767   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6768   opcode(0x0F,0x40);
6769   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6770   ins_pipe( pipe_cmov_reg );
6771 %}
6772 
6773 // Conditional move (non-P6 version)
6774 // Note: a CMoveP is generated for stubs and native wrappers
6775 //        regardless of whether we are on a P6, so we
6776 //        emulate a cmov here
6777 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6778   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6779   ins_cost(300);
6780   format %{ "Jn$cop   skip\n\t"
6781           "MOV    $dst,$src\t# pointer\n"
6782       "skip:" %}
6783   opcode(0x8b);
6784   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6785   ins_pipe( pipe_cmov_reg );
6786 %}
6787 
6788 // Conditional move
6789 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6790   predicate(VM_Version::supports_cmov() );
6791   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6792   ins_cost(200);
6793   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6794   opcode(0x0F,0x40);
6795   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6796   ins_pipe( pipe_cmov_reg );
6797 %}
6798 
6799 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6800   predicate(VM_Version::supports_cmov() );
6801   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6802   ins_cost(200);
6803   expand %{
6804     cmovP_regU(cop, cr, dst, src);
6805   %}
6806 %}
6807 
6808 // DISABLED: Requires the ADLC to emit a bottom_type call that
6809 // correctly meets the two pointer arguments; one is an incoming
6810 // register but the other is a memory operand.  ALSO appears to
6811 // be buggy with implicit null checks.
6812 //
6813 //// Conditional move
6814 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6815 //  predicate(VM_Version::supports_cmov() );
6816 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6817 //  ins_cost(250);
6818 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6819 //  opcode(0x0F,0x40);
6820 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6821 //  ins_pipe( pipe_cmov_mem );
6822 //%}
6823 //
6824 //// Conditional move
6825 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6826 //  predicate(VM_Version::supports_cmov() );
6827 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6828 //  ins_cost(250);
6829 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6830 //  opcode(0x0F,0x40);
6831 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6832 //  ins_pipe( pipe_cmov_mem );
6833 //%}
6834 
6835 // Conditional move
6836 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6837   predicate(UseSSE<=1);
6838   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6839   ins_cost(200);
6840   format %{ "FCMOV$cop $dst,$src\t# double" %}
6841   opcode(0xDA);
6842   ins_encode( enc_cmov_dpr(cop,src) );
6843   ins_pipe( pipe_cmovDPR_reg );
6844 %}
6845 
6846 // Conditional move
6847 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6848   predicate(UseSSE==0);
6849   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6850   ins_cost(200);
6851   format %{ "FCMOV$cop $dst,$src\t# float" %}
6852   opcode(0xDA);
6853   ins_encode( enc_cmov_dpr(cop,src) );
6854   ins_pipe( pipe_cmovDPR_reg );
6855 %}
6856 
6857 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
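// (FCMOVcc only tests CF/ZF/PF, i.e. the conditions produced by an unsigned-
// style compare, so a signed condition has to be handled with an explicit
// short branch around the move, as below.)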
6858 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6859   predicate(UseSSE<=1);
6860   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6861   ins_cost(200);
6862   format %{ "Jn$cop   skip\n\t"
6863             "MOV    $dst,$src\t# double\n"
6864       "skip:" %}
6865   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6866   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6867   ins_pipe( pipe_cmovDPR_reg );
6868 %}
6869 
6870 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6871 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6872   predicate(UseSSE==0);
6873   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6874   ins_cost(200);
6875   format %{ "Jn$cop    skip\n\t"
6876             "MOV    $dst,$src\t# float\n"
6877       "skip:" %}
6878   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6879   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6880   ins_pipe( pipe_cmovDPR_reg );
6881 %}
6882 
6883 // No CMOVE with SSE/SSE2
6884 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6885   predicate (UseSSE>=1);
6886   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6887   ins_cost(200);
6888   format %{ "Jn$cop   skip\n\t"
6889             "MOVSS  $dst,$src\t# float\n"
6890       "skip:" %}
6891   ins_encode %{
6892     Label skip;
6893     // Invert sense of branch from sense of CMOV
6894     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6895     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6896     __ bind(skip);
6897   %}
6898   ins_pipe( pipe_slow );
6899 %}
6900 
6901 // No CMOVE with SSE/SSE2
6902 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6903   predicate (UseSSE>=2);
6904   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6905   ins_cost(200);
6906   format %{ "Jn$cop   skip\n\t"
6907             "MOVSD  $dst,$src\t# double\n"
6908       "skip:" %}
6909   ins_encode %{
6910     Label skip;
6911     // Invert sense of branch from sense of CMOV
6912     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6913     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6914     __ bind(skip);
6915   %}
6916   ins_pipe( pipe_slow );
6917 %}
6918 
6919 // unsigned version
6920 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6921   predicate (UseSSE>=1);
6922   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6923   ins_cost(200);
6924   format %{ "Jn$cop   skip\n\t"
6925             "MOVSS  $dst,$src\t# float\n"
6926       "skip:" %}
6927   ins_encode %{
6928     Label skip;
6929     // Invert sense of branch from sense of CMOV
6930     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6931     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6932     __ bind(skip);
6933   %}
6934   ins_pipe( pipe_slow );
6935 %}
6936 
6937 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6938   predicate (UseSSE>=1);
6939   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6940   ins_cost(200);
6941   expand %{
6942     fcmovF_regU(cop, cr, dst, src);
6943   %}
6944 %}
6945 
6946 // unsigned version
6947 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6948   predicate (UseSSE>=2);
6949   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6950   ins_cost(200);
6951   format %{ "Jn$cop   skip\n\t"
6952             "MOVSD  $dst,$src\t# double\n"
6953       "skip:" %}
6954   ins_encode %{
6955     Label skip;
6956     // Invert sense of branch from sense of CMOV
6957     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6958     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6959     __ bind(skip);
6960   %}
6961   ins_pipe( pipe_slow );
6962 %}
6963 
6964 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6965   predicate (UseSSE>=2);
6966   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6967   ins_cost(200);
6968   expand %{
6969     fcmovD_regU(cop, cr, dst, src);
6970   %}
6971 %}
6972 
6973 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6974   predicate(VM_Version::supports_cmov() );
6975   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6976   ins_cost(200);
6977   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6978             "CMOV$cop $dst.hi,$src.hi" %}
6979   opcode(0x0F,0x40);
6980   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6981   ins_pipe( pipe_cmov_reg_long );
6982 %}
6983 
6984 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6985   predicate(VM_Version::supports_cmov() );
6986   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6987   ins_cost(200);
6988   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6989             "CMOV$cop $dst.hi,$src.hi" %}
6990   opcode(0x0F,0x40);
6991   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6992   ins_pipe( pipe_cmov_reg_long );
6993 %}
6994 
6995 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6996   predicate(VM_Version::supports_cmov() );
6997   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6998   ins_cost(200);
6999   expand %{
7000     cmovL_regU(cop, cr, dst, src);
7001   %}
7002 %}
7003 
7004 //----------Arithmetic Instructions--------------------------------------------
7005 //----------Addition Instructions----------------------------------------------
7006 
7007 // Integer Addition Instructions
7008 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7009   match(Set dst (AddI dst src));
7010   effect(KILL cr);
7011 
7012   size(2);
7013   format %{ "ADD    $dst,$src" %}
7014   opcode(0x03);
7015   ins_encode( OpcP, RegReg( dst, src) );
7016   ins_pipe( ialu_reg_reg );
7017 %}
7018 
7019 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7020   match(Set dst (AddI dst src));
7021   effect(KILL cr);
7022 
7023   format %{ "ADD    $dst,$src" %}
7024   opcode(0x81, 0x00); /* /0 id */
7025   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7026   ins_pipe( ialu_reg );
7027 %}
7028 
7029 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7030   predicate(UseIncDec);
7031   match(Set dst (AddI dst src));
7032   effect(KILL cr);
7033 
7034   size(1);
7035   format %{ "INC    $dst" %}
7036   opcode(0x40); /* INC r32: 40 + rd */
7037   ins_encode( Opc_plus( primary, dst ) );
7038   ins_pipe( ialu_reg );
7039 %}
7040 
7041 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7042   match(Set dst (AddI src0 src1));
7043   ins_cost(110);
7044 
7045   format %{ "LEA    $dst,[$src0 + $src1]" %}
7046   opcode(0x8D); /* 0x8D /r */
7047   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7048   ins_pipe( ialu_reg_reg );
7049 %}
7050 
7051 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7052   match(Set dst (AddP src0 src1));
7053   ins_cost(110);
7054 
7055   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7056   opcode(0x8D); /* 0x8D /r */
7057   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7058   ins_pipe( ialu_reg_reg );
7059 %}
7060 
7061 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7062   predicate(UseIncDec);
7063   match(Set dst (AddI dst src));
7064   effect(KILL cr);
7065 
7066   size(1);
7067   format %{ "DEC    $dst" %}
7068   opcode(0x48); /* DEC r32: 48 + rd */
7069   ins_encode( Opc_plus( primary, dst ) );
7070   ins_pipe( ialu_reg );
7071 %}
7072 
7073 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7074   match(Set dst (AddP dst src));
7075   effect(KILL cr);
7076 
7077   size(2);
7078   format %{ "ADD    $dst,$src" %}
7079   opcode(0x03);
7080   ins_encode( OpcP, RegReg( dst, src) );
7081   ins_pipe( ialu_reg_reg );
7082 %}
7083 
7084 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7085   match(Set dst (AddP dst src));
7086   effect(KILL cr);
7087 
7088   format %{ "ADD    $dst,$src" %}
7089   opcode(0x81,0x00); /* Opcode 81 /0 id */
7090   // ins_encode( RegImm( dst, src) );
7091   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7092   ins_pipe( ialu_reg );
7093 %}
7094 
7095 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7096   match(Set dst (AddI dst (LoadI src)));
7097   effect(KILL cr);
7098 
7099   ins_cost(125);
7100   format %{ "ADD    $dst,$src" %}
7101   opcode(0x03);
7102   ins_encode( OpcP, RegMem( dst, src) );
7103   ins_pipe( ialu_reg_mem );
7104 %}
7105 
7106 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7107   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7108   effect(KILL cr);
7109 
7110   ins_cost(150);
7111   format %{ "ADD    $dst,$src" %}
7112   opcode(0x01);  /* Opcode 01 /r */
7113   ins_encode( OpcP, RegMem( src, dst ) );
7114   ins_pipe( ialu_mem_reg );
7115 %}
7116 
7117 // Add Memory with Immediate
7118 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7119   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7120   effect(KILL cr);
7121 
7122   ins_cost(125);
7123   format %{ "ADD    $dst,$src" %}
7124   opcode(0x81);               /* Opcode 81 /0 id */
7125   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7126   ins_pipe( ialu_mem_imm );
7127 %}
7128 
7129 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7130   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7131   effect(KILL cr);
7132 
7133   ins_cost(125);
7134   format %{ "INC    $dst" %}
7135   opcode(0xFF);               /* Opcode FF /0 */
7136   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7137   ins_pipe( ialu_mem_imm );
7138 %}
7139 
7140 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7141   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7142   effect(KILL cr);
7143 
7144   ins_cost(125);
7145   format %{ "DEC    $dst" %}
7146   opcode(0xFF);               /* Opcode FF /1 */
7147   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7148   ins_pipe( ialu_mem_imm );
7149 %}
7150 
7151 
7152 instruct checkCastPP( eRegP dst ) %{
7153   match(Set dst (CheckCastPP dst));
7154 
7155   size(0);
7156   format %{ "#checkcastPP of $dst" %}
7157   ins_encode( /*empty encoding*/ );
7158   ins_pipe( empty );
7159 %}
7160 
7161 instruct castPP( eRegP dst ) %{
7162   match(Set dst (CastPP dst));
7163   format %{ "#castPP of $dst" %}
7164   ins_encode( /*empty encoding*/ );
7165   ins_pipe( empty );
7166 %}
7167 
7168 instruct castII( rRegI dst ) %{
7169   match(Set dst (CastII dst));
7170   format %{ "#castII of $dst" %}
7171   ins_encode( /*empty encoding*/ );
7172   ins_cost(0);
7173   ins_pipe( empty );
7174 %}
7175 
7176 
7177 // Load-locked - same as a regular pointer load when used with compare-swap
7178 instruct loadPLocked(eRegP dst, memory mem) %{
7179   match(Set dst (LoadPLocked mem));
7180 
7181   ins_cost(125);
7182   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7183   opcode(0x8B);
7184   ins_encode( OpcP, RegMem(dst,mem));
7185   ins_pipe( ialu_reg_mem );
7186 %}
7187 
7188 // Conditional-store of the updated heap-top.
7189 // Used during allocation of the shared heap.
7190 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
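// Intended use (a sketch, not emitted here): the allocation fast path loads
// the current heap top via LoadPLocked into EAX (oldval), computes newval as
// the bumped top, and retries the whole sequence if the CMPXCHG below leaves
// ZF clear.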
7191 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7192   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7193   // EAX is killed if there is contention, but then it's also unused.
7194   // In the common case of no contention, EAX holds the new oop address.
7195   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7196   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7197   ins_pipe( pipe_cmpxchg );
7198 %}
7199 
7200 // Conditional-store of an int value.
7201 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7202 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7203   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7204   effect(KILL oldval);
7205   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7206   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7207   ins_pipe( pipe_cmpxchg );
7208 %}
7209 
7210 // Conditional-store of a long value.
7211 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7212 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7213   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7214   effect(KILL oldval);
7215   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7216             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7217             "XCHG   EBX,ECX"
7218   %}
7219   ins_encode %{
7220     // Note: we need to swap rbx and rcx before and after the
7221     //       cmpxchg8 instruction because the instruction uses
7222     //       rcx as the high order word of the new value to store but
7223     //       our register encoding uses rbx.
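    // CMPXCHG8B m64 semantics (Intel SDM), for reference:
    //   if (EDX:EAX == m64) { m64 = ECX:EBX; ZF = 1; }
    //   else                { EDX:EAX = m64;  ZF = 0; }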
7224     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7225     if( os::is_MP() )
7226       __ lock();
7227     __ cmpxchg8($mem$$Address);
7228     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7229   %}
7230   ins_pipe( pipe_cmpxchg );
7231 %}
7232 
7233 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7234 
7235 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7236   predicate(VM_Version::supports_cx8());
7237   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7238   effect(KILL cr, KILL oldval);
7239   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7240             "MOV    $res,0\n\t"
7241             "JNE,s  fail\n\t"
7242             "MOV    $res,1\n"
7243           "fail:" %}
7244   ins_encode( enc_cmpxchg8(mem_ptr),
7245               enc_flags_ne_to_boolean(res) );
7246   ins_pipe( pipe_cmpxchg );
7247 %}
7248 
7249 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7250   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7251   effect(KILL cr, KILL oldval);
7252   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7253             "MOV    $res,0\n\t"
7254             "JNE,s  fail\n\t"
7255             "MOV    $res,1\n"
7256           "fail:" %}
7257   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7258   ins_pipe( pipe_cmpxchg );
7259 %}
7260 
7261 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7262   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7263   effect(KILL cr, KILL oldval);
7264   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7265             "MOV    $res,0\n\t"
7266             "JNE,s  fail\n\t"
7267             "MOV    $res,1\n"
7268           "fail:" %}
7269   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7270   ins_pipe( pipe_cmpxchg );
7271 %}
7272 
7273 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7274   predicate(n->as_LoadStore()->result_not_used());
7275   match(Set dummy (GetAndAddI mem add));
7276   effect(KILL cr);
7277   format %{ "ADDL  [$mem],$add" %}
7278   ins_encode %{
7279     if (os::is_MP()) { __ lock(); }
7280     __ addl($mem$$Address, $add$$constant);
7281   %}
7282   ins_pipe( pipe_cmpxchg );
7283 %}
7284 
7285 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7286   match(Set newval (GetAndAddI mem newval));
7287   effect(KILL cr);
7288   format %{ "XADDL  [$mem],$newval" %}
7289   ins_encode %{
7290     if (os::is_MP()) { __ lock(); }
7291     __ xaddl($mem$$Address, $newval$$Register);
7292   %}
7293   ins_pipe( pipe_cmpxchg );
7294 %}
7295 
7296 instruct xchgI( memory mem, rRegI newval) %{
7297   match(Set newval (GetAndSetI mem newval));
7298   format %{ "XCHGL  $newval,[$mem]" %}
7299   ins_encode %{
7300     __ xchgl($newval$$Register, $mem$$Address);
7301   %}
7302   ins_pipe( pipe_cmpxchg );
7303 %}
7304 
7305 instruct xchgP( memory mem, pRegP newval) %{
7306   match(Set newval (GetAndSetP mem newval));
7307   format %{ "XCHGL  $newval,[$mem]" %}
7308   ins_encode %{
7309     __ xchgl($newval$$Register, $mem$$Address);
7310   %}
7311   ins_pipe( pipe_cmpxchg );
7312 %}
7313 
7314 //----------Subtraction Instructions-------------------------------------------
7315 
7316 // Integer Subtraction Instructions
7317 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7318   match(Set dst (SubI dst src));
7319   effect(KILL cr);
7320 
7321   size(2);
7322   format %{ "SUB    $dst,$src" %}
7323   opcode(0x2B);
7324   ins_encode( OpcP, RegReg( dst, src) );
7325   ins_pipe( ialu_reg_reg );
7326 %}
7327 
7328 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7329   match(Set dst (SubI dst src));
7330   effect(KILL cr);
7331 
7332   format %{ "SUB    $dst,$src" %}
7333   opcode(0x81,0x05);  /* Opcode 81 /5 */
7334   // ins_encode( RegImm( dst, src) );
7335   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7336   ins_pipe( ialu_reg );
7337 %}
7338 
7339 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7340   match(Set dst (SubI dst (LoadI src)));
7341   effect(KILL cr);
7342 
7343   ins_cost(125);
7344   format %{ "SUB    $dst,$src" %}
7345   opcode(0x2B);
7346   ins_encode( OpcP, RegMem( dst, src) );
7347   ins_pipe( ialu_reg_mem );
7348 %}
7349 
7350 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7351   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7352   effect(KILL cr);
7353 
7354   ins_cost(150);
7355   format %{ "SUB    $dst,$src" %}
7356   opcode(0x29);  /* Opcode 29 /r */
7357   ins_encode( OpcP, RegMem( src, dst ) );
7358   ins_pipe( ialu_mem_reg );
7359 %}
7360 
7361 // Subtract from a pointer
7362 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7363   match(Set dst (AddP dst (SubI zero src)));
7364   effect(KILL cr);
7365 
7366   size(2);
7367   format %{ "SUB    $dst,$src" %}
7368   opcode(0x2B);
7369   ins_encode( OpcP, RegReg( dst, src) );
7370   ins_pipe( ialu_reg_reg );
7371 %}
7372 
7373 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7374   match(Set dst (SubI zero dst));
7375   effect(KILL cr);
7376 
7377   size(2);
7378   format %{ "NEG    $dst" %}
7379   opcode(0xF7,0x03);  // Opcode F7 /3
7380   ins_encode( OpcP, RegOpc( dst ) );
7381   ins_pipe( ialu_reg );
7382 %}
7383 
7384 //----------Multiplication/Division Instructions-------------------------------
7385 // Integer Multiplication Instructions
7386 // Multiply Register
7387 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7388   match(Set dst (MulI dst src));
7389   effect(KILL cr);
7390 
7391   size(3);
7392   ins_cost(300);
7393   format %{ "IMUL   $dst,$src" %}
7394   opcode(0xAF, 0x0F);
7395   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7396   ins_pipe( ialu_reg_reg_alu0 );
7397 %}
7398 
7399 // Multiply 32-bit Immediate
7400 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7401   match(Set dst (MulI src imm));
7402   effect(KILL cr);
7403 
7404   ins_cost(300);
7405   format %{ "IMUL   $dst,$src,$imm" %}
7406   opcode(0x69);  /* 69 /r id */
7407   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7408   ins_pipe( ialu_reg_reg_alu0 );
7409 %}
7410 
7411 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7412   match(Set dst src);
7413   effect(KILL cr);
7414 
7415   // Note that this is artificially increased to make it more expensive than loadConL
7416   ins_cost(250);
7417   format %{ "MOV    EAX,$src\t// low word only" %}
7418   opcode(0xB8);
7419   ins_encode( LdImmL_Lo(dst, src) );
7420   ins_pipe( ialu_reg_fat );
7421 %}
7422 
7423 // Multiply by 32-bit Immediate, taking the shifted high order results
7424 //  (special case for shift by 32)
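// i.e. dst = (int)(((long)src1 * con) >> 32), the high half of a 32x32->64
// signed multiply.  C2 typically produces this shape when strength-reducing an
// integer division by a constant into a multiply by its reciprocal.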
7425 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7426   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7427   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7428              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7429              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7430   effect(USE src1, KILL cr);
7431 
7432   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7433   ins_cost(0*100 + 1*400 - 150);
7434   format %{ "IMUL   EDX:EAX,$src1" %}
7435   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7436   ins_pipe( pipe_slow );
7437 %}
7438 
7439 // Multiply by 32-bit Immediate, taking the shifted high order results
7440 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7441   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7442   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7443              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7444              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7445   effect(USE src1, KILL cr);
7446 
7447   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7448   ins_cost(1*100 + 1*400 - 150);
7449   format %{ "IMUL   EDX:EAX,$src1\n\t"
7450             "SAR    EDX,$cnt-32" %}
7451   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7452   ins_pipe( pipe_slow );
7453 %}
7454 
7455 // Multiply Memory 32-bit Immediate
7456 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7457   match(Set dst (MulI (LoadI src) imm));
7458   effect(KILL cr);
7459 
7460   ins_cost(300);
7461   format %{ "IMUL   $dst,$src,$imm" %}
7462   opcode(0x69);  /* 69 /r id */
7463   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7464   ins_pipe( ialu_reg_mem_alu0 );
7465 %}
7466 
7467 // Multiply Memory
7468 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7469   match(Set dst (MulI dst (LoadI src)));
7470   effect(KILL cr);
7471 
7472   ins_cost(350);
7473   format %{ "IMUL   $dst,$src" %}
7474   opcode(0xAF, 0x0F);
7475   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7476   ins_pipe( ialu_reg_mem_alu0 );
7477 %}
7478 
7479 // Multiply Register Int to Long
7480 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7481   // Basic Idea: long = (long)int * (long)int
7482   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7483   effect(DEF dst, USE src, USE src1, KILL flags);
7484 
7485   ins_cost(300);
7486   format %{ "IMUL   $dst,$src1" %}
7487 
7488   ins_encode( long_int_multiply( dst, src1 ) );
7489   ins_pipe( ialu_reg_reg_alu0 );
7490 %}
7491 
7492 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7493   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7494   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7495   effect(KILL flags);
7496 
7497   ins_cost(300);
7498   format %{ "MUL    $dst,$src1" %}
7499 
7500   ins_encode( long_uint_multiply(dst, src1) );
7501   ins_pipe( ialu_reg_reg_alu0 );
7502 %}
7503 
7504 // Multiply Register Long
7505 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7506   match(Set dst (MulL dst src));
7507   effect(KILL cr, TEMP tmp);
7508   ins_cost(4*100+3*400);
7509 // Basic idea: lo(result) = lo(x_lo * y_lo)
7510 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
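// Derivation: with x = x_hi*2^32 + x_lo and y = y_hi*2^32 + y_lo,
//   x*y = x_lo*y_lo + (x_hi*y_lo + x_lo*y_hi)*2^32 + x_hi*y_hi*2^64
// The last term falls outside the 64-bit result, so only the low halves of the
// two cross products contribute to hi(result).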
7511   format %{ "MOV    $tmp,$src.lo\n\t"
7512             "IMUL   $tmp,EDX\n\t"
7513             "MOV    EDX,$src.hi\n\t"
7514             "IMUL   EDX,EAX\n\t"
7515             "ADD    $tmp,EDX\n\t"
7516             "MUL    EDX:EAX,$src.lo\n\t"
7517             "ADD    EDX,$tmp" %}
7518   ins_encode( long_multiply( dst, src, tmp ) );
7519   ins_pipe( pipe_slow );
7520 %}
7521 
7522 // Multiply Register Long where the left operand's high 32 bits are zero
7523 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7524   predicate(is_operand_hi32_zero(n->in(1)));
7525   match(Set dst (MulL dst src));
7526   effect(KILL cr, TEMP tmp);
7527   ins_cost(2*100+2*400);
7528 // Basic idea: lo(result) = lo(x_lo * y_lo)
7529 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7530   format %{ "MOV    $tmp,$src.hi\n\t"
7531             "IMUL   $tmp,EAX\n\t"
7532             "MUL    EDX:EAX,$src.lo\n\t"
7533             "ADD    EDX,$tmp" %}
7534   ins_encode %{
7535     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7536     __ imull($tmp$$Register, rax);
7537     __ mull($src$$Register);
7538     __ addl(rdx, $tmp$$Register);
7539   %}
7540   ins_pipe( pipe_slow );
7541 %}
7542 
7543 // Multiply Register Long where the right operand's high 32 bits are zero
7544 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7545   predicate(is_operand_hi32_zero(n->in(2)));
7546   match(Set dst (MulL dst src));
7547   effect(KILL cr, TEMP tmp);
7548   ins_cost(2*100+2*400);
7549 // Basic idea: lo(result) = lo(x_lo * y_lo)
7550 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7551   format %{ "MOV    $tmp,$src.lo\n\t"
7552             "IMUL   $tmp,EDX\n\t"
7553             "MUL    EDX:EAX,$src.lo\n\t"
7554             "ADD    EDX,$tmp" %}
7555   ins_encode %{
7556     __ movl($tmp$$Register, $src$$Register);
7557     __ imull($tmp$$Register, rdx);
7558     __ mull($src$$Register);
7559     __ addl(rdx, $tmp$$Register);
7560   %}
7561   ins_pipe( pipe_slow );
7562 %}
7563 
7564 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7565 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7566   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7567   match(Set dst (MulL dst src));
7568   effect(KILL cr);
7569   ins_cost(1*400);
7570 // Basic idea: lo(result) = lo(x_lo * y_lo)
7571 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7572   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7573   ins_encode %{
7574     __ mull($src$$Register);
7575   %}
7576   ins_pipe( pipe_slow );
7577 %}
7578 
7579 // Multiply Register Long by small constant
7580 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7581   match(Set dst (MulL dst src));
7582   effect(KILL cr, TEMP tmp);
7583   ins_cost(2*100+2*400);
7584   size(12);
7585 // Basic idea: lo(result) = lo(src * EAX)
7586 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7587   format %{ "IMUL   $tmp,EDX,$src\n\t"
7588             "MOV    EDX,$src\n\t"
7589             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7590             "ADD    EDX,$tmp" %}
7591   ins_encode( long_multiply_con( dst, src, tmp ) );
7592   ins_pipe( pipe_slow );
7593 %}
7594 
7595 // Integer DIV with Register
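// IDIV raises a hardware #DE fault for min_jint / -1 (the quotient 2^31 does
// not fit in 32 bits), so that pair is special-cased up front: the quotient
// stays min_jint in EAX and the remainder is forced to 0 in EDX, matching
// Java's Integer.MIN_VALUE / -1 == Integer.MIN_VALUE.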
7596 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7597   match(Set rax (DivI rax div));
7598   effect(KILL rdx, KILL cr);
7599   size(26);
7600   ins_cost(30*100+10*100);
7601   format %{ "CMP    EAX,0x80000000\n\t"
7602             "JNE,s  normal\n\t"
7603             "XOR    EDX,EDX\n\t"
7604             "CMP    ECX,-1\n\t"
7605             "JE,s   done\n"
7606     "normal: CDQ\n\t"
7607             "IDIV   $div\n\t"
7608     "done:"        %}
7609   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7610   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7611   ins_pipe( ialu_reg_reg_alu0 );
7612 %}
7613 
7614 // Divide Register Long
7615 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7616   match(Set dst (DivL src1 src2));
7617   effect( KILL cr, KILL cx, KILL bx );
7618   ins_cost(10000);
7619   format %{ "PUSH   $src1.hi\n\t"
7620             "PUSH   $src1.lo\n\t"
7621             "PUSH   $src2.hi\n\t"
7622             "PUSH   $src2.lo\n\t"
7623             "CALL   SharedRuntime::ldiv\n\t"
7624             "ADD    ESP,16" %}
7625   ins_encode( long_div(src1,src2) );
7626   ins_pipe( pipe_slow );
7627 %}
7628 
7629 // Integer DIVMOD with Register, both quotient and mod results
7630 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7631   match(DivModI rax div);
7632   effect(KILL cr);
7633   size(26);
7634   ins_cost(30*100+10*100);
7635   format %{ "CMP    EAX,0x80000000\n\t"
7636             "JNE,s  normal\n\t"
7637             "XOR    EDX,EDX\n\t"
7638             "CMP    ECX,-1\n\t"
7639             "JE,s   done\n"
7640     "normal: CDQ\n\t"
7641             "IDIV   $div\n\t"
7642     "done:"        %}
7643   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7644   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7645   ins_pipe( pipe_slow );
7646 %}
7647 
7648 // Integer MOD with Register
7649 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7650   match(Set rdx (ModI rax div));
7651   effect(KILL rax, KILL cr);
7652 
7653   size(26);
7654   ins_cost(300);
7655   format %{ "CDQ\n\t"
7656             "IDIV   $div" %}
7657   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7658   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7659   ins_pipe( ialu_reg_reg_alu0 );
7660 %}
7661 
7662 // Remainder Register Long
7663 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7664   match(Set dst (ModL src1 src2));
7665   effect( KILL cr, KILL cx, KILL bx );
7666   ins_cost(10000);
7667   format %{ "PUSH   $src1.hi\n\t"
7668             "PUSH   $src1.lo\n\t"
7669             "PUSH   $src2.hi\n\t"
7670             "PUSH   $src2.lo\n\t"
7671             "CALL   SharedRuntime::lrem\n\t"
7672             "ADD    ESP,16" %}
7673   ins_encode( long_mod(src1,src2) );
7674   ins_pipe( pipe_slow );
7675 %}
7676 
7677 // Divide Register Long (no special case since divisor != -1)
7678 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7679   match(Set dst (DivL dst imm));
7680   effect( TEMP tmp, TEMP tmp2, KILL cr );
7681   ins_cost(1000);
7682   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7683             "XOR    $tmp2,$tmp2\n\t"
7684             "CMP    $tmp,EDX\n\t"
7685             "JA,s   fast\n\t"
7686             "MOV    $tmp2,EAX\n\t"
7687             "MOV    EAX,EDX\n\t"
7688             "MOV    EDX,0\n\t"
7689             "JLE,s  pos\n\t"
7690             "LNEG   EAX : $tmp2\n\t"
7691             "DIV    $tmp # unsigned division\n\t"
7692             "XCHG   EAX,$tmp2\n\t"
7693             "DIV    $tmp\n\t"
7694             "LNEG   $tmp2 : EAX\n\t"
7695             "JMP,s  done\n"
7696     "pos:\n\t"
7697             "DIV    $tmp\n\t"
7698             "XCHG   EAX,$tmp2\n"
7699     "fast:\n\t"
7700             "DIV    $tmp\n"
7701     "done:\n\t"
7702             "MOV    EDX,$tmp2\n\t"
7703             "NEG    EDX:EAX # if $imm < 0" %}
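// The two 32-bit DIVs implement the schoolbook identity (for unsigned values)
//   (hi*2^32 + lo) / d  ==  (hi/d)*2^32 + ((hi%d)*2^32 + lo) / d
// The first DIV leaves hi%d in EDX, exactly where the second DIV expects the
// upper half of its dividend; negative operands are handled by the LNEG fix-ups.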
7704   ins_encode %{
7705     int con = (int)$imm$$constant;
7706     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7707     int pcon = (con > 0) ? con : -con;
7708     Label Lfast, Lpos, Ldone;
7709 
7710     __ movl($tmp$$Register, pcon);
7711     __ xorl($tmp2$$Register,$tmp2$$Register);
7712     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7713     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7714 
7715     __ movl($tmp2$$Register, $dst$$Register); // save
7716     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7717     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7718     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7719 
7720     // Negative dividend.
7721     // convert value to positive to use unsigned division
7722     __ lneg($dst$$Register, $tmp2$$Register);
7723     __ divl($tmp$$Register);
7724     __ xchgl($dst$$Register, $tmp2$$Register);
7725     __ divl($tmp$$Register);
7726     // revert result back to negative
7727     __ lneg($tmp2$$Register, $dst$$Register);
7728     __ jmpb(Ldone);
7729 
7730     __ bind(Lpos);
7731     __ divl($tmp$$Register); // Use unsigned division
7732     __ xchgl($dst$$Register, $tmp2$$Register);
7733     // Fall through to the final divide; tmp2 has the 32-bit hi result
7734 
7735     __ bind(Lfast);
7736     // fast path: src is positive
7737     __ divl($tmp$$Register); // Use unsigned division
7738 
7739     __ bind(Ldone);
7740     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7741     if (con < 0) {
7742       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7743     }
7744   %}
7745   ins_pipe( pipe_slow );
7746 %}
7747 
7748 // Remainder Register Long (remainder fits into 32 bits)
7749 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7750   match(Set dst (ModL dst imm));
7751   effect( TEMP tmp, TEMP tmp2, KILL cr );
7752   ins_cost(1000);
7753   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7754             "CMP    $tmp,EDX\n\t"
7755             "JA,s   fast\n\t"
7756             "MOV    $tmp2,EAX\n\t"
7757             "MOV    EAX,EDX\n\t"
7758             "MOV    EDX,0\n\t"
7759             "JLE,s  pos\n\t"
7760             "LNEG   EAX : $tmp2\n\t"
7761             "DIV    $tmp # unsigned division\n\t"
7762             "MOV    EAX,$tmp2\n\t"
7763             "DIV    $tmp\n\t"
7764             "NEG    EDX\n\t"
7765             "JMP,s  done\n"
7766     "pos:\n\t"
7767             "DIV    $tmp\n\t"
7768             "MOV    EAX,$tmp2\n"
7769     "fast:\n\t"
7770             "DIV    $tmp\n"
7771     "done:\n\t"
7772             "MOV    EAX,EDX\n\t"
7773             "SAR    EDX,31\n\t" %}
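// Same two-step unsigned division as ldiv above, but keeping the remainder:
//   (hi*2^32 + lo) % d  ==  ((hi%d)*2^32 + lo) % d
// The remainder always fits in 32 bits (it is < d), and it takes the sign of
// the dividend, hence the NEG in the negative branch and the final
// sign-propagating SAR of EDX.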
7774   ins_encode %{
7775     int con = (int)$imm$$constant;
7776     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7777     int pcon = (con > 0) ? con : -con;
7778     Label  Lfast, Lpos, Ldone;
7779 
7780     __ movl($tmp$$Register, pcon);
7781     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7782     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7783 
7784     __ movl($tmp2$$Register, $dst$$Register); // save
7785     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7786     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7787     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7788 
7789     // Negative dividend.
7790     // convert value to positive to use unsigned division
7791     __ lneg($dst$$Register, $tmp2$$Register);
7792     __ divl($tmp$$Register);
7793     __ movl($dst$$Register, $tmp2$$Register);
7794     __ divl($tmp$$Register);
7795     // revert remainder back to negative
7796     __ negl(HIGH_FROM_LOW($dst$$Register));
7797     __ jmpb(Ldone);
7798 
7799     __ bind(Lpos);
7800     __ divl($tmp$$Register);
7801     __ movl($dst$$Register, $tmp2$$Register);
7802 
7803     __ bind(Lfast);
7804     // fast path: src is positive
7805     __ divl($tmp$$Register);
7806 
7807     __ bind(Ldone);
7808     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7809     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7810 
7811   %}
7812   ins_pipe( pipe_slow );
7813 %}
7814 
7815 // Integer Shift Instructions
7816 // Shift Left by one
7817 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7818   match(Set dst (LShiftI dst shift));
7819   effect(KILL cr);
7820 
7821   size(2);
7822   format %{ "SHL    $dst,$shift" %}
7823   opcode(0xD1, 0x4);  /* D1 /4 */
7824   ins_encode( OpcP, RegOpc( dst ) );
7825   ins_pipe( ialu_reg );
7826 %}
7827 
7828 // Shift Left by 8-bit immediate
7829 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7830   match(Set dst (LShiftI dst shift));
7831   effect(KILL cr);
7832 
7833   size(3);
7834   format %{ "SHL    $dst,$shift" %}
7835   opcode(0xC1, 0x4);  /* C1 /4 ib */
7836   ins_encode( RegOpcImm( dst, shift) );
7837   ins_pipe( ialu_reg );
7838 %}
7839 
7840 // Shift Left by variable
7841 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7842   match(Set dst (LShiftI dst shift));
7843   effect(KILL cr);
7844 
7845   size(2);
7846   format %{ "SHL    $dst,$shift" %}
7847   opcode(0xD3, 0x4);  /* D3 /4 */
7848   ins_encode( OpcP, RegOpc( dst ) );
7849   ins_pipe( ialu_reg_reg );
7850 %}
7851 
7852 // Arithmetic shift right by one
7853 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7854   match(Set dst (RShiftI dst shift));
7855   effect(KILL cr);
7856 
7857   size(2);
7858   format %{ "SAR    $dst,$shift" %}
7859   opcode(0xD1, 0x7);  /* D1 /7 */
7860   ins_encode( OpcP, RegOpc( dst ) );
7861   ins_pipe( ialu_reg );
7862 %}
7863 
7864 // Arithmetic shift right by one
7865 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7866   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7867   effect(KILL cr);
7868   format %{ "SAR    $dst,$shift" %}
7869   opcode(0xD1, 0x7);  /* D1 /7 */
7870   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7871   ins_pipe( ialu_mem_imm );
7872 %}
7873 
7874 // Arithmetic Shift Right by 8-bit immediate
7875 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7876   match(Set dst (RShiftI dst shift));
7877   effect(KILL cr);
7878 
7879   size(3);
7880   format %{ "SAR    $dst,$shift" %}
7881   opcode(0xC1, 0x7);  /* C1 /7 ib */
7882   ins_encode( RegOpcImm( dst, shift ) );
7883   ins_pipe( ialu_mem_imm );
7884 %}
7885 
7886 // Arithmetic Shift Right by 8-bit immediate
7887 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7888   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7889   effect(KILL cr);
7890 
7891   format %{ "SAR    $dst,$shift" %}
7892   opcode(0xC1, 0x7);  /* C1 /7 ib */
7893   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7894   ins_pipe( ialu_mem_imm );
7895 %}
7896 
7897 // Arithmetic Shift Right by variable
7898 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7899   match(Set dst (RShiftI dst shift));
7900   effect(KILL cr);
7901 
7902   size(2);
7903   format %{ "SAR    $dst,$shift" %}
7904   opcode(0xD3, 0x7);  /* D3 /7 */
7905   ins_encode( OpcP, RegOpc( dst ) );
7906   ins_pipe( ialu_reg_reg );
7907 %}
7908 
7909 // Logical shift right by one
7910 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7911   match(Set dst (URShiftI dst shift));
7912   effect(KILL cr);
7913 
7914   size(2);
7915   format %{ "SHR    $dst,$shift" %}
7916   opcode(0xD1, 0x5);  /* D1 /5 */
7917   ins_encode( OpcP, RegOpc( dst ) );
7918   ins_pipe( ialu_reg );
7919 %}
7920 
7921 // Logical Shift Right by 8-bit immediate
7922 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7923   match(Set dst (URShiftI dst shift));
7924   effect(KILL cr);
7925 
7926   size(3);
7927   format %{ "SHR    $dst,$shift" %}
7928   opcode(0xC1, 0x5);  /* C1 /5 ib */
7929   ins_encode( RegOpcImm( dst, shift) );
7930   ins_pipe( ialu_reg );
7931 %}
7932 
7933 
7934 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
7935 // This idiom is used by the compiler for the i2b bytecode.
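     // For example, with $src = 0x1234FF80: ($src << 24) is 0x80000000, and the
     // arithmetic shift right by 24 gives 0xFFFFFF80, the sign-extended low
     // byte, which is exactly what a single MOVSX (movsbl) produces.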
7936 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7937   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7938 
7939   size(3);
7940   format %{ "MOVSX  $dst,$src :8" %}
7941   ins_encode %{
7942     __ movsbl($dst$$Register, $src$$Register);
7943   %}
7944   ins_pipe(ialu_reg_reg);
7945 %}
7946 
7947 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
7948 // This idiom is used by the compiler for the i2s bytecode.
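     // Likewise, ($src << 16) >> 16 keeps only the low 16 bits and sign-extends
     // them, so a single MOVSX (movswl) implements the whole pattern.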
7949 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7950   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7951 
7952   size(3);
7953   format %{ "MOVSX  $dst,$src :16" %}
7954   ins_encode %{
7955     __ movswl($dst$$Register, $src$$Register);
7956   %}
7957   ins_pipe(ialu_reg_reg);
7958 %}
7959 
7960 
7961 // Logical Shift Right by variable
7962 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7963   match(Set dst (URShiftI dst shift));
7964   effect(KILL cr);
7965 
7966   size(2);
7967   format %{ "SHR    $dst,$shift" %}
7968   opcode(0xD3, 0x5);  /* D3 /5 */
7969   ins_encode( OpcP, RegOpc( dst ) );
7970   ins_pipe( ialu_reg_reg );
7971 %}
7972 
7973 
7974 //----------Logical Instructions-----------------------------------------------
7975 //----------Integer Logical Instructions---------------------------------------
7976 // And Instructions
7977 // And Register with Register
7978 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7979   match(Set dst (AndI dst src));
7980   effect(KILL cr);
7981 
7982   size(2);
7983   format %{ "AND    $dst,$src" %}
7984   opcode(0x23);
7985   ins_encode( OpcP, RegReg( dst, src) );
7986   ins_pipe( ialu_reg_reg );
7987 %}
7988 
7989 // And Register with Immediate
7990 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7991   match(Set dst (AndI dst src));
7992   effect(KILL cr);
7993 
7994   format %{ "AND    $dst,$src" %}
7995   opcode(0x81,0x04);  /* Opcode 81 /4 */
7996   // ins_encode( RegImm( dst, src) );
7997   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7998   ins_pipe( ialu_reg );
7999 %}
8000 
8001 // And Register with Memory
8002 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8003   match(Set dst (AndI dst (LoadI src)));
8004   effect(KILL cr);
8005 
8006   ins_cost(125);
8007   format %{ "AND    $dst,$src" %}
8008   opcode(0x23);
8009   ins_encode( OpcP, RegMem( dst, src) );
8010   ins_pipe( ialu_reg_mem );
8011 %}
8012 
8013 // And Memory with Register
8014 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8015   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8016   effect(KILL cr);
8017 
8018   ins_cost(150);
8019   format %{ "AND    $dst,$src" %}
8020   opcode(0x21);  /* Opcode 21 /r */
8021   ins_encode( OpcP, RegMem( src, dst ) );
8022   ins_pipe( ialu_mem_reg );
8023 %}
8024 
8025 // And Memory with Immediate
8026 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8027   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8028   effect(KILL cr);
8029 
8030   ins_cost(125);
8031   format %{ "AND    $dst,$src" %}
8032   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8033   // ins_encode( MemImm( dst, src) );
8034   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8035   ins_pipe( ialu_mem_imm );
8036 %}
8037 
8038 // BMI1 instructions
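     // These match the canonical bit-twiddling idioms onto single BMI1 ops:
     //   ANDN   dst = ~src1 & src2      (matched as (src1 ^ -1) & src2)
     //   BLSI   dst = src & -src        (isolate lowest set bit)
     //   BLSMSK dst = src ^ (src - 1)   (mask up to and including lowest set bit)
     //   BLSR   dst = src & (src - 1)   (clear lowest set bit)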
8039 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8040   match(Set dst (AndI (XorI src1 minus_1) src2));
8041   predicate(UseBMI1Instructions);
8042   effect(KILL cr);
8043 
8044   format %{ "ANDNL  $dst, $src1, $src2" %}
8045 
8046   ins_encode %{
8047     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8048   %}
8049   ins_pipe(ialu_reg);
8050 %}
8051 
8052 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8053   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8054   predicate(UseBMI1Instructions);
8055   effect(KILL cr);
8056 
8057   ins_cost(125);
8058   format %{ "ANDNL  $dst, $src1, $src2" %}
8059 
8060   ins_encode %{
8061     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8062   %}
8063   ins_pipe(ialu_reg_mem);
8064 %}
8065 
8066 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8067   match(Set dst (AndI (SubI imm_zero src) src));
8068   predicate(UseBMI1Instructions);
8069   effect(KILL cr);
8070 
8071   format %{ "BLSIL  $dst, $src" %}
8072 
8073   ins_encode %{
8074     __ blsil($dst$$Register, $src$$Register);
8075   %}
8076   ins_pipe(ialu_reg);
8077 %}
8078 
8079 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8080   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8081   predicate(UseBMI1Instructions);
8082   effect(KILL cr);
8083 
8084   ins_cost(125);
8085   format %{ "BLSIL  $dst, $src" %}
8086 
8087   ins_encode %{
8088     __ blsil($dst$$Register, $src$$Address);
8089   %}
8090   ins_pipe(ialu_reg_mem);
8091 %}
8092 
8093 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8094 %{
8095   match(Set dst (XorI (AddI src minus_1) src));
8096   predicate(UseBMI1Instructions);
8097   effect(KILL cr);
8098 
8099   format %{ "BLSMSKL $dst, $src" %}
8100 
8101   ins_encode %{
8102     __ blsmskl($dst$$Register, $src$$Register);
8103   %}
8104 
8105   ins_pipe(ialu_reg);
8106 %}
8107 
8108 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8109 %{
8110   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8111   predicate(UseBMI1Instructions);
8112   effect(KILL cr);
8113 
8114   ins_cost(125);
8115   format %{ "BLSMSKL $dst, $src" %}
8116 
8117   ins_encode %{
8118     __ blsmskl($dst$$Register, $src$$Address);
8119   %}
8120 
8121   ins_pipe(ialu_reg_mem);
8122 %}
8123 
8124 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8125 %{
8126   match(Set dst (AndI (AddI src minus_1) src) );
8127   predicate(UseBMI1Instructions);
8128   effect(KILL cr);
8129 
8130   format %{ "BLSRL  $dst, $src" %}
8131 
8132   ins_encode %{
8133     __ blsrl($dst$$Register, $src$$Register);
8134   %}
8135 
8136   ins_pipe(ialu_reg);
8137 %}
8138 
8139 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8140 %{
8141   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8142   predicate(UseBMI1Instructions);
8143   effect(KILL cr);
8144 
8145   ins_cost(125);
8146   format %{ "BLSRL  $dst, $src" %}
8147 
8148   ins_encode %{
8149     __ blsrl($dst$$Register, $src$$Address);
8150   %}
8151 
8152   ins_pipe(ialu_reg_mem);
8153 %}
8154 
8155 // Or Instructions
8156 // Or Register with Register
8157 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8158   match(Set dst (OrI dst src));
8159   effect(KILL cr);
8160 
8161   size(2);
8162   format %{ "OR     $dst,$src" %}
8163   opcode(0x0B);
8164   ins_encode( OpcP, RegReg( dst, src) );
8165   ins_pipe( ialu_reg_reg );
8166 %}
8167 
8168 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8169   match(Set dst (OrI dst (CastP2X src)));
8170   effect(KILL cr);
8171 
8172   size(2);
8173   format %{ "OR     $dst,$src" %}
8174   opcode(0x0B);
8175   ins_encode( OpcP, RegReg( dst, src) );
8176   ins_pipe( ialu_reg_reg );
8177 %}
8178 
8179 
8180 // Or Register with Immediate
8181 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8182   match(Set dst (OrI dst src));
8183   effect(KILL cr);
8184 
8185   format %{ "OR     $dst,$src" %}
8186   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8187   // ins_encode( RegImm( dst, src) );
8188   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8189   ins_pipe( ialu_reg );
8190 %}
8191 
8192 // Or Register with Memory
8193 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8194   match(Set dst (OrI dst (LoadI src)));
8195   effect(KILL cr);
8196 
8197   ins_cost(125);
8198   format %{ "OR     $dst,$src" %}
8199   opcode(0x0B);
8200   ins_encode( OpcP, RegMem( dst, src) );
8201   ins_pipe( ialu_reg_mem );
8202 %}
8203 
8204 // Or Memory with Register
8205 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8206   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8207   effect(KILL cr);
8208 
8209   ins_cost(150);
8210   format %{ "OR     $dst,$src" %}
8211   opcode(0x09);  /* Opcode 09 /r */
8212   ins_encode( OpcP, RegMem( src, dst ) );
8213   ins_pipe( ialu_mem_reg );
8214 %}
8215 
8216 // Or Memory with Immediate
8217 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8218   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8219   effect(KILL cr);
8220 
8221   ins_cost(125);
8222   format %{ "OR     $dst,$src" %}
8223   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8224   // ins_encode( MemImm( dst, src) );
8225   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8226   ins_pipe( ialu_mem_imm );
8227 %}
8228 
8229 // ROL/ROR
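     // The expand-only instructions below carry no match rule of their own; the
     // rotate instructions further down match the usual shift/or idioms, e.g.
     // (x << s) | (x >>> (32 - s)) or (x << s) | (x >>> (0 - s)) for a left
     // rotate (shift counts are taken mod 32), and expand into them.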
8230 // ROL expand
8231 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8232   effect(USE_DEF dst, USE shift, KILL cr);
8233 
8234   format %{ "ROL    $dst, $shift" %}
8235   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8236   ins_encode( OpcP, RegOpc( dst ));
8237   ins_pipe( ialu_reg );
8238 %}
8239 
8240 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8241   effect(USE_DEF dst, USE shift, KILL cr);
8242 
8243   format %{ "ROL    $dst, $shift" %}
8244   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8245   ins_encode( RegOpcImm(dst, shift) );
8246   ins_pipe(ialu_reg);
8247 %}
8248 
8249 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8250   effect(USE_DEF dst, USE shift, KILL cr);
8251 
8252   format %{ "ROL    $dst, $shift" %}
8253   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8254   ins_encode(OpcP, RegOpc(dst));
8255   ins_pipe( ialu_reg_reg );
8256 %}
8257 // end of ROL expand
8258 
8259 // ROL 32bit by one once
8260 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8261   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8262 
8263   expand %{
8264     rolI_eReg_imm1(dst, lshift, cr);
8265   %}
8266 %}
8267 
8268 // ROL 32bit var by imm8 once
8269 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8270   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8271   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8272 
8273   expand %{
8274     rolI_eReg_imm8(dst, lshift, cr);
8275   %}
8276 %}
8277 
8278 // ROL 32bit var by var once
8279 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8280   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8281 
8282   expand %{
8283     rolI_eReg_CL(dst, shift, cr);
8284   %}
8285 %}
8286 
8287 // ROL 32bit var by var once
8288 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8289   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8290 
8291   expand %{
8292     rolI_eReg_CL(dst, shift, cr);
8293   %}
8294 %}
8295 
8296 // ROR expand
8297 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8298   effect(USE_DEF dst, USE shift, KILL cr);
8299 
8300   format %{ "ROR    $dst, $shift" %}
8301   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8302   ins_encode( OpcP, RegOpc( dst ) );
8303   ins_pipe( ialu_reg );
8304 %}
8305 
8306 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8307   effect(USE_DEF dst, USE shift, KILL cr);
8308 
8309   format %{ "ROR    $dst, $shift" %}
8310   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8311   ins_encode( RegOpcImm(dst, shift) );
8312   ins_pipe( ialu_reg );
8313 %}
8314 
8315 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8316   effect(USE_DEF dst, USE shift, KILL cr);
8317 
8318   format %{ "ROR    $dst, $shift" %}
8319   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8320   ins_encode(OpcP, RegOpc(dst));
8321   ins_pipe( ialu_reg_reg );
8322 %}
8323 // end of ROR expand
8324 
8325 // ROR 32bit by one once
8326 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8327   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8328 
8329   expand %{
8330     rorI_eReg_imm1(dst, rshift, cr);
8331   %}
8332 %}
8333 
8334 // ROR 32bit by immI8 once
8335 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8336   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8337   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8338 
8339   expand %{
8340     rorI_eReg_imm8(dst, rshift, cr);
8341   %}
8342 %}
8343 
8344 // ROR 32bit var by var once
8345 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8346   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8347 
8348   expand %{
8349     rorI_eReg_CL(dst, shift, cr);
8350   %}
8351 %}
8352 
8353 // ROR 32bit var by var once
8354 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8355   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8356 
8357   expand %{
8358     rorI_eReg_CL(dst, shift, cr);
8359   %}
8360 %}
8361 
8362 // Xor Instructions
8363 // Xor Register with Register
8364 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8365   match(Set dst (XorI dst src));
8366   effect(KILL cr);
8367 
8368   size(2);
8369   format %{ "XOR    $dst,$src" %}
8370   opcode(0x33);
8371   ins_encode( OpcP, RegReg( dst, src) );
8372   ins_pipe( ialu_reg_reg );
8373 %}
8374 
8375 // Xor Register with Immediate -1
8376 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8377   match(Set dst (XorI dst imm));
8378 
8379   size(2);
8380   format %{ "NOT    $dst" %}
8381   ins_encode %{
8382      __ notl($dst$$Register);
8383   %}
8384   ins_pipe( ialu_reg );
8385 %}
8386 
8387 // Xor Register with Immediate
8388 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8389   match(Set dst (XorI dst src));
8390   effect(KILL cr);
8391 
8392   format %{ "XOR    $dst,$src" %}
8393   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8394   // ins_encode( RegImm( dst, src) );
8395   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8396   ins_pipe( ialu_reg );
8397 %}
8398 
8399 // Xor Register with Memory
8400 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8401   match(Set dst (XorI dst (LoadI src)));
8402   effect(KILL cr);
8403 
8404   ins_cost(125);
8405   format %{ "XOR    $dst,$src" %}
8406   opcode(0x33);
8407   ins_encode( OpcP, RegMem(dst, src) );
8408   ins_pipe( ialu_reg_mem );
8409 %}
8410 
8411 // Xor Memory with Register
8412 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8413   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8414   effect(KILL cr);
8415 
8416   ins_cost(150);
8417   format %{ "XOR    $dst,$src" %}
8418   opcode(0x31);  /* Opcode 31 /r */
8419   ins_encode( OpcP, RegMem( src, dst ) );
8420   ins_pipe( ialu_mem_reg );
8421 %}
8422 
8423 // Xor Memory with Immediate
8424 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8425   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8426   effect(KILL cr);
8427 
8428   ins_cost(125);
8429   format %{ "XOR    $dst,$src" %}
8430   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8431   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8432   ins_pipe( ialu_mem_imm );
8433 %}
8434 
8435 //----------Convert Int to Boolean---------------------------------------------
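     // Conv2B produces 1 for any non-zero input and 0 otherwise.  The NEG/ADC
     // pair below relies on NEG setting CF exactly when its operand is non-zero:
     // after copying src into dst, NEG dst followed by ADC dst,src computes
     // (-src) + src + CF, which is just CF.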
8436 
8437 instruct movI_nocopy(rRegI dst, rRegI src) %{
8438   effect( DEF dst, USE src );
8439   format %{ "MOV    $dst,$src" %}
8440   ins_encode( enc_Copy( dst, src) );
8441   ins_pipe( ialu_reg_reg );
8442 %}
8443 
8444 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8445   effect( USE_DEF dst, USE src, KILL cr );
8446 
8447   size(4);
8448   format %{ "NEG    $dst\n\t"
8449             "ADC    $dst,$src" %}
8450   ins_encode( neg_reg(dst),
8451               OpcRegReg(0x13,dst,src) );
8452   ins_pipe( ialu_reg_reg_long );
8453 %}
8454 
8455 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8456   match(Set dst (Conv2B src));
8457 
8458   expand %{
8459     movI_nocopy(dst,src);
8460     ci2b(dst,src,cr);
8461   %}
8462 %}
8463 
8464 instruct movP_nocopy(rRegI dst, eRegP src) %{
8465   effect( DEF dst, USE src );
8466   format %{ "MOV    $dst,$src" %}
8467   ins_encode( enc_Copy( dst, src) );
8468   ins_pipe( ialu_reg_reg );
8469 %}
8470 
8471 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8472   effect( USE_DEF dst, USE src, KILL cr );
8473   format %{ "NEG    $dst\n\t"
8474             "ADC    $dst,$src" %}
8475   ins_encode( neg_reg(dst),
8476               OpcRegReg(0x13,dst,src) );
8477   ins_pipe( ialu_reg_reg_long );
8478 %}
8479 
8480 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8481   match(Set dst (Conv2B src));
8482 
8483   expand %{
8484     movP_nocopy(dst,src);
8485     cp2b(dst,src,cr);
8486   %}
8487 %}
8488 
8489 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8490   match(Set dst (CmpLTMask p q));
8491   effect(KILL cr);
8492   ins_cost(400);
8493 
8494   // CmpLTMask yields -1 when p < q (signed) and 0 otherwise: SETlt leaves
       // 0 or 1 in the low byte and the NEG turns the 1 into -1.
       // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination.
8495   format %{ "XOR    $dst,$dst\n\t"
8496             "CMP    $p,$q\n\t"
8497             "SETlt  $dst\n\t"
8498             "NEG    $dst" %}
8499   ins_encode %{
8500     Register Rp = $p$$Register;
8501     Register Rq = $q$$Register;
8502     Register Rd = $dst$$Register;
8503     Label done;
8504     __ xorl(Rd, Rd);
8505     __ cmpl(Rp, Rq);
8506     __ setb(Assembler::less, Rd);
8507     __ negl(Rd);
8508   %}
8509 
8510   ins_pipe(pipe_slow);
8511 %}
8512 
8513 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8514   match(Set dst (CmpLTMask dst zero));
8515   effect(DEF dst, KILL cr);
8516   ins_cost(100);
8517 
8518   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8519   ins_encode %{
8520     __ sarl($dst$$Register, 31);
8521   %}
8522   ins_pipe(ialu_reg);
8523 %}
8524 
8525 /* better to save a register than avoid a branch */
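     // Matches p = ((p < q) ? y : 0) + (p - q), implemented as SUB followed by a
     // conditional ADD so that no extra register is needed for the mask.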
8526 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8527   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8528   effect(KILL cr);
8529   ins_cost(400);
8530   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8531             "JGE    done\n\t"
8532             "ADD    $p,$y\n"
8533             "done:  " %}
8534   ins_encode %{
8535     Register Rp = $p$$Register;
8536     Register Rq = $q$$Register;
8537     Register Ry = $y$$Register;
8538     Label done;
8539     __ subl(Rp, Rq);
8540     __ jccb(Assembler::greaterEqual, done);
8541     __ addl(Rp, Ry);
8542     __ bind(done);
8543   %}
8544 
8545   ins_pipe(pipe_cmplt);
8546 %}
8547 
8548 /* better to save a register than avoid a branch */
8549 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8550   match(Set y (AndI (CmpLTMask p q) y));
8551   effect(KILL cr);
8552 
8553   ins_cost(300);
8554 
8555   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8556             "JLT      done\n\t"
8557             "XORL     $y, $y\n"
8558             "done:  " %}
8559   ins_encode %{
8560     Register Rp = $p$$Register;
8561     Register Rq = $q$$Register;
8562     Register Ry = $y$$Register;
8563     Label done;
8564     __ cmpl(Rp, Rq);
8565     __ jccb(Assembler::less, done);
8566     __ xorl(Ry, Ry);
8567     __ bind(done);
8568   %}
8569 
8570   ins_pipe(pipe_cmplt);
8571 %}
8572 
8573 /* If I enable this, I encourage spilling in the inner loop of compress.
8574 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8575   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8576 */
8577 //----------Overflow Math Instructions-----------------------------------------
8578 
8579 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8580 %{
8581   match(Set cr (OverflowAddI op1 op2));
8582   effect(DEF cr, USE_KILL op1, USE op2);
8583 
8584   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8585 
8586   ins_encode %{
8587     __ addl($op1$$Register, $op2$$Register);
8588   %}
8589   ins_pipe(ialu_reg_reg);
8590 %}
8591 
8592 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8593 %{
8594   match(Set cr (OverflowAddI op1 op2));
8595   effect(DEF cr, USE_KILL op1, USE op2);
8596 
8597   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8598 
8599   ins_encode %{
8600     __ addl($op1$$Register, $op2$$constant);
8601   %}
8602   ins_pipe(ialu_reg_reg);
8603 %}
8604 
8605 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8606 %{
8607   match(Set cr (OverflowSubI op1 op2));
8608 
8609   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8610   ins_encode %{
8611     __ cmpl($op1$$Register, $op2$$Register);
8612   %}
8613   ins_pipe(ialu_reg_reg);
8614 %}
8615 
8616 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8617 %{
8618   match(Set cr (OverflowSubI op1 op2));
8619 
8620   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8621   ins_encode %{
8622     __ cmpl($op1$$Register, $op2$$constant);
8623   %}
8624   ins_pipe(ialu_reg_reg);
8625 %}
8626 
8627 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8628 %{
8629   match(Set cr (OverflowSubI zero op2));
8630   effect(DEF cr, USE_KILL op2);
8631 
8632   format %{ "NEG    $op2\t# overflow check int" %}
8633   ins_encode %{
8634     __ negl($op2$$Register);
8635   %}
8636   ins_pipe(ialu_reg_reg);
8637 %}
8638 
8639 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8640 %{
8641   match(Set cr (OverflowMulI op1 op2));
8642   effect(DEF cr, USE_KILL op1, USE op2);
8643 
8644   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8645   ins_encode %{
8646     __ imull($op1$$Register, $op2$$Register);
8647   %}
8648   ins_pipe(ialu_reg_reg_alu0);
8649 %}
8650 
8651 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8652 %{
8653   match(Set cr (OverflowMulI op1 op2));
8654   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8655 
8656   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8657   ins_encode %{
8658     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8659   %}
8660   ins_pipe(ialu_reg_reg_alu0);
8661 %}
8662 
8663 //----------Long Instructions------------------------------------------------
8664 // Add Long Register with Register
8665 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8666   match(Set dst (AddL dst src));
8667   effect(KILL cr);
8668   ins_cost(200);
8669   format %{ "ADD    $dst.lo,$src.lo\n\t"
8670             "ADC    $dst.hi,$src.hi" %}
8671   opcode(0x03, 0x13);
8672   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8673   ins_pipe( ialu_reg_reg_long );
8674 %}
8675 
8676 // Add Long Register with Immediate
8677 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8678   match(Set dst (AddL dst src));
8679   effect(KILL cr);
8680   format %{ "ADD    $dst.lo,$src.lo\n\t"
8681             "ADC    $dst.hi,$src.hi" %}
8682   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8683   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8684   ins_pipe( ialu_reg_long );
8685 %}
8686 
8687 // Add Long Register with Memory
8688 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8689   match(Set dst (AddL dst (LoadL mem)));
8690   effect(KILL cr);
8691   ins_cost(125);
8692   format %{ "ADD    $dst.lo,$mem\n\t"
8693             "ADC    $dst.hi,$mem+4" %}
8694   opcode(0x03, 0x13);
8695   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8696   ins_pipe( ialu_reg_long_mem );
8697 %}
8698 
8699 // Subtract Long Register with Register.
8700 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8701   match(Set dst (SubL dst src));
8702   effect(KILL cr);
8703   ins_cost(200);
8704   format %{ "SUB    $dst.lo,$src.lo\n\t"
8705             "SBB    $dst.hi,$src.hi" %}
8706   opcode(0x2B, 0x1B);
8707   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8708   ins_pipe( ialu_reg_reg_long );
8709 %}
8710 
8711 // Subtract Long Register with Immediate
8712 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8713   match(Set dst (SubL dst src));
8714   effect(KILL cr);
8715   format %{ "SUB    $dst.lo,$src.lo\n\t"
8716             "SBB    $dst.hi,$src.hi" %}
8717   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8718   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8719   ins_pipe( ialu_reg_long );
8720 %}
8721 
8722 // Subtract Long Register with Memory
8723 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8724   match(Set dst (SubL dst (LoadL mem)));
8725   effect(KILL cr);
8726   ins_cost(125);
8727   format %{ "SUB    $dst.lo,$mem\n\t"
8728             "SBB    $dst.hi,$mem+4" %}
8729   opcode(0x2B, 0x1B);
8730   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8731   ins_pipe( ialu_reg_long_mem );
8732 %}
8733 
8734 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8735   match(Set dst (SubL zero dst));
8736   effect(KILL cr);
8737   ins_cost(300);
8738   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8739   ins_encode( neg_long(dst) );
8740   ins_pipe( ialu_reg_reg_long );
8741 %}
8742 
8743 // And Long Register with Register
8744 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8745   match(Set dst (AndL dst src));
8746   effect(KILL cr);
8747   format %{ "AND    $dst.lo,$src.lo\n\t"
8748             "AND    $dst.hi,$src.hi" %}
8749   opcode(0x23,0x23);
8750   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8751   ins_pipe( ialu_reg_reg_long );
8752 %}
8753 
8754 // And Long Register with Immediate
8755 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8756   match(Set dst (AndL dst src));
8757   effect(KILL cr);
8758   format %{ "AND    $dst.lo,$src.lo\n\t"
8759             "AND    $dst.hi,$src.hi" %}
8760   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8761   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8762   ins_pipe( ialu_reg_long );
8763 %}
8764 
8765 // And Long Register with Memory
8766 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8767   match(Set dst (AndL dst (LoadL mem)));
8768   effect(KILL cr);
8769   ins_cost(125);
8770   format %{ "AND    $dst.lo,$mem\n\t"
8771             "AND    $dst.hi,$mem+4" %}
8772   opcode(0x23, 0x23);
8773   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8774   ins_pipe( ialu_reg_long_mem );
8775 %}
8776 
8777 // BMI1 instructions
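     // The long ANDN form simply applies ANDN to both halves.  The long
     // BLSI/BLSMSK/BLSR forms apply the 32-bit op to the low word first and use
     // the flags it produces (ZF for BLSI, CF for BLSMSK/BLSR) to decide whether
     // the operation must also be applied to the high word.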
8778 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8779   match(Set dst (AndL (XorL src1 minus_1) src2));
8780   predicate(UseBMI1Instructions);
8781   effect(KILL cr, TEMP dst);
8782 
8783   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8784             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8785          %}
8786 
8787   ins_encode %{
8788     Register Rdst = $dst$$Register;
8789     Register Rsrc1 = $src1$$Register;
8790     Register Rsrc2 = $src2$$Register;
8791     __ andnl(Rdst, Rsrc1, Rsrc2);
8792     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8793   %}
8794   ins_pipe(ialu_reg_reg_long);
8795 %}
8796 
8797 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8798   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8799   predicate(UseBMI1Instructions);
8800   effect(KILL cr, TEMP dst);
8801 
8802   ins_cost(125);
8803   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8804             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8805          %}
8806 
8807   ins_encode %{
8808     Register Rdst = $dst$$Register;
8809     Register Rsrc1 = $src1$$Register;
8810     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8811 
8812     __ andnl(Rdst, Rsrc1, $src2$$Address);
8813     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8814   %}
8815   ins_pipe(ialu_reg_mem);
8816 %}
8817 
8818 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8819   match(Set dst (AndL (SubL imm_zero src) src));
8820   predicate(UseBMI1Instructions);
8821   effect(KILL cr, TEMP dst);
8822 
8823   format %{ "MOVL   $dst.hi, 0\n\t"
8824             "BLSIL  $dst.lo, $src.lo\n\t"
8825             "JNZ    done\n\t"
8826             "BLSIL  $dst.hi, $src.hi\n"
8827             "done:"
8828          %}
8829 
8830   ins_encode %{
8831     Label done;
8832     Register Rdst = $dst$$Register;
8833     Register Rsrc = $src$$Register;
8834     __ movl(HIGH_FROM_LOW(Rdst), 0);
8835     __ blsil(Rdst, Rsrc);
8836     __ jccb(Assembler::notZero, done);
8837     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8838     __ bind(done);
8839   %}
8840   ins_pipe(ialu_reg);
8841 %}
8842 
8843 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8844   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8845   predicate(UseBMI1Instructions);
8846   effect(KILL cr, TEMP dst);
8847 
8848   ins_cost(125);
8849   format %{ "MOVL   $dst.hi, 0\n\t"
8850             "BLSIL  $dst.lo, $src\n\t"
8851             "JNZ    done\n\t"
8852             "BLSIL  $dst.hi, $src+4\n"
8853             "done:"
8854          %}
8855 
8856   ins_encode %{
8857     Label done;
8858     Register Rdst = $dst$$Register;
8859     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8860 
8861     __ movl(HIGH_FROM_LOW(Rdst), 0);
8862     __ blsil(Rdst, $src$$Address);
8863     __ jccb(Assembler::notZero, done);
8864     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8865     __ bind(done);
8866   %}
8867   ins_pipe(ialu_reg_mem);
8868 %}
8869 
8870 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8871 %{
8872   match(Set dst (XorL (AddL src minus_1) src));
8873   predicate(UseBMI1Instructions);
8874   effect(KILL cr, TEMP dst);
8875 
8876   format %{ "MOVL    $dst.hi, 0\n\t"
8877             "BLSMSKL $dst.lo, $src.lo\n\t"
8878             "JNC     done\n\t"
8879             "BLSMSKL $dst.hi, $src.hi\n"
8880             "done:"
8881          %}
8882 
8883   ins_encode %{
8884     Label done;
8885     Register Rdst = $dst$$Register;
8886     Register Rsrc = $src$$Register;
8887     __ movl(HIGH_FROM_LOW(Rdst), 0);
8888     __ blsmskl(Rdst, Rsrc);
8889     __ jccb(Assembler::carryClear, done);
8890     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8891     __ bind(done);
8892   %}
8893 
8894   ins_pipe(ialu_reg);
8895 %}
8896 
8897 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8898 %{
8899   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8900   predicate(UseBMI1Instructions);
8901   effect(KILL cr, TEMP dst);
8902 
8903   ins_cost(125);
8904   format %{ "MOVL    $dst.hi, 0\n\t"
8905             "BLSMSKL $dst.lo, $src\n\t"
8906             "JNC     done\n\t"
8907             "BLSMSKL $dst.hi, $src+4\n"
8908             "done:"
8909          %}
8910 
8911   ins_encode %{
8912     Label done;
8913     Register Rdst = $dst$$Register;
8914     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8915 
8916     __ movl(HIGH_FROM_LOW(Rdst), 0);
8917     __ blsmskl(Rdst, $src$$Address);
8918     __ jccb(Assembler::carryClear, done);
8919     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8920     __ bind(done);
8921   %}
8922 
8923   ins_pipe(ialu_reg_mem);
8924 %}
8925 
8926 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8927 %{
8928   match(Set dst (AndL (AddL src minus_1) src) );
8929   predicate(UseBMI1Instructions);
8930   effect(KILL cr, TEMP dst);
8931 
8932   format %{ "MOVL   $dst.hi, $src.hi\n\t"
8933             "BLSRL  $dst.lo, $src.lo\n\t"
8934             "JNC    done\n\t"
8935             "BLSRL  $dst.hi, $src.hi\n"
8936             "done:"
8937   %}
8938 
8939   ins_encode %{
8940     Label done;
8941     Register Rdst = $dst$$Register;
8942     Register Rsrc = $src$$Register;
8943     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8944     __ blsrl(Rdst, Rsrc);
8945     __ jccb(Assembler::carryClear, done);
8946     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8947     __ bind(done);
8948   %}
8949 
8950   ins_pipe(ialu_reg);
8951 %}
8952 
8953 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8954 %{
8955   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8956   predicate(UseBMI1Instructions);
8957   effect(KILL cr, TEMP dst);
8958 
8959   ins_cost(125);
8960   format %{ "MOVL   $dst.hi, $src+4\n\t"
8961             "BLSRL  $dst.lo, $src\n\t"
8962             "JNC    done\n\t"
8963             "BLSRL  $dst.hi, $src+4\n"
8964             "done:"
8965   %}
8966 
8967   ins_encode %{
8968     Label done;
8969     Register Rdst = $dst$$Register;
8970     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8971     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
8972     __ blsrl(Rdst, $src$$Address);
8973     __ jccb(Assembler::carryClear, done);
8974     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
8975     __ bind(done);
8976   %}
8977 
8978   ins_pipe(ialu_reg_mem);
8979 %}
8980 
8981 // Or Long Register with Register
8982 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8983   match(Set dst (OrL dst src));
8984   effect(KILL cr);
8985   format %{ "OR     $dst.lo,$src.lo\n\t"
8986             "OR     $dst.hi,$src.hi" %}
8987   opcode(0x0B,0x0B);
8988   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8989   ins_pipe( ialu_reg_reg_long );
8990 %}
8991 
8992 // Or Long Register with Immediate
8993 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8994   match(Set dst (OrL dst src));
8995   effect(KILL cr);
8996   format %{ "OR     $dst.lo,$src.lo\n\t"
8997             "OR     $dst.hi,$src.hi" %}
8998   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
8999   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9000   ins_pipe( ialu_reg_long );
9001 %}
9002 
9003 // Or Long Register with Memory
9004 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9005   match(Set dst (OrL dst (LoadL mem)));
9006   effect(KILL cr);
9007   ins_cost(125);
9008   format %{ "OR     $dst.lo,$mem\n\t"
9009             "OR     $dst.hi,$mem+4" %}
9010   opcode(0x0B,0x0B);
9011   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9012   ins_pipe( ialu_reg_long_mem );
9013 %}
9014 
9015 // Xor Long Register with Register
9016 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9017   match(Set dst (XorL dst src));
9018   effect(KILL cr);
9019   format %{ "XOR    $dst.lo,$src.lo\n\t"
9020             "XOR    $dst.hi,$src.hi" %}
9021   opcode(0x33,0x33);
9022   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9023   ins_pipe( ialu_reg_reg_long );
9024 %}
9025 
9026 // Xor Long Register with Immediate -1
9027 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9028   match(Set dst (XorL dst imm));
9029   format %{ "NOT    $dst.lo\n\t"
9030             "NOT    $dst.hi" %}
9031   ins_encode %{
9032      __ notl($dst$$Register);
9033      __ notl(HIGH_FROM_LOW($dst$$Register));
9034   %}
9035   ins_pipe( ialu_reg_long );
9036 %}
9037 
9038 // Xor Long Register with Immediate
9039 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9040   match(Set dst (XorL dst src));
9041   effect(KILL cr);
9042   format %{ "XOR    $dst.lo,$src.lo\n\t"
9043             "XOR    $dst.hi,$src.hi" %}
9044   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9045   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9046   ins_pipe( ialu_reg_long );
9047 %}
9048 
9049 // Xor Long Register with Memory
9050 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9051   match(Set dst (XorL dst (LoadL mem)));
9052   effect(KILL cr);
9053   ins_cost(125);
9054   format %{ "XOR    $dst.lo,$mem\n\t"
9055             "XOR    $dst.hi,$mem+4" %}
9056   opcode(0x33,0x33);
9057   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9058   ins_pipe( ialu_reg_long_mem );
9059 %}
9060 
9061 // Shift Left Long by 1
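     // With only 32-bit registers, shifting a long left by one is an add of the
     // value to itself: ADD doubles the low word and ADC folds the carry into
     // the doubled high word.  The by-2 and by-3 forms below repeat the pair.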
9062 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9063   predicate(UseNewLongLShift);
9064   match(Set dst (LShiftL dst cnt));
9065   effect(KILL cr);
9066   ins_cost(100);
9067   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9068             "ADC    $dst.hi,$dst.hi" %}
9069   ins_encode %{
9070     __ addl($dst$$Register,$dst$$Register);
9071     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9072   %}
9073   ins_pipe( ialu_reg_long );
9074 %}
9075 
9076 // Shift Left Long by 2
9077 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9078   predicate(UseNewLongLShift);
9079   match(Set dst (LShiftL dst cnt));
9080   effect(KILL cr);
9081   ins_cost(100);
9082   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9083             "ADC    $dst.hi,$dst.hi\n\t"
9084             "ADD    $dst.lo,$dst.lo\n\t"
9085             "ADC    $dst.hi,$dst.hi" %}
9086   ins_encode %{
9087     __ addl($dst$$Register,$dst$$Register);
9088     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9089     __ addl($dst$$Register,$dst$$Register);
9090     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9091   %}
9092   ins_pipe( ialu_reg_long );
9093 %}
9094 
9095 // Shift Left Long by 3
9096 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9097   predicate(UseNewLongLShift);
9098   match(Set dst (LShiftL dst cnt));
9099   effect(KILL cr);
9100   ins_cost(100);
9101   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9102             "ADC    $dst.hi,$dst.hi\n\t"
9103             "ADD    $dst.lo,$dst.lo\n\t"
9104             "ADC    $dst.hi,$dst.hi\n\t"
9105             "ADD    $dst.lo,$dst.lo\n\t"
9106             "ADC    $dst.hi,$dst.hi" %}
9107   ins_encode %{
9108     __ addl($dst$$Register,$dst$$Register);
9109     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9110     __ addl($dst$$Register,$dst$$Register);
9111     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9112     __ addl($dst$$Register,$dst$$Register);
9113     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9114   %}
9115   ins_pipe( ialu_reg_long );
9116 %}
9117 
9118 // Shift Left Long by 1-31
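     // SHLD shifts $dst.hi left while filling its vacated low bits from the top
     // of $dst.lo, and the SHL then shifts $dst.lo itself, giving a full 64-bit
     // left shift for counts of 1-31.  The right-shift forms further down use
     // SHRD in the same way for the other direction.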
9119 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9120   match(Set dst (LShiftL dst cnt));
9121   effect(KILL cr);
9122   ins_cost(200);
9123   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9124             "SHL    $dst.lo,$cnt" %}
9125   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9126   ins_encode( move_long_small_shift(dst,cnt) );
9127   ins_pipe( ialu_reg_long );
9128 %}
9129 
9130 // Shift Left Long by 32-63
9131 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9132   match(Set dst (LShiftL dst cnt));
9133   effect(KILL cr);
9134   ins_cost(300);
9135   format %{ "MOV    $dst.hi,$dst.lo\n"
9136           "\tSHL    $dst.hi,$cnt-32\n"
9137           "\tXOR    $dst.lo,$dst.lo" %}
9138   opcode(0xC1, 0x4);  /* C1 /4 ib */
9139   ins_encode( move_long_big_shift_clr(dst,cnt) );
9140   ins_pipe( ialu_reg_long );
9141 %}
9142 
9143 // Shift Left Long by variable
9144 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9145   match(Set dst (LShiftL dst shift));
9146   effect(KILL cr);
9147   ins_cost(500+200);
9148   size(17);
9149   format %{ "TEST   $shift,32\n\t"
9150             "JEQ,s  small\n\t"
9151             "MOV    $dst.hi,$dst.lo\n\t"
9152             "XOR    $dst.lo,$dst.lo\n"
9153     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9154             "SHL    $dst.lo,$shift" %}
9155   ins_encode( shift_left_long( dst, shift ) );
9156   ins_pipe( pipe_slow );
9157 %}
9158 
9159 // Shift Right Long by 1-31
9160 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9161   match(Set dst (URShiftL dst cnt));
9162   effect(KILL cr);
9163   ins_cost(200);
9164   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9165             "SHR    $dst.hi,$cnt" %}
9166   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9167   ins_encode( move_long_small_shift(dst,cnt) );
9168   ins_pipe( ialu_reg_long );
9169 %}
9170 
9171 // Shift Right Long by 32-63
9172 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9173   match(Set dst (URShiftL dst cnt));
9174   effect(KILL cr);
9175   ins_cost(300);
9176   format %{ "MOV    $dst.lo,$dst.hi\n"
9177           "\tSHR    $dst.lo,$cnt-32\n"
9178           "\tXOR    $dst.hi,$dst.hi" %}
9179   opcode(0xC1, 0x5);  /* C1 /5 ib */
9180   ins_encode( move_long_big_shift_clr(dst,cnt) );
9181   ins_pipe( ialu_reg_long );
9182 %}
9183 
9184 // Shift Right Long by variable
9185 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9186   match(Set dst (URShiftL dst shift));
9187   effect(KILL cr);
9188   ins_cost(600);
9189   size(17);
9190   format %{ "TEST   $shift,32\n\t"
9191             "JEQ,s  small\n\t"
9192             "MOV    $dst.lo,$dst.hi\n\t"
9193             "XOR    $dst.hi,$dst.hi\n"
9194     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9195             "SHR    $dst.hi,$shift" %}
9196   ins_encode( shift_right_long( dst, shift ) );
9197   ins_pipe( pipe_slow );
9198 %}
9199 
9200 // Shift Right Long by 1-31
9201 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9202   match(Set dst (RShiftL dst cnt));
9203   effect(KILL cr);
9204   ins_cost(200);
9205   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9206             "SAR    $dst.hi,$cnt" %}
9207   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9208   ins_encode( move_long_small_shift(dst,cnt) );
9209   ins_pipe( ialu_reg_long );
9210 %}
9211 
9212 // Shift Right Long by 32-63
9213 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9214   match(Set dst (RShiftL dst cnt));
9215   effect(KILL cr);
9216   ins_cost(300);
9217   format %{ "MOV    $dst.lo,$dst.hi\n"
9218           "\tSAR    $dst.lo,$cnt-32\n"
9219           "\tSAR    $dst.hi,31" %}
9220   opcode(0xC1, 0x7);  /* C1 /7 ib */
9221   ins_encode( move_long_big_shift_sign(dst,cnt) );
9222   ins_pipe( ialu_reg_long );
9223 %}
9224 
9225 // Shift Right arithmetic Long by variable
9226 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9227   match(Set dst (RShiftL dst shift));
9228   effect(KILL cr);
9229   ins_cost(600);
9230   size(18);
9231   format %{ "TEST   $shift,32\n\t"
9232             "JEQ,s  small\n\t"
9233             "MOV    $dst.lo,$dst.hi\n\t"
9234             "SAR    $dst.hi,31\n"
9235     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9236             "SAR    $dst.hi,$shift" %}
9237   ins_encode( shift_right_arith_long( dst, shift ) );
9238   ins_pipe( pipe_slow );
9239 %}
9240 
9241 
9242 //----------Double Instructions------------------------------------------------
9243 // Double Math
9244 
9245 // Compare & branch
9246 
9247 // P6 version of float compare, sets condition codes in EFLAGS
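     // FUCOMIP sets ZF/PF/CF directly and reports an unordered result via PF;
     // the fixup below (JNP / MOV AH,1 / SAHF) rewrites the flags to CF=1,
     // ZF=PF=0 on NaN, so consumers of eFlagsRegU see a plain "below" result.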
9248 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9249   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9250   match(Set cr (CmpD src1 src2));
9251   effect(KILL rax);
9252   ins_cost(150);
9253   format %{ "FLD    $src1\n\t"
9254             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9255             "JNP    exit\n\t"
9256             "MOV    ah,1       // saw a NaN, set CF\n\t"
9257             "SAHF\n"
9258      "exit:\tNOP               // avoid branch to branch" %}
9259   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9260   ins_encode( Push_Reg_DPR(src1),
9261               OpcP, RegOpc(src2),
9262               cmpF_P6_fixup );
9263   ins_pipe( pipe_slow );
9264 %}
9265 
9266 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9267   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9268   match(Set cr (CmpD src1 src2));
9269   ins_cost(150);
9270   format %{ "FLD    $src1\n\t"
9271             "FUCOMIP ST,$src2  // P6 instruction" %}
9272   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9273   ins_encode( Push_Reg_DPR(src1),
9274               OpcP, RegOpc(src2));
9275   ins_pipe( pipe_slow );
9276 %}
9277 
9278 // Compare & branch
9279 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9280   predicate(UseSSE<=1);
9281   match(Set cr (CmpD src1 src2));
9282   effect(KILL rax);
9283   ins_cost(200);
9284   format %{ "FLD    $src1\n\t"
9285             "FCOMp  $src2\n\t"
9286             "FNSTSW AX\n\t"
9287             "TEST   AX,0x400\n\t"
9288             "JZ,s   flags\n\t"
9289             "MOV    AH,1\t# unordered treat as LT\n"
9290     "flags:\tSAHF" %}
9291   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9292   ins_encode( Push_Reg_DPR(src1),
9293               OpcP, RegOpc(src2),
9294               fpu_flags);
9295   ins_pipe( pipe_slow );
9296 %}
9297 
9298 // Compare vs zero into -1,0,1
9299 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9300   predicate(UseSSE<=1);
9301   match(Set dst (CmpD3 src1 zero));
9302   effect(KILL cr, KILL rax);
9303   ins_cost(280);
9304   format %{ "FTSTD  $dst,$src1" %}
9305   opcode(0xE4, 0xD9);
9306   ins_encode( Push_Reg_DPR(src1),
9307               OpcS, OpcP, PopFPU,
9308               CmpF_Result(dst));
9309   ins_pipe( pipe_slow );
9310 %}
9311 
9312 // Compare into -1,0,1
9313 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9314   predicate(UseSSE<=1);
9315   match(Set dst (CmpD3 src1 src2));
9316   effect(KILL cr, KILL rax);
9317   ins_cost(300);
9318   format %{ "FCMPD  $dst,$src1,$src2" %}
9319   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9320   ins_encode( Push_Reg_DPR(src1),
9321               OpcP, RegOpc(src2),
9322               CmpF_Result(dst));
9323   ins_pipe( pipe_slow );
9324 %}
9325 
9326 // float compare and set condition codes in EFLAGS by XMM regs
9327 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9328   predicate(UseSSE>=2);
9329   match(Set cr (CmpD src1 src2));
9330   ins_cost(145);
9331   format %{ "UCOMISD $src1,$src2\n\t"
9332             "JNP,s   exit\n\t"
9333             "PUSHF\t# saw NaN, set CF\n\t"
9334             "AND     [rsp], #0xffffff2b\n\t"
9335             "POPF\n"
9336     "exit:" %}
9337   ins_encode %{
9338     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9339     emit_cmpfp_fixup(_masm);
9340   %}
9341   ins_pipe( pipe_slow );
9342 %}
9343 
9344 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9345   predicate(UseSSE>=2);
9346   match(Set cr (CmpD src1 src2));
9347   ins_cost(100);
9348   format %{ "UCOMISD $src1,$src2" %}
9349   ins_encode %{
9350     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9351   %}
9352   ins_pipe( pipe_slow );
9353 %}
9354 
9355 // float compare and set condition codes in EFLAGS by XMM regs
9356 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9357   predicate(UseSSE>=2);
9358   match(Set cr (CmpD src1 (LoadD src2)));
9359   ins_cost(145);
9360   format %{ "UCOMISD $src1,$src2\n\t"
9361             "JNP,s   exit\n\t"
9362             "PUSHF\t# saw NaN, set CF\n\t"
9363             "AND     [rsp], #0xffffff2b\n\t"
9364             "POPF\n"
9365     "exit:" %}
9366   ins_encode %{
9367     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9368     emit_cmpfp_fixup(_masm);
9369   %}
9370   ins_pipe( pipe_slow );
9371 %}
9372 
9373 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9374   predicate(UseSSE>=2);
9375   match(Set cr (CmpD src1 (LoadD src2)));
9376   ins_cost(100);
9377   format %{ "UCOMISD $src1,$src2" %}
9378   ins_encode %{
9379     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9380   %}
9381   ins_pipe( pipe_slow );
9382 %}
9383 
9384 // Compare into -1,0,1 in XMM
9385 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9386   predicate(UseSSE>=2);
9387   match(Set dst (CmpD3 src1 src2));
9388   effect(KILL cr);
9389   ins_cost(255);
9390   format %{ "UCOMISD $src1, $src2\n\t"
9391             "MOV     $dst, #-1\n\t"
9392             "JP,s    done\n\t"
9393             "JB,s    done\n\t"
9394             "SETNE   $dst\n\t"
9395             "MOVZB   $dst, $dst\n"
9396     "done:" %}
9397   ins_encode %{
9398     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9399     emit_cmpfp3(_masm, $dst$$Register);
9400   %}
9401   ins_pipe( pipe_slow );
9402 %}
9403 
9404 // Compare into -1,0,1 in XMM and memory
9405 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9406   predicate(UseSSE>=2);
9407   match(Set dst (CmpD3 src1 (LoadD src2)));
9408   effect(KILL cr);
9409   ins_cost(275);
9410   format %{ "UCOMISD $src1, $src2\n\t"
9411             "MOV     $dst, #-1\n\t"
9412             "JP,s    done\n\t"
9413             "JB,s    done\n\t"
9414             "SETNE   $dst\n\t"
9415             "MOVZB   $dst, $dst\n"
9416     "done:" %}
9417   ins_encode %{
9418     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9419     emit_cmpfp3(_masm, $dst$$Register);
9420   %}
9421   ins_pipe( pipe_slow );
9422 %}
9423 
9424 
9425 instruct subDPR_reg(regDPR dst, regDPR src) %{
9426   predicate (UseSSE <=1);
9427   match(Set dst (SubD dst src));
9428 
9429   format %{ "FLD    $src\n\t"
9430             "DSUBp  $dst,ST" %}
9431   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9432   ins_cost(150);
9433   ins_encode( Push_Reg_DPR(src),
9434               OpcP, RegOpc(dst) );
9435   ins_pipe( fpu_reg_reg );
9436 %}
9437 
9438 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9439   predicate (UseSSE <=1);
9440   match(Set dst (RoundDouble (SubD src1 src2)));
9441   ins_cost(250);
9442 
9443   format %{ "FLD    $src2\n\t"
9444             "DSUB   ST,$src1\n\t"
9445             "FSTP_D $dst\t# D-round" %}
9446   opcode(0xD8, 0x5);
9447   ins_encode( Push_Reg_DPR(src2),
9448               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9449   ins_pipe( fpu_mem_reg_reg );
9450 %}
9451 
9452 
9453 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9454   predicate (UseSSE <=1);
9455   match(Set dst (SubD dst (LoadD src)));
9456   ins_cost(150);
9457 
9458   format %{ "FLD    $src\n\t"
9459             "DSUBp  $dst,ST" %}
9460   opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
9461   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9462               OpcP, RegOpc(dst) );
9463   ins_pipe( fpu_reg_mem );
9464 %}
9465 
9466 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9467   predicate (UseSSE<=1);
9468   match(Set dst (AbsD src));
9469   ins_cost(100);
9470   format %{ "FABS" %}
9471   opcode(0xE1, 0xD9);
9472   ins_encode( OpcS, OpcP );
9473   ins_pipe( fpu_reg_reg );
9474 %}
9475 
9476 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9477   predicate(UseSSE<=1);
9478   match(Set dst (NegD src));
9479   ins_cost(100);
9480   format %{ "FCHS" %}
9481   opcode(0xE0, 0xD9);
9482   ins_encode( OpcS, OpcP );
9483   ins_pipe( fpu_reg_reg );
9484 %}
9485 
9486 instruct addDPR_reg(regDPR dst, regDPR src) %{
9487   predicate(UseSSE<=1);
9488   match(Set dst (AddD dst src));
9489   format %{ "FLD    $src\n\t"
9490             "DADD   $dst,ST" %}
9491   size(4);
9492   ins_cost(150);
9493   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9494   ins_encode( Push_Reg_DPR(src),
9495               OpcP, RegOpc(dst) );
9496   ins_pipe( fpu_reg_reg );
9497 %}
9498 
9499 
9500 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9501   predicate(UseSSE<=1);
9502   match(Set dst (RoundDouble (AddD src1 src2)));
9503   ins_cost(250);
9504 
9505   format %{ "FLD    $src2\n\t"
9506             "DADD   ST,$src1\n\t"
9507             "FSTP_D $dst\t# D-round" %}
9508   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9509   ins_encode( Push_Reg_DPR(src2),
9510               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9511   ins_pipe( fpu_mem_reg_reg );
9512 %}
9513 
9514 
9515 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9516   predicate(UseSSE<=1);
9517   match(Set dst (AddD dst (LoadD src)));
9518   ins_cost(150);
9519 
9520   format %{ "FLD    $src\n\t"
9521             "DADDp  $dst,ST" %}
9522   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9523   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9524               OpcP, RegOpc(dst) );
9525   ins_pipe( fpu_reg_mem );
9526 %}
9527 
9528 // add-to-memory
9529 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9530   predicate(UseSSE<=1);
9531   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9532   ins_cost(150);
9533 
9534   format %{ "FLD_D  $dst\n\t"
9535             "DADD   ST,$src\n\t"
9536             "FST_D  $dst" %}
9537   opcode(0xDD, 0x0);
9538   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9539               Opcode(0xD8), RegOpc(src),
9540               set_instruction_start,
9541               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9542   ins_pipe( fpu_reg_mem );
9543 %}
9544 
9545 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9546   predicate(UseSSE<=1);
9547   match(Set dst (AddD dst con));
9548   ins_cost(125);
9549   format %{ "FLD1\n\t"
9550             "DADDp  $dst,ST" %}
9551   ins_encode %{
9552     __ fld1();
9553     __ faddp($dst$$reg);
9554   %}
9555   ins_pipe(fpu_reg);
9556 %}
9557 
9558 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9559   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9560   match(Set dst (AddD dst con));
9561   ins_cost(200);
9562   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9563             "DADDp  $dst,ST" %}
9564   ins_encode %{
9565     __ fld_d($constantaddress($con));
9566     __ faddp($dst$$reg);
9567   %}
9568   ins_pipe(fpu_reg_mem);
9569 %}
9570 
9571 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9572   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9573   match(Set dst (RoundDouble (AddD src con)));
9574   ins_cost(200);
9575   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9576             "DADD   ST,$src\n\t"
9577             "FSTP_D $dst\t# D-round" %}
9578   ins_encode %{
9579     __ fld_d($constantaddress($con));
9580     __ fadd($src$$reg);
9581     __ fstp_d(Address(rsp, $dst$$disp));
9582   %}
9583   ins_pipe(fpu_mem_reg_con);
9584 %}
9585 
9586 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9587   predicate(UseSSE<=1);
9588   match(Set dst (MulD dst src));
9589   format %{ "FLD    $src\n\t"
9590             "DMULp  $dst,ST" %}
9591   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9592   ins_cost(150);
9593   ins_encode( Push_Reg_DPR(src),
9594               OpcP, RegOpc(dst) );
9595   ins_pipe( fpu_reg_reg );
9596 %}
9597 
9598 // Strict FP instruction biases argument before multiply then
9599 // biases result to avoid double rounding of subnormals.
9600 //
9601 // scale arg1 by multiplying arg1 by 2^(-15360)
9602 // load arg2
9603 // multiply scaled arg1 by arg2
9604 // rescale product by 2^(15360)
9605 //
9606 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9607   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9608   match(Set dst (MulD dst src));
9609   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9610 
9611   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9612             "DMULp  $dst,ST\n\t"
9613             "FLD    $src\n\t"
9614             "DMULp  $dst,ST\n\t"
9615             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9616             "DMULp  $dst,ST\n\t" %}
9617   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9618   ins_encode( strictfp_bias1(dst),
9619               Push_Reg_DPR(src),
9620               OpcP, RegOpc(dst),
9621               strictfp_bias2(dst) );
9622   ins_pipe( fpu_reg_reg );
9623 %}
9624 
9625 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9626   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9627   match(Set dst (MulD dst con));
9628   ins_cost(200);
9629   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9630             "DMULp  $dst,ST" %}
9631   ins_encode %{
9632     __ fld_d($constantaddress($con));
9633     __ fmulp($dst$$reg);
9634   %}
9635   ins_pipe(fpu_reg_mem);
9636 %}
9637 
9638 
9639 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9640   predicate( UseSSE<=1 );
9641   match(Set dst (MulD dst (LoadD src)));
9642   ins_cost(200);
9643   format %{ "FLD_D  $src\n\t"
9644             "DMULp  $dst,ST" %}
9645   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9646   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9647               OpcP, RegOpc(dst) );
9648   ins_pipe( fpu_reg_mem );
9649 %}
9650 
9651 //
9652 // Cisc-alternate to reg-reg multiply
9653 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9654   predicate( UseSSE<=1 );
9655   match(Set dst (MulD src (LoadD mem)));
9656   ins_cost(250);
9657   format %{ "FLD_D  $mem\n\t"
9658             "DMUL   ST,$src\n\t"
9659             "FSTP_D $dst" %}
9660   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9661   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9662               OpcReg_FPR(src),
9663               Pop_Reg_DPR(dst) );
9664   ins_pipe( fpu_reg_reg_mem );
9665 %}
9666 
9667 
9668 // MACRO3 -- addDPR a mulDPR
9669 // This instruction is a '2-address' instruction in that the result goes
9670 // back to src2.  This eliminates a move from the macro; possibly the
9671 // register allocator will have to add it back (and maybe not).
9672 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9673   predicate( UseSSE<=1 );
9674   match(Set src2 (AddD (MulD src0 src1) src2));
9675   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9676             "DMUL   ST,$src1\n\t"
9677             "DADDp  $src2,ST" %}
9678   ins_cost(250);
9679   opcode(0xDD); /* LoadD DD /0 */
9680   ins_encode( Push_Reg_FPR(src0),
9681               FMul_ST_reg(src1),
9682               FAddP_reg_ST(src2) );
9683   ins_pipe( fpu_reg_reg_reg );
9684 %}
9685 
9686 
9687 // MACRO3 -- subDPR a mulDPR
9688 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9689   predicate( UseSSE<=1 );
9690   match(Set src2 (SubD (MulD src0 src1) src2));
9691   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9692             "DMUL   ST,$src1\n\t"
9693             "DSUBRp $src2,ST" %}
9694   ins_cost(250);
9695   ins_encode( Push_Reg_FPR(src0),
9696               FMul_ST_reg(src1),
9697               Opcode(0xDE), Opc_plus(0xE0,src2));
9698   ins_pipe( fpu_reg_reg_reg );
9699 %}
9700 
9701 
9702 instruct divDPR_reg(regDPR dst, regDPR src) %{
9703   predicate( UseSSE<=1 );
9704   match(Set dst (DivD dst src));
9705 
9706   format %{ "FLD    $src\n\t"
9707             "FDIVp  $dst,ST" %}
9708   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9709   ins_cost(150);
9710   ins_encode( Push_Reg_DPR(src),
9711               OpcP, RegOpc(dst) );
9712   ins_pipe( fpu_reg_reg );
9713 %}
9714 
9715 // Strict FP instruction biases argument before division then
9716 // biases result, to avoid double rounding of subnormals.
9717 //
9718 // scale dividend by multiplying dividend by 2^(-15360)
9719 // load divisor
9720 // divide scaled dividend by divisor
9721 // rescale quotient by 2^(15360)
9722 //
9723 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9728 
9729   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9730             "DMULp  $dst,ST\n\t"
9731             "FLD    $src\n\t"
9732             "FDIVp  $dst,ST\n\t"
9733             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9734             "DMULp  $dst,ST\n\t" %}
9735   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9736   ins_encode( strictfp_bias1(dst),
9737               Push_Reg_DPR(src),
9738               OpcP, RegOpc(dst),
9739               strictfp_bias2(dst) );
9740   ins_pipe( fpu_reg_reg );
9741 %}
9742 
9743 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9744   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9745   match(Set dst (RoundDouble (DivD src1 src2)));
9746 
9747   format %{ "FLD    $src1\n\t"
9748             "FDIV   ST,$src2\n\t"
9749             "FSTP_D $dst\t# D-round" %}
9750   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9751   ins_encode( Push_Reg_DPR(src1),
9752               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9753   ins_pipe( fpu_mem_reg_reg );
9754 %}
9755 
9756 
9757 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9758   predicate(UseSSE<=1);
9759   match(Set dst (ModD dst src));
9760   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9761 
9762   format %{ "DMOD   $dst,$src" %}
9763   ins_cost(250);
9764   ins_encode(Push_Reg_Mod_DPR(dst, src),
9765               emitModDPR(),
9766               Push_Result_Mod_DPR(src),
9767               Pop_Reg_DPR(dst));
9768   ins_pipe( pipe_slow );
9769 %}
9770 
9771 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9772   predicate(UseSSE>=2);
9773   match(Set dst (ModD src0 src1));
9774   effect(KILL rax, KILL cr);
9775 
9776   format %{ "SUB    ESP,8\t # DMOD\n"
9777           "\tMOVSD  [ESP+0],$src1\n"
9778           "\tFLD_D  [ESP+0]\n"
9779           "\tMOVSD  [ESP+0],$src0\n"
9780           "\tFLD_D  [ESP+0]\n"
9781      "loop:\tFPREM\n"
9782           "\tFWAIT\n"
9783           "\tFNSTSW AX\n"
9784           "\tSAHF\n"
9785           "\tJP     loop\n"
9786           "\tFSTP_D [ESP+0]\n"
9787           "\tMOVSD  $dst,[ESP+0]\n"
9788           "\tADD    ESP,8\n"
9789           "\tFSTP   ST0\t # Restore FPU Stack"
9790     %}
9791   ins_cost(250);
9792   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9793   ins_pipe( pipe_slow );
9794 %}
9795 
9796 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
9797   predicate (UseSSE<=1);
9798   match(Set dst(TanD src));
9799   format %{ "DTAN   $dst" %}
9800   ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
9801               Opcode(0xDD), Opcode(0xD8));   // fstp st
9802   ins_pipe( pipe_slow );
9803 %}
9804 
9805 instruct tanD_reg(regD dst, eFlagsReg cr) %{
9806   predicate (UseSSE>=2);
9807   match(Set dst(TanD dst));
9808   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9809   format %{ "DTAN   $dst" %}
9810   ins_encode( Push_SrcD(dst),
9811               Opcode(0xD9), Opcode(0xF2),    // fptan
9812               Opcode(0xDD), Opcode(0xD8),   // fstp st
9813               Push_ResultD(dst) );
9814   ins_pipe( pipe_slow );
9815 %}
9816 
9817 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9818   predicate (UseSSE<=1);
9819   match(Set dst(AtanD dst src));
9820   format %{ "DATA   $dst,$src" %}
9821   opcode(0xD9, 0xF3);
9822   ins_encode( Push_Reg_DPR(src),
9823               OpcP, OpcS, RegOpc(dst) );
9824   ins_pipe( pipe_slow );
9825 %}
9826 
9827 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9828   predicate (UseSSE>=2);
9829   match(Set dst(AtanD dst src));
9830   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9831   format %{ "DATA   $dst,$src" %}
9832   opcode(0xD9, 0xF3);
9833   ins_encode( Push_SrcD(src),
9834               OpcP, OpcS, Push_ResultD(dst) );
9835   ins_pipe( pipe_slow );
9836 %}
9837 
9838 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9839   predicate (UseSSE<=1);
9840   match(Set dst (SqrtD src));
9841   format %{ "DSQRT  $dst,$src" %}
9842   opcode(0xFA, 0xD9);
9843   ins_encode( Push_Reg_DPR(src),
9844               OpcS, OpcP, Pop_Reg_DPR(dst) );
9845   ins_pipe( pipe_slow );
9846 %}
9847 
9848 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
9849   predicate (UseSSE<=1);
9850   // The source Double operand on FPU stack
9851   match(Set dst (Log10D src));
9852   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9853   // fxch         ; swap ST(0) with ST(1)
9854   // fyl2x        ; compute log_10(2) * log_2(x)
9855   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9856             "FXCH   \n\t"
9857             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9858          %}
9859   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9860               Opcode(0xD9), Opcode(0xC9),   // fxch
9861               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9862 
9863   ins_pipe( pipe_slow );
9864 %}
9865 
9866 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
9867   predicate (UseSSE>=2);
9868   effect(KILL cr);
9869   match(Set dst (Log10D src));
9870   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9871   // fyl2x        ; compute log_10(2) * log_2(x)
9872   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9873             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9874          %}
9875   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9876               Push_SrcD(src),
9877               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9878               Push_ResultD(dst));
9879 
9880   ins_pipe( pipe_slow );
9881 %}
9882 
9883 //-------------Float Instructions-------------------------------
9884 // Float Math
9885 
9886 // Code for float compare:
9887 //     fcompp();
9888 //     fwait(); fnstsw_ax();
9889 //     sahf();
9890 //     movl(dst, unordered_result);
9891 //     jcc(Assembler::parity, exit);
9892 //     movl(dst, less_result);
9893 //     jcc(Assembler::below, exit);
9894 //     movl(dst, equal_result);
9895 //     jcc(Assembler::equal, exit);
9896 //     movl(dst, greater_result);
9897 //   exit:
9898 
9899 // P6 version of float compare, sets condition codes in EFLAGS
9900 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9901   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9902   match(Set cr (CmpF src1 src2));
9903   effect(KILL rax);
9904   ins_cost(150);
9905   format %{ "FLD    $src1\n\t"
9906             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9907             "JNP    exit\n\t"
9908             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
9909             "SAHF\n"
9910      "exit:\tNOP               // avoid branch to branch" %}
9911   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9912   ins_encode( Push_Reg_DPR(src1),
9913               OpcP, RegOpc(src2),
9914               cmpF_P6_fixup );
9915   ins_pipe( pipe_slow );
9916 %}
9917 
9918 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
9919   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9920   match(Set cr (CmpF src1 src2));
9921   ins_cost(100);
9922   format %{ "FLD    $src1\n\t"
9923             "FUCOMIP ST,$src2  // P6 instruction" %}
9924   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9925   ins_encode( Push_Reg_DPR(src1),
9926               OpcP, RegOpc(src2));
9927   ins_pipe( pipe_slow );
9928 %}
9929 
9930 
9931 // Compare & branch
9932 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9933   predicate(UseSSE == 0);
9934   match(Set cr (CmpF src1 src2));
9935   effect(KILL rax);
9936   ins_cost(200);
9937   format %{ "FLD    $src1\n\t"
9938             "FCOMp  $src2\n\t"
9939             "FNSTSW AX\n\t"
9940             "TEST   AX,0x400\n\t"
9941             "JZ,s   flags\n\t"
9942             "MOV    AH,1\t# unordered treat as LT\n"
9943     "flags:\tSAHF" %}
9944   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9945   ins_encode( Push_Reg_DPR(src1),
9946               OpcP, RegOpc(src2),
9947               fpu_flags);
9948   ins_pipe( pipe_slow );
9949 %}
9950 
9951 // Compare vs zero into -1,0,1
9952 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9953   predicate(UseSSE == 0);
9954   match(Set dst (CmpF3 src1 zero));
9955   effect(KILL cr, KILL rax);
9956   ins_cost(280);
9957   format %{ "FTSTF  $dst,$src1" %}
9958   opcode(0xE4, 0xD9);
9959   ins_encode( Push_Reg_DPR(src1),
9960               OpcS, OpcP, PopFPU,
9961               CmpF_Result(dst));
9962   ins_pipe( pipe_slow );
9963 %}
9964 
9965 // Compare into -1,0,1
9966 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
9967   predicate(UseSSE == 0);
9968   match(Set dst (CmpF3 src1 src2));
9969   effect(KILL cr, KILL rax);
9970   ins_cost(300);
9971   format %{ "FCMPF  $dst,$src1,$src2" %}
9972   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9973   ins_encode( Push_Reg_DPR(src1),
9974               OpcP, RegOpc(src2),
9975               CmpF_Result(dst));
9976   ins_pipe( pipe_slow );
9977 %}
9978 
9979 // float compare and set condition codes in EFLAGS by XMM regs
9980 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
9981   predicate(UseSSE>=1);
9982   match(Set cr (CmpF src1 src2));
9983   ins_cost(145);
9984   format %{ "UCOMISS $src1,$src2\n\t"
9985             "JNP,s   exit\n\t"
9986             "PUSHF\t# saw NaN, set CF\n\t"
9987             "AND     [rsp], #0xffffff2b\n\t"
9988             "POPF\n"
9989     "exit:" %}
9990   ins_encode %{
9991     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9992     emit_cmpfp_fixup(_masm);
9993   %}
9994   ins_pipe( pipe_slow );
9995 %}
9996 
9997 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
9998   predicate(UseSSE>=1);
9999   match(Set cr (CmpF src1 src2));
10000   ins_cost(100);
10001   format %{ "UCOMISS $src1,$src2" %}
10002   ins_encode %{
10003     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10004   %}
10005   ins_pipe( pipe_slow );
10006 %}
10007 
10008 // float compare and set condition codes in EFLAGS by XMM regs
10009 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10010   predicate(UseSSE>=1);
10011   match(Set cr (CmpF src1 (LoadF src2)));
10012   ins_cost(165);
10013   format %{ "UCOMISS $src1,$src2\n\t"
10014             "JNP,s   exit\n\t"
10015             "PUSHF\t# saw NaN, set CF\n\t"
10016             "AND     [rsp], #0xffffff2b\n\t"
10017             "POPF\n"
10018     "exit:" %}
10019   ins_encode %{
10020     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10021     emit_cmpfp_fixup(_masm);
10022   %}
10023   ins_pipe( pipe_slow );
10024 %}
10025 
10026 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10027   predicate(UseSSE>=1);
10028   match(Set cr (CmpF src1 (LoadF src2)));
10029   ins_cost(100);
10030   format %{ "UCOMISS $src1,$src2" %}
10031   ins_encode %{
10032     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10033   %}
10034   ins_pipe( pipe_slow );
10035 %}
10036 
10037 // Compare into -1,0,1 in XMM
10038 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10039   predicate(UseSSE>=1);
10040   match(Set dst (CmpF3 src1 src2));
10041   effect(KILL cr);
10042   ins_cost(255);
10043   format %{ "UCOMISS $src1, $src2\n\t"
10044             "MOV     $dst, #-1\n\t"
10045             "JP,s    done\n\t"
10046             "JB,s    done\n\t"
10047             "SETNE   $dst\n\t"
10048             "MOVZB   $dst, $dst\n"
10049     "done:" %}
10050   ins_encode %{
10051     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10052     emit_cmpfp3(_masm, $dst$$Register);
10053   %}
10054   ins_pipe( pipe_slow );
10055 %}
10056 
10057 // Compare into -1,0,1 in XMM and memory
10058 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10059   predicate(UseSSE>=1);
10060   match(Set dst (CmpF3 src1 (LoadF src2)));
10061   effect(KILL cr);
10062   ins_cost(275);
10063   format %{ "UCOMISS $src1, $src2\n\t"
10064             "MOV     $dst, #-1\n\t"
10065             "JP,s    done\n\t"
10066             "JB,s    done\n\t"
10067             "SETNE   $dst\n\t"
10068             "MOVZB   $dst, $dst\n"
10069     "done:" %}
10070   ins_encode %{
10071     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10072     emit_cmpfp3(_masm, $dst$$Register);
10073   %}
10074   ins_pipe( pipe_slow );
10075 %}
10076 
10077 // Spill to obtain 24-bit precision
10078 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10079   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10080   match(Set dst (SubF src1 src2));
10081 
10082   format %{ "FSUB   $dst,$src1 - $src2" %}
10083   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10084   ins_encode( Push_Reg_FPR(src1),
10085               OpcReg_FPR(src2),
10086               Pop_Mem_FPR(dst) );
10087   ins_pipe( fpu_mem_reg_reg );
10088 %}
10089 //
10090 // This instruction does not round to 24-bits
10091 instruct subFPR_reg(regFPR dst, regFPR src) %{
10092   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10093   match(Set dst (SubF dst src));
10094 
10095   format %{ "FSUB   $dst,$src" %}
10096   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10097   ins_encode( Push_Reg_FPR(src),
10098               OpcP, RegOpc(dst) );
10099   ins_pipe( fpu_reg_reg );
10100 %}
10101 
10102 // Spill to obtain 24-bit precision
10103 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10104   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10105   match(Set dst (AddF src1 src2));
10106 
10107   format %{ "FADD   $dst,$src1,$src2" %}
10108   opcode(0xD8, 0x0); /* D8 C0+i */
10109   ins_encode( Push_Reg_FPR(src2),
10110               OpcReg_FPR(src1),
10111               Pop_Mem_FPR(dst) );
10112   ins_pipe( fpu_mem_reg_reg );
10113 %}
10114 //
10115 // This instruction does not round to 24-bits
10116 instruct addFPR_reg(regFPR dst, regFPR src) %{
10117   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10118   match(Set dst (AddF dst src));
10119 
10120   format %{ "FLD    $src\n\t"
10121             "FADDp  $dst,ST" %}
10122   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10123   ins_encode( Push_Reg_FPR(src),
10124               OpcP, RegOpc(dst) );
10125   ins_pipe( fpu_reg_reg );
10126 %}
10127 
10128 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10129   predicate(UseSSE==0);
10130   match(Set dst (AbsF src));
10131   ins_cost(100);
10132   format %{ "FABS" %}
10133   opcode(0xE1, 0xD9);
10134   ins_encode( OpcS, OpcP );
10135   ins_pipe( fpu_reg_reg );
10136 %}
10137 
10138 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10139   predicate(UseSSE==0);
10140   match(Set dst (NegF src));
10141   ins_cost(100);
10142   format %{ "FCHS" %}
10143   opcode(0xE0, 0xD9);
10144   ins_encode( OpcS, OpcP );
10145   ins_pipe( fpu_reg_reg );
10146 %}
10147 
10148 // Cisc-alternate to addFPR_reg
10149 // Spill to obtain 24-bit precision
10150 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10151   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10152   match(Set dst (AddF src1 (LoadF src2)));
10153 
10154   format %{ "FLD    $src2\n\t"
10155             "FADD   ST,$src1\n\t"
10156             "FSTP_S $dst" %}
10157   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10158   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10159               OpcReg_FPR(src1),
10160               Pop_Mem_FPR(dst) );
10161   ins_pipe( fpu_mem_reg_mem );
10162 %}
10163 //
10164 // Cisc-alternate to addFPR_reg
10165 // This instruction does not round to 24-bits
10166 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10167   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10168   match(Set dst (AddF dst (LoadF src)));
10169 
10170   format %{ "FADD   $dst,$src" %}
10171   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10172   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10173               OpcP, RegOpc(dst) );
10174   ins_pipe( fpu_reg_mem );
10175 %}
10176 
// Following two instructions for _222_mpegaudio
10178 // Spill to obtain 24-bit precision
10179 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10180   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10181   match(Set dst (AddF src1 src2));
10182 
10183   format %{ "FADD   $dst,$src1,$src2" %}
10184   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10185   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10186               OpcReg_FPR(src2),
10187               Pop_Mem_FPR(dst) );
10188   ins_pipe( fpu_mem_reg_mem );
10189 %}
10190 
10191 // Cisc-spill variant
10192 // Spill to obtain 24-bit precision
10193 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10194   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10195   match(Set dst (AddF src1 (LoadF src2)));
10196 
10197   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10198   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10199   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10200               set_instruction_start,
10201               OpcP, RMopc_Mem(secondary,src1),
10202               Pop_Mem_FPR(dst) );
10203   ins_pipe( fpu_mem_mem_mem );
10204 %}
10205 
10206 // Spill to obtain 24-bit precision
10207 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10208   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10209   match(Set dst (AddF src1 src2));
10210 
10211   format %{ "FADD   $dst,$src1,$src2" %}
10212   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10213   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10214               set_instruction_start,
10215               OpcP, RMopc_Mem(secondary,src1),
10216               Pop_Mem_FPR(dst) );
10217   ins_pipe( fpu_mem_mem_mem );
10218 %}
10219 
10220 
10221 // Spill to obtain 24-bit precision
10222 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10223   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10224   match(Set dst (AddF src con));
10225   format %{ "FLD    $src\n\t"
10226             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10227             "FSTP_S $dst"  %}
10228   ins_encode %{
10229     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10230     __ fadd_s($constantaddress($con));
10231     __ fstp_s(Address(rsp, $dst$$disp));
10232   %}
10233   ins_pipe(fpu_mem_reg_con);
10234 %}
10235 //
10236 // This instruction does not round to 24-bits
10237 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10238   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10239   match(Set dst (AddF src con));
10240   format %{ "FLD    $src\n\t"
10241             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10242             "FSTP   $dst"  %}
10243   ins_encode %{
10244     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10245     __ fadd_s($constantaddress($con));
10246     __ fstp_d($dst$$reg);
10247   %}
10248   ins_pipe(fpu_reg_reg_con);
10249 %}
10250 
10251 // Spill to obtain 24-bit precision
10252 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10253   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10254   match(Set dst (MulF src1 src2));
10255 
10256   format %{ "FLD    $src1\n\t"
10257             "FMUL   $src2\n\t"
10258             "FSTP_S $dst"  %}
10259   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10260   ins_encode( Push_Reg_FPR(src1),
10261               OpcReg_FPR(src2),
10262               Pop_Mem_FPR(dst) );
10263   ins_pipe( fpu_mem_reg_reg );
10264 %}
10265 //
10266 // This instruction does not round to 24-bits
10267 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10268   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10269   match(Set dst (MulF src1 src2));
10270 
10271   format %{ "FLD    $src1\n\t"
10272             "FMUL   $src2\n\t"
10273             "FSTP_S $dst"  %}
10274   opcode(0xD8, 0x1); /* D8 C8+i */
10275   ins_encode( Push_Reg_FPR(src2),
10276               OpcReg_FPR(src1),
10277               Pop_Reg_FPR(dst) );
10278   ins_pipe( fpu_reg_reg_reg );
10279 %}
10280 
10281 
10282 // Spill to obtain 24-bit precision
10283 // Cisc-alternate to reg-reg multiply
10284 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10285   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10286   match(Set dst (MulF src1 (LoadF src2)));
10287 
10288   format %{ "FLD_S  $src2\n\t"
10289             "FMUL   $src1\n\t"
10290             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10292   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10293               OpcReg_FPR(src1),
10294               Pop_Mem_FPR(dst) );
10295   ins_pipe( fpu_mem_reg_mem );
10296 %}
10297 //
10298 // This instruction does not round to 24-bits
10299 // Cisc-alternate to reg-reg multiply
10300 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10301   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10302   match(Set dst (MulF src1 (LoadF src2)));
10303 
10304   format %{ "FMUL   $dst,$src1,$src2" %}
10305   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10306   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10307               OpcReg_FPR(src1),
10308               Pop_Reg_FPR(dst) );
10309   ins_pipe( fpu_reg_reg_mem );
10310 %}
10311 
10312 // Spill to obtain 24-bit precision
10313 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10314   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10315   match(Set dst (MulF src1 src2));
10316 
10317   format %{ "FMUL   $dst,$src1,$src2" %}
10318   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10319   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10320               set_instruction_start,
10321               OpcP, RMopc_Mem(secondary,src1),
10322               Pop_Mem_FPR(dst) );
10323   ins_pipe( fpu_mem_mem_mem );
10324 %}
10325 
10326 // Spill to obtain 24-bit precision
10327 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10328   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10329   match(Set dst (MulF src con));
10330 
10331   format %{ "FLD    $src\n\t"
10332             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10333             "FSTP_S $dst"  %}
10334   ins_encode %{
10335     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10336     __ fmul_s($constantaddress($con));
10337     __ fstp_s(Address(rsp, $dst$$disp));
10338   %}
10339   ins_pipe(fpu_mem_reg_con);
10340 %}
10341 //
10342 // This instruction does not round to 24-bits
10343 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10344   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10345   match(Set dst (MulF src con));
10346 
10347   format %{ "FLD    $src\n\t"
10348             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10349             "FSTP   $dst"  %}
10350   ins_encode %{
10351     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10352     __ fmul_s($constantaddress($con));
10353     __ fstp_d($dst$$reg);
10354   %}
10355   ins_pipe(fpu_reg_reg_con);
10356 %}
10357 
10358 
10359 //
10360 // MACRO1 -- subsume unshared load into mulFPR
10361 // This instruction does not round to 24-bits
10362 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10363   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10364   match(Set dst (MulF (LoadF mem1) src));
10365 
10366   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10367             "FMUL   ST,$src\n\t"
10368             "FSTP   $dst" %}
10369   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10370   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10371               OpcReg_FPR(src),
10372               Pop_Reg_FPR(dst) );
10373   ins_pipe( fpu_reg_reg_mem );
10374 %}
10375 //
10376 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10377 // This instruction does not round to 24-bits
10378 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10379   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10380   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10381   ins_cost(95);
10382 
10383   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10384             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10385             "FADD   ST,$src2\n\t"
10386             "FSTP   $dst" %}
10387   opcode(0xD9); /* LoadF D9 /0 */
10388   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10389               FMul_ST_reg(src1),
10390               FAdd_ST_reg(src2),
10391               Pop_Reg_FPR(dst) );
10392   ins_pipe( fpu_reg_mem_reg_reg );
10393 %}
10394 
10395 // MACRO3 -- addFPR a mulFPR
10396 // This instruction does not round to 24-bits.  It is a '2-address'
10397 // instruction in that the result goes back to src2.  This eliminates
10398 // a move from the macro; possibly the register allocator will have
10399 // to add it back (and maybe not).
10400 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10401   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10402   match(Set src2 (AddF (MulF src0 src1) src2));
10403 
10404   format %{ "FLD    $src0     ===MACRO3===\n\t"
10405             "FMUL   ST,$src1\n\t"
10406             "FADDP  $src2,ST" %}
10407   opcode(0xD9); /* LoadF D9 /0 */
10408   ins_encode( Push_Reg_FPR(src0),
10409               FMul_ST_reg(src1),
10410               FAddP_reg_ST(src2) );
10411   ins_pipe( fpu_reg_reg_reg );
10412 %}
10413 
10414 // MACRO4 -- divFPR subFPR
10415 // This instruction does not round to 24-bits
10416 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10417   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10418   match(Set dst (DivF (SubF src2 src1) src3));
10419 
10420   format %{ "FLD    $src2   ===MACRO4===\n\t"
10421             "FSUB   ST,$src1\n\t"
10422             "FDIV   ST,$src3\n\t"
10423             "FSTP  $dst" %}
10424   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10425   ins_encode( Push_Reg_FPR(src2),
10426               subFPR_divFPR_encode(src1,src3),
10427               Pop_Reg_FPR(dst) );
10428   ins_pipe( fpu_reg_reg_reg_reg );
10429 %}
10430 
10431 // Spill to obtain 24-bit precision
10432 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10433   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10434   match(Set dst (DivF src1 src2));
10435 
10436   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10438   ins_encode( Push_Reg_FPR(src1),
10439               OpcReg_FPR(src2),
10440               Pop_Mem_FPR(dst) );
10441   ins_pipe( fpu_mem_reg_reg );
10442 %}
10443 //
10444 // This instruction does not round to 24-bits
10445 instruct divFPR_reg(regFPR dst, regFPR src) %{
10446   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10447   match(Set dst (DivF dst src));
10448 
10449   format %{ "FDIV   $dst,$src" %}
10450   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10451   ins_encode( Push_Reg_FPR(src),
10452               OpcP, RegOpc(dst) );
10453   ins_pipe( fpu_reg_reg );
10454 %}
10455 
10456 
10457 // Spill to obtain 24-bit precision
10458 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10459   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10460   match(Set dst (ModF src1 src2));
10461   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10462 
10463   format %{ "FMOD   $dst,$src1,$src2" %}
10464   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10465               emitModDPR(),
10466               Push_Result_Mod_DPR(src2),
10467               Pop_Mem_FPR(dst));
10468   ins_pipe( pipe_slow );
10469 %}
10470 //
10471 // This instruction does not round to 24-bits
10472 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10473   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10474   match(Set dst (ModF dst src));
10475   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10476 
10477   format %{ "FMOD   $dst,$src" %}
10478   ins_encode(Push_Reg_Mod_DPR(dst, src),
10479               emitModDPR(),
10480               Push_Result_Mod_DPR(src),
10481               Pop_Reg_FPR(dst));
10482   ins_pipe( pipe_slow );
10483 %}
10484 
10485 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10486   predicate(UseSSE>=1);
10487   match(Set dst (ModF src0 src1));
10488   effect(KILL rax, KILL cr);
10489   format %{ "SUB    ESP,4\t # FMOD\n"
10490           "\tMOVSS  [ESP+0],$src1\n"
10491           "\tFLD_S  [ESP+0]\n"
10492           "\tMOVSS  [ESP+0],$src0\n"
10493           "\tFLD_S  [ESP+0]\n"
10494      "loop:\tFPREM\n"
10495           "\tFWAIT\n"
10496           "\tFNSTSW AX\n"
10497           "\tSAHF\n"
10498           "\tJP     loop\n"
10499           "\tFSTP_S [ESP+0]\n"
10500           "\tMOVSS  $dst,[ESP+0]\n"
10501           "\tADD    ESP,4\n"
10502           "\tFSTP   ST0\t # Restore FPU Stack"
10503     %}
10504   ins_cost(250);
10505   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10506   ins_pipe( pipe_slow );
10507 %}
10508 
10509 
10510 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
10512 
10513 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10514   predicate(UseSSE==0);
10515   match(Set dst (RoundFloat src));
10516   ins_cost(125);
10517   format %{ "FST_S  $dst,$src\t# F-round" %}
10518   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10519   ins_pipe( fpu_mem_reg );
10520 %}
10521 
10522 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10523   predicate(UseSSE<=1);
10524   match(Set dst (RoundDouble src));
10525   ins_cost(125);
10526   format %{ "FST_D  $dst,$src\t# D-round" %}
10527   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10528   ins_pipe( fpu_mem_reg );
10529 %}
10530 
// Force rounding to 24-bit precision and 8-bit exponent
10532 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10533   predicate(UseSSE==0);
10534   match(Set dst (ConvD2F src));
10535   format %{ "FST_S  $dst,$src\t# F-round" %}
10536   expand %{
10537     roundFloat_mem_reg(dst,src);
10538   %}
10539 %}
10540 
// Force rounding to 24-bit precision and 8-bit exponent
10542 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10543   predicate(UseSSE==1);
10544   match(Set dst (ConvD2F src));
10545   effect( KILL cr );
10546   format %{ "SUB    ESP,4\n\t"
10547             "FST_S  [ESP],$src\t# F-round\n\t"
10548             "MOVSS  $dst,[ESP]\n\t"
10549             "ADD ESP,4" %}
10550   ins_encode %{
10551     __ subptr(rsp, 4);
    if ($src$$reg != FPR1L_enc) {
      // $src is not on top of the FPU stack: push a copy of it and store-pop it as a float
      __ fld_s($src$$reg-1);
      __ fstp_s(Address(rsp, 0));
    } else {
      // $src is already ST(0): store it as a float without popping
      __ fst_s(Address(rsp, 0));
    }
10558     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10559     __ addptr(rsp, 4);
10560   %}
10561   ins_pipe( pipe_slow );
10562 %}
10563 
10564 // Force rounding double precision to single precision
10565 instruct convD2F_reg(regF dst, regD src) %{
10566   predicate(UseSSE>=2);
10567   match(Set dst (ConvD2F src));
10568   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10569   ins_encode %{
10570     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10571   %}
10572   ins_pipe( pipe_slow );
10573 %}
10574 
10575 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10576   predicate(UseSSE==0);
10577   match(Set dst (ConvF2D src));
10578   format %{ "FST_S  $dst,$src\t# D-round" %}
10579   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10580   ins_pipe( fpu_reg_reg );
10581 %}
10582 
10583 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10584   predicate(UseSSE==1);
10585   match(Set dst (ConvF2D src));
10586   format %{ "FST_D  $dst,$src\t# D-round" %}
10587   expand %{
10588     roundDouble_mem_reg(dst,src);
10589   %}
10590 %}
10591 
10592 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10593   predicate(UseSSE==1);
10594   match(Set dst (ConvF2D src));
10595   effect( KILL cr );
10596   format %{ "SUB    ESP,4\n\t"
10597             "MOVSS  [ESP] $src\n\t"
10598             "FLD_S  [ESP]\n\t"
10599             "ADD    ESP,4\n\t"
10600             "FSTP   $dst\t# D-round" %}
10601   ins_encode %{
10602     __ subptr(rsp, 4);
10603     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10604     __ fld_s(Address(rsp, 0));
10605     __ addptr(rsp, 4);
10606     __ fstp_d($dst$$reg);
10607   %}
10608   ins_pipe( pipe_slow );
10609 %}
10610 
10611 instruct convF2D_reg(regD dst, regF src) %{
10612   predicate(UseSSE>=2);
10613   match(Set dst (ConvF2D src));
10614   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10615   ins_encode %{
10616     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10617   %}
10618   ins_pipe( pipe_slow );
10619 %}
10620 
10621 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10622 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10623   predicate(UseSSE<=1);
10624   match(Set dst (ConvD2I src));
10625   effect( KILL tmp, KILL cr );
10626   format %{ "FLD    $src\t# Convert double to int \n\t"
10627             "FLDCW  trunc mode\n\t"
10628             "SUB    ESP,4\n\t"
10629             "FISTp  [ESP + #0]\n\t"
10630             "FLDCW  std/24-bit mode\n\t"
10631             "POP    EAX\n\t"
10632             "CMP    EAX,0x80000000\n\t"
10633             "JNE,s  fast\n\t"
10634             "FLD_D  $src\n\t"
10635             "CALL   d2i_wrapper\n"
10636       "fast:" %}
10637   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10638   ins_pipe( pipe_slow );
10639 %}
10640 
10641 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10642 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10643   predicate(UseSSE>=2);
10644   match(Set dst (ConvD2I src));
10645   effect( KILL tmp, KILL cr );
10646   format %{ "CVTTSD2SI $dst, $src\n\t"
10647             "CMP    $dst,0x80000000\n\t"
10648             "JNE,s  fast\n\t"
10649             "SUB    ESP, 8\n\t"
10650             "MOVSD  [ESP], $src\n\t"
10651             "FLD_D  [ESP]\n\t"
10652             "ADD    ESP, 8\n\t"
10653             "CALL   d2i_wrapper\n"
10654       "fast:" %}
10655   ins_encode %{
10656     Label fast;
10657     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10658     __ cmpl($dst$$Register, 0x80000000);
10659     __ jccb(Assembler::notEqual, fast);
10660     __ subptr(rsp, 8);
10661     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10662     __ fld_d(Address(rsp, 0));
10663     __ addptr(rsp, 8);
10664     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10665     __ bind(fast);
10666   %}
10667   ins_pipe( pipe_slow );
10668 %}
10669 
10670 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10671   predicate(UseSSE<=1);
10672   match(Set dst (ConvD2L src));
10673   effect( KILL cr );
10674   format %{ "FLD    $src\t# Convert double to long\n\t"
10675             "FLDCW  trunc mode\n\t"
10676             "SUB    ESP,8\n\t"
10677             "FISTp  [ESP + #0]\n\t"
10678             "FLDCW  std/24-bit mode\n\t"
10679             "POP    EAX\n\t"
10680             "POP    EDX\n\t"
10681             "CMP    EDX,0x80000000\n\t"
10682             "JNE,s  fast\n\t"
10683             "TEST   EAX,EAX\n\t"
10684             "JNE,s  fast\n\t"
10685             "FLD    $src\n\t"
10686             "CALL   d2l_wrapper\n"
10687       "fast:" %}
10688   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10689   ins_pipe( pipe_slow );
10690 %}
10691 
10692 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10693 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10694   predicate (UseSSE>=2);
10695   match(Set dst (ConvD2L src));
10696   effect( KILL cr );
10697   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10698             "MOVSD  [ESP],$src\n\t"
10699             "FLD_D  [ESP]\n\t"
10700             "FLDCW  trunc mode\n\t"
10701             "FISTp  [ESP + #0]\n\t"
10702             "FLDCW  std/24-bit mode\n\t"
10703             "POP    EAX\n\t"
10704             "POP    EDX\n\t"
10705             "CMP    EDX,0x80000000\n\t"
10706             "JNE,s  fast\n\t"
10707             "TEST   EAX,EAX\n\t"
10708             "JNE,s  fast\n\t"
10709             "SUB    ESP,8\n\t"
10710             "MOVSD  [ESP],$src\n\t"
10711             "FLD_D  [ESP]\n\t"
10712             "ADD    ESP,8\n\t"
10713             "CALL   d2l_wrapper\n"
10714       "fast:" %}
10715   ins_encode %{
10716     Label fast;
10717     __ subptr(rsp, 8);
10718     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10719     __ fld_d(Address(rsp, 0));
10720     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10721     __ fistp_d(Address(rsp, 0));
10722     // Restore the rounding mode, mask the exception
10723     if (Compile::current()->in_24_bit_fp_mode()) {
10724       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10725     } else {
10726       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10727     }
10728     // Load the converted long, adjust CPU stack
10729     __ pop(rax);
10730     __ pop(rdx);
10731     __ cmpl(rdx, 0x80000000);
10732     __ jccb(Assembler::notEqual, fast);
10733     __ testl(rax, rax);
10734     __ jccb(Assembler::notEqual, fast);
10735     __ subptr(rsp, 8);
10736     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10737     __ fld_d(Address(rsp, 0));
10738     __ addptr(rsp, 8);
10739     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10740     __ bind(fast);
10741   %}
10742   ins_pipe( pipe_slow );
10743 %}
10744 
// Convert a float to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// take the slow path if needed.
10751 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10752   predicate(UseSSE==0);
10753   match(Set dst (ConvF2I src));
10754   effect( KILL tmp, KILL cr );
10755   format %{ "FLD    $src\t# Convert float to int \n\t"
10756             "FLDCW  trunc mode\n\t"
10757             "SUB    ESP,4\n\t"
10758             "FISTp  [ESP + #0]\n\t"
10759             "FLDCW  std/24-bit mode\n\t"
10760             "POP    EAX\n\t"
10761             "CMP    EAX,0x80000000\n\t"
10762             "JNE,s  fast\n\t"
10763             "FLD    $src\n\t"
10764             "CALL   d2i_wrapper\n"
10765       "fast:" %}
10766   // DPR2I_encoding works for FPR2I
10767   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10768   ins_pipe( pipe_slow );
10769 %}
10770 
10771 // Convert a float in xmm to an int reg.
10772 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10773   predicate(UseSSE>=1);
10774   match(Set dst (ConvF2I src));
10775   effect( KILL tmp, KILL cr );
10776   format %{ "CVTTSS2SI $dst, $src\n\t"
10777             "CMP    $dst,0x80000000\n\t"
10778             "JNE,s  fast\n\t"
10779             "SUB    ESP, 4\n\t"
10780             "MOVSS  [ESP], $src\n\t"
10781             "FLD    [ESP]\n\t"
10782             "ADD    ESP, 4\n\t"
10783             "CALL   d2i_wrapper\n"
10784       "fast:" %}
10785   ins_encode %{
10786     Label fast;
10787     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10788     __ cmpl($dst$$Register, 0x80000000);
10789     __ jccb(Assembler::notEqual, fast);
10790     __ subptr(rsp, 4);
10791     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10792     __ fld_s(Address(rsp, 0));
10793     __ addptr(rsp, 4);
10794     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10795     __ bind(fast);
10796   %}
10797   ins_pipe( pipe_slow );
10798 %}
10799 
10800 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10801   predicate(UseSSE==0);
10802   match(Set dst (ConvF2L src));
10803   effect( KILL cr );
10804   format %{ "FLD    $src\t# Convert float to long\n\t"
10805             "FLDCW  trunc mode\n\t"
10806             "SUB    ESP,8\n\t"
10807             "FISTp  [ESP + #0]\n\t"
10808             "FLDCW  std/24-bit mode\n\t"
10809             "POP    EAX\n\t"
10810             "POP    EDX\n\t"
10811             "CMP    EDX,0x80000000\n\t"
10812             "JNE,s  fast\n\t"
10813             "TEST   EAX,EAX\n\t"
10814             "JNE,s  fast\n\t"
10815             "FLD    $src\n\t"
10816             "CALL   d2l_wrapper\n"
10817       "fast:" %}
10818   // DPR2L_encoding works for FPR2L
10819   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10820   ins_pipe( pipe_slow );
10821 %}
10822 
10823 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10824 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10825   predicate (UseSSE>=1);
10826   match(Set dst (ConvF2L src));
10827   effect( KILL cr );
10828   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10829             "MOVSS  [ESP],$src\n\t"
10830             "FLD_S  [ESP]\n\t"
10831             "FLDCW  trunc mode\n\t"
10832             "FISTp  [ESP + #0]\n\t"
10833             "FLDCW  std/24-bit mode\n\t"
10834             "POP    EAX\n\t"
10835             "POP    EDX\n\t"
10836             "CMP    EDX,0x80000000\n\t"
10837             "JNE,s  fast\n\t"
10838             "TEST   EAX,EAX\n\t"
10839             "JNE,s  fast\n\t"
10840             "SUB    ESP,4\t# Convert float to long\n\t"
10841             "MOVSS  [ESP],$src\n\t"
10842             "FLD_S  [ESP]\n\t"
10843             "ADD    ESP,4\n\t"
10844             "CALL   d2l_wrapper\n"
10845       "fast:" %}
10846   ins_encode %{
10847     Label fast;
10848     __ subptr(rsp, 8);
10849     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10850     __ fld_s(Address(rsp, 0));
10851     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10852     __ fistp_d(Address(rsp, 0));
10853     // Restore the rounding mode, mask the exception
10854     if (Compile::current()->in_24_bit_fp_mode()) {
10855       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10856     } else {
10857       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10858     }
10859     // Load the converted long, adjust CPU stack
10860     __ pop(rax);
10861     __ pop(rdx);
10862     __ cmpl(rdx, 0x80000000);
10863     __ jccb(Assembler::notEqual, fast);
10864     __ testl(rax, rax);
10865     __ jccb(Assembler::notEqual, fast);
10866     __ subptr(rsp, 4);
10867     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10868     __ fld_s(Address(rsp, 0));
10869     __ addptr(rsp, 4);
10870     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10871     __ bind(fast);
10872   %}
10873   ins_pipe( pipe_slow );
10874 %}
10875 
10876 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10877   predicate( UseSSE<=1 );
10878   match(Set dst (ConvI2D src));
10879   format %{ "FILD   $src\n\t"
10880             "FSTP   $dst" %}
10881   opcode(0xDB, 0x0);  /* DB /0 */
10882   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10883   ins_pipe( fpu_reg_mem );
10884 %}
10885 
10886 instruct convI2D_reg(regD dst, rRegI src) %{
10887   predicate( UseSSE>=2 && !UseXmmI2D );
10888   match(Set dst (ConvI2D src));
10889   format %{ "CVTSI2SD $dst,$src" %}
10890   ins_encode %{
10891     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10892   %}
10893   ins_pipe( pipe_slow );
10894 %}
10895 
10896 instruct convI2D_mem(regD dst, memory mem) %{
10897   predicate( UseSSE>=2 );
10898   match(Set dst (ConvI2D (LoadI mem)));
10899   format %{ "CVTSI2SD $dst,$mem" %}
10900   ins_encode %{
10901     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10902   %}
10903   ins_pipe( pipe_slow );
10904 %}
10905 
10906 instruct convXI2D_reg(regD dst, rRegI src)
10907 %{
10908   predicate( UseSSE>=2 && UseXmmI2D );
10909   match(Set dst (ConvI2D src));
10910 
10911   format %{ "MOVD  $dst,$src\n\t"
10912             "CVTDQ2PD $dst,$dst\t# i2d" %}
10913   ins_encode %{
10914     __ movdl($dst$$XMMRegister, $src$$Register);
10915     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10916   %}
10917   ins_pipe(pipe_slow); // XXX
10918 %}
10919 
10920 instruct convI2DPR_mem(regDPR dst, memory mem) %{
10921   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
10922   match(Set dst (ConvI2D (LoadI mem)));
10923   format %{ "FILD   $mem\n\t"
10924             "FSTP   $dst" %}
10925   opcode(0xDB);      /* DB /0 */
10926   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10927               Pop_Reg_DPR(dst));
10928   ins_pipe( fpu_reg_mem );
10929 %}
10930 
10931 // Convert a byte to a float; no rounding step needed.
10932 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
10933   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
10934   match(Set dst (ConvI2F src));
10935   format %{ "FILD   $src\n\t"
10936             "FSTP   $dst" %}
10937 
10938   opcode(0xDB, 0x0);  /* DB /0 */
10939   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
10940   ins_pipe( fpu_reg_mem );
10941 %}
10942 
10943 // In 24-bit mode, force exponent rounding by storing back out
10944 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
10945   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10946   match(Set dst (ConvI2F src));
10947   ins_cost(200);
10948   format %{ "FILD   $src\n\t"
10949             "FSTP_S $dst" %}
10950   opcode(0xDB, 0x0);  /* DB /0 */
10951   ins_encode( Push_Mem_I(src),
10952               Pop_Mem_FPR(dst));
10953   ins_pipe( fpu_mem_mem );
10954 %}
10955 
10956 // In 24-bit mode, force exponent rounding by storing back out
10957 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
10958   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10959   match(Set dst (ConvI2F (LoadI mem)));
10960   ins_cost(200);
10961   format %{ "FILD   $mem\n\t"
10962             "FSTP_S $dst" %}
10963   opcode(0xDB);  /* DB /0 */
10964   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10965               Pop_Mem_FPR(dst));
10966   ins_pipe( fpu_mem_mem );
10967 %}
10968 
10969 // This instruction does not round to 24-bits
10970 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
10971   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10972   match(Set dst (ConvI2F src));
10973   format %{ "FILD   $src\n\t"
10974             "FSTP   $dst" %}
10975   opcode(0xDB, 0x0);  /* DB /0 */
10976   ins_encode( Push_Mem_I(src),
10977               Pop_Reg_FPR(dst));
10978   ins_pipe( fpu_reg_mem );
10979 %}
10980 
10981 // This instruction does not round to 24-bits
10982 instruct convI2FPR_mem(regFPR dst, memory mem) %{
10983   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10984   match(Set dst (ConvI2F (LoadI mem)));
10985   format %{ "FILD   $mem\n\t"
10986             "FSTP   $dst" %}
10987   opcode(0xDB);      /* DB /0 */
10988   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10989               Pop_Reg_FPR(dst));
10990   ins_pipe( fpu_reg_mem );
10991 %}
10992 
10993 // Convert an int to a float in xmm; no rounding step needed.
10994 instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
10996   match(Set dst (ConvI2F src));
10997   format %{ "CVTSI2SS $dst, $src" %}
10998   ins_encode %{
10999     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11000   %}
11001   ins_pipe( pipe_slow );
11002 %}
11003 
instruct convXI2F_reg(regF dst, rRegI src)
11005 %{
11006   predicate( UseSSE>=2 && UseXmmI2F );
11007   match(Set dst (ConvI2F src));
11008 
11009   format %{ "MOVD  $dst,$src\n\t"
11010             "CVTDQ2PS $dst,$dst\t# i2f" %}
11011   ins_encode %{
11012     __ movdl($dst$$XMMRegister, $src$$Register);
11013     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11014   %}
11015   ins_pipe(pipe_slow); // XXX
11016 %}
11017 
11018 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11019   match(Set dst (ConvI2L src));
11020   effect(KILL cr);
11021   ins_cost(375);
11022   format %{ "MOV    $dst.lo,$src\n\t"
11023             "MOV    $dst.hi,$src\n\t"
11024             "SAR    $dst.hi,31" %}
11025   ins_encode(convert_int_long(dst,src));
11026   ins_pipe( ialu_reg_reg_long );
11027 %}
11028 
11029 // Zero-extend convert int to long
11030 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11031   match(Set dst (AndL (ConvI2L src) mask) );
11032   effect( KILL flags );
11033   ins_cost(250);
11034   format %{ "MOV    $dst.lo,$src\n\t"
11035             "XOR    $dst.hi,$dst.hi" %}
11036   opcode(0x33); // XOR
11037   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11038   ins_pipe( ialu_reg_reg_long );
11039 %}
11040 
11041 // Zero-extend long
11042 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11043   match(Set dst (AndL src mask) );
11044   effect( KILL flags );
11045   ins_cost(250);
11046   format %{ "MOV    $dst.lo,$src.lo\n\t"
11047             "XOR    $dst.hi,$dst.hi\n\t" %}
11048   opcode(0x33); // XOR
11049   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11050   ins_pipe( ialu_reg_reg_long );
11051 %}
11052 
11053 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11054   predicate (UseSSE<=1);
11055   match(Set dst (ConvL2D src));
11056   effect( KILL cr );
11057   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11058             "PUSH   $src.lo\n\t"
11059             "FILD   ST,[ESP + #0]\n\t"
11060             "ADD    ESP,8\n\t"
11061             "FSTP_D $dst\t# D-round" %}
11062   opcode(0xDF, 0x5);  /* DF /5 */
11063   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11064   ins_pipe( pipe_slow );
11065 %}
11066 
11067 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11068   predicate (UseSSE>=2);
11069   match(Set dst (ConvL2D src));
11070   effect( KILL cr );
11071   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11072             "PUSH   $src.lo\n\t"
11073             "FILD_D [ESP]\n\t"
11074             "FSTP_D [ESP]\n\t"
11075             "MOVSD  $dst,[ESP]\n\t"
11076             "ADD    ESP,8" %}
11077   opcode(0xDF, 0x5);  /* DF /5 */
11078   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11079   ins_pipe( pipe_slow );
11080 %}
11081 
11082 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11083   predicate (UseSSE>=1);
11084   match(Set dst (ConvL2F src));
11085   effect( KILL cr );
11086   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11087             "PUSH   $src.lo\n\t"
11088             "FILD_D [ESP]\n\t"
11089             "FSTP_S [ESP]\n\t"
11090             "MOVSS  $dst,[ESP]\n\t"
11091             "ADD    ESP,8" %}
11092   opcode(0xDF, 0x5);  /* DF /5 */
11093   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11094   ins_pipe( pipe_slow );
11095 %}
11096 
11097 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11098   match(Set dst (ConvL2F src));
11099   effect( KILL cr );
11100   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11101             "PUSH   $src.lo\n\t"
11102             "FILD   ST,[ESP + #0]\n\t"
11103             "ADD    ESP,8\n\t"
11104             "FSTP_S $dst\t# F-round" %}
11105   opcode(0xDF, 0x5);  /* DF /5 */
11106   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11107   ins_pipe( pipe_slow );
11108 %}
11109 
11110 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11111   match(Set dst (ConvL2I src));
11112   effect( DEF dst, USE src );
11113   format %{ "MOV    $dst,$src.lo" %}
11114   ins_encode(enc_CopyL_Lo(dst,src));
11115   ins_pipe( ialu_reg_reg );
11116 %}
11117 
11118 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11119   match(Set dst (MoveF2I src));
11120   effect( DEF dst, USE src );
11121   ins_cost(100);
11122   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11123   ins_encode %{
11124     __ movl($dst$$Register, Address(rsp, $src$$disp));
11125   %}
11126   ins_pipe( ialu_reg_mem );
11127 %}
11128 
11129 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11130   predicate(UseSSE==0);
11131   match(Set dst (MoveF2I src));
11132   effect( DEF dst, USE src );
11133 
11134   ins_cost(125);
11135   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11136   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11137   ins_pipe( fpu_mem_reg );
11138 %}
11139 
11140 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11141   predicate(UseSSE>=1);
11142   match(Set dst (MoveF2I src));
11143   effect( DEF dst, USE src );
11144 
11145   ins_cost(95);
11146   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11147   ins_encode %{
11148     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11149   %}
11150   ins_pipe( pipe_slow );
11151 %}
11152 
11153 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11154   predicate(UseSSE>=2);
11155   match(Set dst (MoveF2I src));
11156   effect( DEF dst, USE src );
11157   ins_cost(85);
11158   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11159   ins_encode %{
11160     __ movdl($dst$$Register, $src$$XMMRegister);
11161   %}
11162   ins_pipe( pipe_slow );
11163 %}
11164 
11165 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11166   match(Set dst (MoveI2F src));
11167   effect( DEF dst, USE src );
11168 
11169   ins_cost(100);
11170   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11171   ins_encode %{
11172     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11173   %}
11174   ins_pipe( ialu_mem_reg );
11175 %}
11176 
11177 
11178 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11179   predicate(UseSSE==0);
11180   match(Set dst (MoveI2F src));
11181   effect(DEF dst, USE src);
11182 
11183   ins_cost(125);
11184   format %{ "FLD_S  $src\n\t"
11185             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11186   opcode(0xD9);               /* D9 /0, FLD m32real */
11187   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11188               Pop_Reg_FPR(dst) );
11189   ins_pipe( fpu_reg_mem );
11190 %}
11191 
11192 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11193   predicate(UseSSE>=1);
11194   match(Set dst (MoveI2F src));
11195   effect( DEF dst, USE src );
11196 
11197   ins_cost(95);
11198   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11199   ins_encode %{
11200     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11201   %}
11202   ins_pipe( pipe_slow );
11203 %}
11204 
11205 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11206   predicate(UseSSE>=2);
11207   match(Set dst (MoveI2F src));
11208   effect( DEF dst, USE src );
11209 
11210   ins_cost(85);
11211   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11212   ins_encode %{
11213     __ movdl($dst$$XMMRegister, $src$$Register);
11214   %}
11215   ins_pipe( pipe_slow );
11216 %}
11217 
11218 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11219   match(Set dst (MoveD2L src));
11220   effect(DEF dst, USE src);
11221 
11222   ins_cost(250);
11223   format %{ "MOV    $dst.lo,$src\n\t"
11224             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11225   opcode(0x8B, 0x8B);
11226   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11227   ins_pipe( ialu_mem_long_reg );
11228 %}
11229 
11230 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11231   predicate(UseSSE<=1);
11232   match(Set dst (MoveD2L src));
11233   effect(DEF dst, USE src);
11234 
11235   ins_cost(125);
11236   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11237   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11238   ins_pipe( fpu_mem_reg );
11239 %}
11240 
11241 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11242   predicate(UseSSE>=2);
11243   match(Set dst (MoveD2L src));
11244   effect(DEF dst, USE src);
11245   ins_cost(95);
11246   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11247   ins_encode %{
11248     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11249   %}
11250   ins_pipe( pipe_slow );
11251 %}
11252 
11253 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11254   predicate(UseSSE>=2);
11255   match(Set dst (MoveD2L src));
11256   effect(DEF dst, USE src, TEMP tmp);
11257   ins_cost(85);
11258   format %{ "MOVD   $dst.lo,$src\n\t"
11259             "PSHUFLW $tmp,$src,0x4E\n\t"
11260             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11261   ins_encode %{
11262     __ movdl($dst$$Register, $src$$XMMRegister);
11263     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11264     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11265   %}
11266   ins_pipe( pipe_slow );
11267 %}
11268 
11269 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11270   match(Set dst (MoveL2D src));
11271   effect(DEF dst, USE src);
11272 
11273   ins_cost(200);
11274   format %{ "MOV    $dst,$src.lo\n\t"
11275             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11276   opcode(0x89, 0x89);
11277   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11278   ins_pipe( ialu_mem_long_reg );
11279 %}
11280 
11281 
11282 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11283   predicate(UseSSE<=1);
11284   match(Set dst (MoveL2D src));
11285   effect(DEF dst, USE src);
11286   ins_cost(125);
11287 
11288   format %{ "FLD_D  $src\n\t"
11289             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11290   opcode(0xDD);               /* DD /0, FLD m64real */
11291   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11292               Pop_Reg_DPR(dst) );
11293   ins_pipe( fpu_reg_mem );
11294 %}
11295 
11296 
11297 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11298   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11299   match(Set dst (MoveL2D src));
11300   effect(DEF dst, USE src);
11301 
11302   ins_cost(95);
11303   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11304   ins_encode %{
11305     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11306   %}
11307   ins_pipe( pipe_slow );
11308 %}
11309 
11310 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11311   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11312   match(Set dst (MoveL2D src));
11313   effect(DEF dst, USE src);
11314 
11315   ins_cost(95);
11316   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11317   ins_encode %{
11318     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11319   %}
11320   ins_pipe( pipe_slow );
11321 %}
11322 
11323 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11324   predicate(UseSSE>=2);
11325   match(Set dst (MoveL2D src));
11326   effect(TEMP dst, USE src, TEMP tmp);
11327   ins_cost(85);
11328   format %{ "MOVD   $dst,$src.lo\n\t"
11329             "MOVD   $tmp,$src.hi\n\t"
11330             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11331   ins_encode %{
11332     __ movdl($dst$$XMMRegister, $src$$Register);
11333     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11334     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11335   %}
11336   ins_pipe( pipe_slow );
11337 %}
11338 
11339 
11340 // =======================================================================
11341 // fast clearing of an array
11342 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11343   predicate(!UseFastStosb);
11344   match(Set dummy (ClearArray cnt base));
11345   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11346   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11347             "SHL    ECX,1\t# Convert doublewords to words\n\t"
11348             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11349   ins_encode %{
11350     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11351   %}
11352   ins_pipe( pipe_slow );
11353 %}
11354 
11355 instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11356   predicate(UseFastStosb);
11357   match(Set dummy (ClearArray cnt base));
11358   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11359   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11360             "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11361             "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
11362   ins_encode %{
11363     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11364   %}
11365   ins_pipe( pipe_slow );
11366 %}
11367 
11368 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11369                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11370   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11371   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11372   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11373 
11374   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11375   ins_encode %{
11376     __ string_compare($str1$$Register, $str2$$Register,
11377                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11378                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11379   %}
11380   ins_pipe( pipe_slow );
11381 %}
11382 
11383 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11384                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11385   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11386   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11387   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11388 
11389   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11390   ins_encode %{
11391     __ string_compare($str1$$Register, $str2$$Register,
11392                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11393                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11394   %}
11395   ins_pipe( pipe_slow );
11396 %}
11397 
11398 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11399                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11400   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11401   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11402   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11403 
11404   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11405   ins_encode %{
11406     __ string_compare($str1$$Register, $str2$$Register,
11407                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11408                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11409   %}
11410   ins_pipe( pipe_slow );
11411 %}
11412 
11413 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11414                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11415   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11416   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11417   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11418 
11419   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11420   ins_encode %{
11421     __ string_compare($str2$$Register, $str1$$Register,
11422                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11423                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11424   %}
11425   ins_pipe( pipe_slow );
11426 %}
11427 
11428 // fast string equals
11429 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11430                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11431   match(Set result (StrEquals (Binary str1 str2) cnt));
11432   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11433 
11434   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11435   ins_encode %{
11436     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11437                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11438                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11439   %}
11440 
11441   ins_pipe( pipe_slow );
11442 %}
11443 
11444 // fast search of substring with known size.
11445 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11446                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11447   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11448   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11449   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11450 
11451   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11452   ins_encode %{
11453     int icnt2 = (int)$int_cnt2$$constant;
11454     if (icnt2 >= 16) {
11455       // IndexOf for constant substrings with size >= 16 elements
11456       // which don't need to be loaded through the stack.
11457       __ string_indexofC8($str1$$Register, $str2$$Register,
11458                           $cnt1$$Register, $cnt2$$Register,
11459                           icnt2, $result$$Register,
11460                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11461     } else {
11462       // Small strings are loaded through the stack if they cross a page boundary.
11463       __ string_indexof($str1$$Register, $str2$$Register,
11464                         $cnt1$$Register, $cnt2$$Register,
11465                         icnt2, $result$$Register,
11466                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11467     }
11468   %}
11469   ins_pipe( pipe_slow );
11470 %}
11471 
11472 // fast search of substring with known size.
11473 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11474                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11475   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11476   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11477   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11478 
11479   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11480   ins_encode %{
11481     int icnt2 = (int)$int_cnt2$$constant;
11482     if (icnt2 >= 8) {
11483       // IndexOf for constant substrings with size >= 8 elements
11484       // which don't need to be loaded through the stack.
11485       __ string_indexofC8($str1$$Register, $str2$$Register,
11486                           $cnt1$$Register, $cnt2$$Register,
11487                           icnt2, $result$$Register,
11488                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11489     } else {
11490       // Small strings are loaded through the stack if they cross a page boundary.
11491       __ string_indexof($str1$$Register, $str2$$Register,
11492                         $cnt1$$Register, $cnt2$$Register,
11493                         icnt2, $result$$Register,
11494                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11495     }
11496   %}
11497   ins_pipe( pipe_slow );
11498 %}
11499 
11500 // fast search of substring with known size.
11501 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11502                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11503   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11504   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11505   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11506 
11507   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11508   ins_encode %{
11509     int icnt2 = (int)$int_cnt2$$constant;
11510     if (icnt2 >= 8) {
11511       // IndexOf for constant substrings with size >= 8 elements
11512       // which don't need to be loaded through the stack.
11513       __ string_indexofC8($str1$$Register, $str2$$Register,
11514                           $cnt1$$Register, $cnt2$$Register,
11515                           icnt2, $result$$Register,
11516                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11517     } else {
11518       // Small strings are loaded through the stack if they cross a page boundary.
11519       __ string_indexof($str1$$Register, $str2$$Register,
11520                         $cnt1$$Register, $cnt2$$Register,
11521                         icnt2, $result$$Register,
11522                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11523     }
11524   %}
11525   ins_pipe( pipe_slow );
11526 %}
11527 
11528 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11529                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11530   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11531   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11532   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11533 
11534   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11535   ins_encode %{
11536     __ string_indexof($str1$$Register, $str2$$Register,
11537                       $cnt1$$Register, $cnt2$$Register,
11538                       (-1), $result$$Register,
11539                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11540   %}
11541   ins_pipe( pipe_slow );
11542 %}
11543 
11544 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11545                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11546   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11547   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11548   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11549 
11550   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11551   ins_encode %{
11552     __ string_indexof($str1$$Register, $str2$$Register,
11553                       $cnt1$$Register, $cnt2$$Register,
11554                       (-1), $result$$Register,
11555                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11556   %}
11557   ins_pipe( pipe_slow );
11558 %}
11559 
11560 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11561                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11562   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11563   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11564   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11565 
11566   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11567   ins_encode %{
11568     __ string_indexof($str1$$Register, $str2$$Register,
11569                       $cnt1$$Register, $cnt2$$Register,
11570                       (-1), $result$$Register,
11571                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11572   %}
11573   ins_pipe( pipe_slow );
11574 %}
11575 
11576 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11577                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11578   predicate(UseSSE42Intrinsics);
11579   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11580   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11581   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11582   ins_encode %{
11583     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11584                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11585   %}
11586   ins_pipe( pipe_slow );
11587 %}
11588 
11589 // fast array equals
11590 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11591                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11592 %{
11593   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11594   match(Set result (AryEq ary1 ary2));
11595   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11596   //ins_cost(300);
11597 
11598   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11599   ins_encode %{
11600     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11601                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11602                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11603   %}
11604   ins_pipe( pipe_slow );
11605 %}
11606 
11607 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11608                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11609 %{
11610   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11611   match(Set result (AryEq ary1 ary2));
11612   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11613   //ins_cost(300);
11614 
11615   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11616   ins_encode %{
11617     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11618                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11619                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11620   %}
11621   ins_pipe( pipe_slow );
11622 %}
11623 
11624 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11625                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11626 %{
11627   match(Set result (HasNegatives ary1 len));
11628   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11629 
11630   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11631   ins_encode %{
11632     __ has_negatives($ary1$$Register, $len$$Register,
11633                      $result$$Register, $tmp3$$Register,
11634                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11635   %}
11636   ins_pipe( pipe_slow );
11637 %}
11638 
11639 // fast char[] to byte[] compression
11640 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11641                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11642   match(Set result (StrCompressedCopy src (Binary dst len)));
11643   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11644 
11645   format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
11646   ins_encode %{
11647     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11648                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11649                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11650   %}
11651   ins_pipe( pipe_slow );
11652 %}
11653 
11654 // fast byte[] to char[] inflation
11655 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11656                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11657   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11658   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11659 
11660   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11661   ins_encode %{
11662     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11663                           $tmp1$$XMMRegister, $tmp2$$Register);
11664   %}
11665   ins_pipe( pipe_slow );
11666 %}
11667 
11668 // encode char[] to byte[] in ISO_8859_1
11669 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11670                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11671                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11672   match(Set result (EncodeISOArray src (Binary dst len)));
11673   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11674 
11675   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11676   ins_encode %{
11677     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11678                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11679                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11680   %}
11681   ins_pipe( pipe_slow );
11682 %}
11683 
11684 
11685 //----------Control Flow Instructions------------------------------------------
11686 // Signed compare Instructions
11687 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11688   match(Set cr (CmpI op1 op2));
11689   effect( DEF cr, USE op1, USE op2 );
11690   format %{ "CMP    $op1,$op2" %}
11691   opcode(0x3B);  /* Opcode 3B /r */
11692   ins_encode( OpcP, RegReg( op1, op2) );
11693   ins_pipe( ialu_cr_reg_reg );
11694 %}
11695 
11696 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11697   match(Set cr (CmpI op1 op2));
11698   effect( DEF cr, USE op1 );
11699   format %{ "CMP    $op1,$op2" %}
11700   opcode(0x81,0x07);  /* Opcode 81 /7 */
11701   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11702   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11703   ins_pipe( ialu_cr_reg_imm );
11704 %}
11705 
11706 // Cisc-spilled version of cmpI_eReg
11707 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11708   match(Set cr (CmpI op1 (LoadI op2)));
11709 
11710   format %{ "CMP    $op1,$op2" %}
11711   ins_cost(500);
11712   opcode(0x3B);  /* Opcode 3B /r */
11713   ins_encode( OpcP, RegMem( op1, op2) );
11714   ins_pipe( ialu_cr_reg_mem );
11715 %}
11716 
11717 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11718   match(Set cr (CmpI src zero));
11719   effect( DEF cr, USE src );
11720 
11721   format %{ "TEST   $src,$src" %}
11722   opcode(0x85);
11723   ins_encode( OpcP, RegReg( src, src ) );
11724   ins_pipe( ialu_cr_reg_imm );
11725 %}
11726 
11727 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11728   match(Set cr (CmpI (AndI src con) zero));
11729 
11730   format %{ "TEST   $src,$con" %}
11731   opcode(0xF7,0x00);
11732   ins_encode( OpcP, RegOpc(src), Con32(con) );
11733   ins_pipe( ialu_cr_reg_imm );
11734 %}
11735 
11736 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11737   match(Set cr (CmpI (AndI src mem) zero));
11738 
11739   format %{ "TEST   $src,$mem" %}
11740   opcode(0x85);
11741   ins_encode( OpcP, RegMem( src, mem ) );
11742   ins_pipe( ialu_cr_reg_mem );
11743 %}
11744 
11745 // Unsigned compare Instructions; really, same as signed except they
11746 // produce an eFlagsRegU instead of eFlagsReg.
11747 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11748   match(Set cr (CmpU op1 op2));
11749 
11750   format %{ "CMPu   $op1,$op2" %}
11751   opcode(0x3B);  /* Opcode 3B /r */
11752   ins_encode( OpcP, RegReg( op1, op2) );
11753   ins_pipe( ialu_cr_reg_reg );
11754 %}
11755 
11756 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11757   match(Set cr (CmpU op1 op2));
11758 
11759   format %{ "CMPu   $op1,$op2" %}
11760   opcode(0x81,0x07);  /* Opcode 81 /7 */
11761   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11762   ins_pipe( ialu_cr_reg_imm );
11763 %}
11764 
11765 // Cisc-spilled version of cmpU_eReg
11766 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11767   match(Set cr (CmpU op1 (LoadI op2)));
11768 
11769   format %{ "CMPu   $op1,$op2" %}
11770   ins_cost(500);
11771   opcode(0x3B);  /* Opcode 3B /r */
11772   ins_encode( OpcP, RegMem( op1, op2) );
11773   ins_pipe( ialu_cr_reg_mem );
11774 %}
11775 
11776 // // Cisc-spilled version of cmpU_eReg
11777 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11778 //  match(Set cr (CmpU (LoadI op1) op2));
11779 //
11780 //  format %{ "CMPu   $op1,$op2" %}
11781 //  ins_cost(500);
11782 //  opcode(0x39);  /* Opcode 39 /r */
11783 //  ins_encode( OpcP, RegMem( op1, op2) );
11784 //%}
11785 
11786 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11787   match(Set cr (CmpU src zero));
11788 
11789   format %{ "TESTu  $src,$src" %}
11790   opcode(0x85);
11791   ins_encode( OpcP, RegReg( src, src ) );
11792   ins_pipe( ialu_cr_reg_imm );
11793 %}
11794 
11795 // Unsigned pointer compare Instructions
11796 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11797   match(Set cr (CmpP op1 op2));
11798 
11799   format %{ "CMPu   $op1,$op2" %}
11800   opcode(0x3B);  /* Opcode 3B /r */
11801   ins_encode( OpcP, RegReg( op1, op2) );
11802   ins_pipe( ialu_cr_reg_reg );
11803 %}
11804 
11805 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11806   match(Set cr (CmpP op1 op2));
11807 
11808   format %{ "CMPu   $op1,$op2" %}
11809   opcode(0x81,0x07);  /* Opcode 81 /7 */
11810   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11811   ins_pipe( ialu_cr_reg_imm );
11812 %}
11813 
11814 // Cisc-spilled version of cmpP_eReg
11815 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11816   match(Set cr (CmpP op1 (LoadP op2)));
11817 
11818   format %{ "CMPu   $op1,$op2" %}
11819   ins_cost(500);
11820   opcode(0x3B);  /* Opcode 3B /r */
11821   ins_encode( OpcP, RegMem( op1, op2) );
11822   ins_pipe( ialu_cr_reg_mem );
11823 %}
11824 
11825 // // Cisc-spilled version of cmpP_eReg
11826 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11827 //  match(Set cr (CmpP (LoadP op1) op2));
11828 //
11829 //  format %{ "CMPu   $op1,$op2" %}
11830 //  ins_cost(500);
11831 //  opcode(0x39);  /* Opcode 39 /r */
11832 //  ins_encode( OpcP, RegMem( op1, op2) );
11833 //%}
11834 
11835 // Compare raw pointer (used in out-of-heap check).
11836 // Only works because non-oop pointers must be raw pointers
11837 // and raw pointers have no anti-dependencies.
11838 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11839   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11840   match(Set cr (CmpP op1 (LoadP op2)));
11841 
11842   format %{ "CMPu   $op1,$op2" %}
11843   opcode(0x3B);  /* Opcode 3B /r */
11844   ins_encode( OpcP, RegMem( op1, op2) );
11845   ins_pipe( ialu_cr_reg_mem );
11846 %}
11847 
11848 //
11849 // This will generate a signed flags result. This should be ok
11850 // since any compare to a zero should be eq/neq.
11851 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11852   match(Set cr (CmpP src zero));
11853 
11854   format %{ "TEST   $src,$src" %}
11855   opcode(0x85);
11856   ins_encode( OpcP, RegReg( src, src ) );
11857   ins_pipe( ialu_cr_reg_imm );
11858 %}
11859 
11860 // Cisc-spilled version of testP_reg
11861 // This will generate a signed flags result. This should be ok
11862 // since any compare to a zero should be eq/neq.
11863 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11864   match(Set cr (CmpP (LoadP op) zero));
11865 
11866   format %{ "TEST   $op,0xFFFFFFFF" %}
11867   ins_cost(500);
11868   opcode(0xF7);               /* Opcode F7 /0 */
11869   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11870   ins_pipe( ialu_cr_reg_imm );
11871 %}
11872 
11873 // Yanked all unsigned pointer compare operations.
11874 // Pointer compares are done with CmpP which is already unsigned.
11875 
11876 //----------Max and Min--------------------------------------------------------
11877 // Min Instructions
11878 ////
11879 //   *** Min and Max using the conditional move are slower than the
11880 //   *** branch version on a Pentium III.
11881 // // Conditional move for min
11882 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11883 //  effect( USE_DEF op2, USE op1, USE cr );
11884 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11885 //  opcode(0x4C,0x0F);
11886 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11887 //  ins_pipe( pipe_cmov_reg );
11888 //%}
11889 //
11890 //// Min Register with Register (P6 version)
11891 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11892 //  predicate(VM_Version::supports_cmov() );
11893 //  match(Set op2 (MinI op1 op2));
11894 //  ins_cost(200);
11895 //  expand %{
11896 //    eFlagsReg cr;
11897 //    compI_eReg(cr,op1,op2);
11898 //    cmovI_reg_lt(op2,op1,cr);
11899 //  %}
11900 //%}
11901 
11902 // Min Register with Register (generic version)
11903 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11904   match(Set dst (MinI dst src));
11905   effect(KILL flags);
11906   ins_cost(300);
11907 
11908   format %{ "MIN    $dst,$src" %}
11909   opcode(0xCC);
11910   ins_encode( min_enc(dst,src) );
11911   ins_pipe( pipe_slow );
11912 %}
11913 
11914 // Max Register with Register
11915 //   *** Min and Max using the conditional move are slower than the
11916 //   *** branch version on a Pentium III.
11917 // // Conditional move for max
11918 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11919 //  effect( USE_DEF op2, USE op1, USE cr );
11920 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11921 //  opcode(0x4F,0x0F);
11922 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11923 //  ins_pipe( pipe_cmov_reg );
11924 //%}
11925 //
11926 // // Max Register with Register (P6 version)
11927 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11928 //  predicate(VM_Version::supports_cmov() );
11929 //  match(Set op2 (MaxI op1 op2));
11930 //  ins_cost(200);
11931 //  expand %{
11932 //    eFlagsReg cr;
11933 //    compI_eReg(cr,op1,op2);
11934 //    cmovI_reg_gt(op2,op1,cr);
11935 //  %}
11936 //%}
11937 
11938 // Max Register with Register (generic version)
11939 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11940   match(Set dst (MaxI dst src));
11941   effect(KILL flags);
11942   ins_cost(300);
11943 
11944   format %{ "MAX    $dst,$src" %}
11945   opcode(0xCC);
11946   ins_encode( max_enc(dst,src) );
11947   ins_pipe( pipe_slow );
11948 %}
11949 
11950 // ============================================================================
11951 // Counted Loop limit node, which represents the exact final iterator value.
11952 // Note: the resulting value should fit into the integer range, since
11953 // counted loops have a limit check that guards against overflow.
11954 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
11955   match(Set limit (LoopLimit (Binary init limit) stride));
11956   effect(TEMP limit_hi, TEMP tmp, KILL flags);
11957   ins_cost(300);
11958 
11959   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
11960   ins_encode %{
11961     int strd = (int)$stride$$constant;
11962     assert(strd != 1 && strd != -1, "sanity");
11963     int m1 = (strd > 0) ? 1 : -1;
11964     // Convert limit to long (EAX:EDX)
11965     __ cdql();
11966     // Convert init to long (init:tmp)
11967     __ movl($tmp$$Register, $init$$Register);
11968     __ sarl($tmp$$Register, 31);
11969     // $limit - $init
11970     __ subl($limit$$Register, $init$$Register);
11971     __ sbbl($limit_hi$$Register, $tmp$$Register);
11972     // + ($stride - 1)
11973     if (strd > 0) {
11974       __ addl($limit$$Register, (strd - 1));
11975       __ adcl($limit_hi$$Register, 0);
11976       __ movl($tmp$$Register, strd);
11977     } else {
11978       __ addl($limit$$Register, (strd + 1));
11979       __ adcl($limit_hi$$Register, -1);
11980       __ lneg($limit_hi$$Register, $limit$$Register);
11981       __ movl($tmp$$Register, -strd);
11982     }
11983     // signed division: (EAX:EDX) / pos_stride
11984     __ idivl($tmp$$Register);
11985     if (strd < 0) {
11986       // restore sign
11987       __ negl($tmp$$Register);
11988     }
11989     // (EAX) * stride
11990     __ mull($tmp$$Register);
11991     // + init (ignore upper bits)
11992     __ addl($limit$$Register, $init$$Register);
11993   %}
11994   ins_pipe( pipe_slow );
11995 %}
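
// For reference, a minimal C sketch of the exact-limit computation above
// (names are illustrative; it assumes a non-unit stride, as the assert in
// the encoding requires, and uses a 64-bit intermediate mirroring EDX:EAX):
//
//   static int exact_loop_limit(int init, int limit, int stride) {
//     long long span = (long long)limit - init;                 // may not fit in 32 bits
//     long long adj  = (stride > 0) ? stride - 1 : stride + 1;  // round the division up
//     long long trip = (span + adj) / stride;                   // trip count
//     return init + (int)(trip * stride);                       // exact final iterator value
//   }
//
// e.g. init=0, limit=10, stride=3 gives trip=4 and an exact limit of 12.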
11996 
11997 // ============================================================================
11998 // Branch Instructions
11999 // Jump Table
12000 instruct jumpXtnd(rRegI switch_val) %{
12001   match(Jump switch_val);
12002   ins_cost(350);
12003   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12004   ins_encode %{
12005     // Jump to Address(table_base + switch_reg)
12006     Address index(noreg, $switch_val$$Register, Address::times_1);
12007     __ jump(ArrayAddress($constantaddress, index));
12008   %}
12009   ins_pipe(pipe_jmp);
12010 %}
12011 
12012 // Jump Direct - Label defines a relative address from JMP+1
12013 instruct jmpDir(label labl) %{
12014   match(Goto);
12015   effect(USE labl);
12016 
12017   ins_cost(300);
12018   format %{ "JMP    $labl" %}
12019   size(5);
12020   ins_encode %{
12021     Label* L = $labl$$label;
12022     __ jmp(*L, false); // Always long jump
12023   %}
12024   ins_pipe( pipe_jmp );
12025 %}
12026 
12027 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12028 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12029   match(If cop cr);
12030   effect(USE labl);
12031 
12032   ins_cost(300);
12033   format %{ "J$cop    $labl" %}
12034   size(6);
12035   ins_encode %{
12036     Label* L = $labl$$label;
12037     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12038   %}
12039   ins_pipe( pipe_jcc );
12040 %}
12041 
12042 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12043 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12044   match(CountedLoopEnd cop cr);
12045   effect(USE labl);
12046 
12047   ins_cost(300);
12048   format %{ "J$cop    $labl\t# Loop end" %}
12049   size(6);
12050   ins_encode %{
12051     Label* L = $labl$$label;
12052     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12053   %}
12054   ins_pipe( pipe_jcc );
12055 %}
12056 
12057 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12058 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12059   match(CountedLoopEnd cop cmp);
12060   effect(USE labl);
12061 
12062   ins_cost(300);
12063   format %{ "J$cop,u  $labl\t# Loop end" %}
12064   size(6);
12065   ins_encode %{
12066     Label* L = $labl$$label;
12067     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12068   %}
12069   ins_pipe( pipe_jcc );
12070 %}
12071 
12072 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12073   match(CountedLoopEnd cop cmp);
12074   effect(USE labl);
12075 
12076   ins_cost(200);
12077   format %{ "J$cop,u  $labl\t# Loop end" %}
12078   size(6);
12079   ins_encode %{
12080     Label* L = $labl$$label;
12081     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12082   %}
12083   ins_pipe( pipe_jcc );
12084 %}
12085 
12086 // Jump Direct Conditional - using unsigned comparison
12087 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12088   match(If cop cmp);
12089   effect(USE labl);
12090 
12091   ins_cost(300);
12092   format %{ "J$cop,u  $labl" %}
12093   size(6);
12094   ins_encode %{
12095     Label* L = $labl$$label;
12096     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12097   %}
12098   ins_pipe(pipe_jcc);
12099 %}
12100 
12101 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12102   match(If cop cmp);
12103   effect(USE labl);
12104 
12105   ins_cost(200);
12106   format %{ "J$cop,u  $labl" %}
12107   size(6);
12108   ins_encode %{
12109     Label* L = $labl$$label;
12110     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12111   %}
12112   ins_pipe(pipe_jcc);
12113 %}
12114 
12115 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12116   match(If cop cmp);
12117   effect(USE labl);
12118 
12119   ins_cost(200);
12120   format %{ $$template
12121     if ($cop$$cmpcode == Assembler::notEqual) {
12122       $$emit$$"JP,u   $labl\n\t"
12123       $$emit$$"J$cop,u   $labl"
12124     } else {
12125       $$emit$$"JP,u   done\n\t"
12126       $$emit$$"J$cop,u   $labl\n\t"
12127       $$emit$$"done:"
12128     }
12129   %}
12130   ins_encode %{
12131     Label* l = $labl$$label;
12132     if ($cop$$cmpcode == Assembler::notEqual) {
12133       __ jcc(Assembler::parity, *l, false);
12134       __ jcc(Assembler::notEqual, *l, false);
12135     } else if ($cop$$cmpcode == Assembler::equal) {
12136       Label done;
12137       __ jccb(Assembler::parity, done);
12138       __ jcc(Assembler::equal, *l, false);
12139       __ bind(done);
12140     } else {
12141        ShouldNotReachHere();
12142     }
12143   %}
12144   ins_pipe(pipe_jcc);
12145 %}
12146 
12147 // ============================================================================
12148 // The second, slow half of a subtype check.  Scan the subklass's secondary
12149 // superklass array for an instance of the superklass.  Set a hidden internal
12150 // cache on a hit (the cache is checked by exposed code in gen_subtype_check()).
12151 // Return NZ for a miss or zero for a hit.  The encoding ALSO sets the flags.
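//
// As a rough sketch (not the generated code), the scan performed by the
// encodings below is equivalent to the following C loop; KlassSketch is a
// hypothetical stand-in for the Klass fields named in the format strings:
//
//   struct KlassSketch {                           // hypothetical stand-in for Klass
//     struct KlassSketch** secondary_supers;       // secondary superclass array
//     int                  secondary_supers_len;   // number of entries
//     struct KlassSketch*  secondary_super_cache;  // one-element cache of the last hit
//   };
//
//   // Returns 0 on a hit (and updates the cache), non-zero on a miss,
//   // mirroring the zero-for-hit / NZ-for-miss contract described above.
//   static int partial_subtype_check(struct KlassSketch* sub,
//                                    struct KlassSketch* super) {
//     for (int i = 0; i < sub->secondary_supers_len; i++) {
//       if (sub->secondary_supers[i] == super) {
//         sub->secondary_super_cache = super;       // cache the hit
//         return 0;
//       }
//     }
//     return 1;
//   }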
12152 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12153   match(Set result (PartialSubtypeCheck sub super));
12154   effect( KILL rcx, KILL cr );
12155 
12156   ins_cost(1100);  // slightly larger than the next version
12157   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12158             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12159             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12160             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12161             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12162             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12163             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12164      "miss:\t" %}
12165 
12166   opcode(0x1); // Force a XOR of EDI
12167   ins_encode( enc_PartialSubtypeCheck() );
12168   ins_pipe( pipe_slow );
12169 %}
12170 
12171 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12172   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12173   effect( KILL rcx, KILL result );
12174 
12175   ins_cost(1000);
12176   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12177             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12178             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12179             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12180             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12181             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12182      "miss:\t" %}
12183 
12184   opcode(0x0);  // No need to XOR EDI
12185   ins_encode( enc_PartialSubtypeCheck() );
12186   ins_pipe( pipe_slow );
12187 %}
12188 
12189 // ============================================================================
12190 // Branch Instructions -- short offset versions
12191 //
12192 // These instructions are used to replace jumps of a long offset (the default
12193 // match) with jumps of a shorter offset.  These instructions are all tagged
12194 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12195 // match rules in general matching.  Instead, the ADLC generates a conversion
12196 // method in the MachNode which can be used to do in-place replacement of the
12197 // long variant with the shorter variant.  The compiler decides whether a
12198 // branch can use the short form via the is_short_branch_offset() predicate in
12199 // the machine-specific code section of the file.
12200 
12201 // Jump Direct - Label defines a relative address from JMP+1
12202 instruct jmpDir_short(label labl) %{
12203   match(Goto);
12204   effect(USE labl);
12205 
12206   ins_cost(300);
12207   format %{ "JMP,s  $labl" %}
12208   size(2);
12209   ins_encode %{
12210     Label* L = $labl$$label;
12211     __ jmpb(*L);
12212   %}
12213   ins_pipe( pipe_jmp );
12214   ins_short_branch(1);
12215 %}
12216 
12217 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12218 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12219   match(If cop cr);
12220   effect(USE labl);
12221 
12222   ins_cost(300);
12223   format %{ "J$cop,s  $labl" %}
12224   size(2);
12225   ins_encode %{
12226     Label* L = $labl$$label;
12227     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12228   %}
12229   ins_pipe( pipe_jcc );
12230   ins_short_branch(1);
12231 %}
12232 
12233 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12234 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12235   match(CountedLoopEnd cop cr);
12236   effect(USE labl);
12237 
12238   ins_cost(300);
12239   format %{ "J$cop,s  $labl\t# Loop end" %}
12240   size(2);
12241   ins_encode %{
12242     Label* L = $labl$$label;
12243     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12244   %}
12245   ins_pipe( pipe_jcc );
12246   ins_short_branch(1);
12247 %}
12248 
12249 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12250 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12251   match(CountedLoopEnd cop cmp);
12252   effect(USE labl);
12253 
12254   ins_cost(300);
12255   format %{ "J$cop,us $labl\t# Loop end" %}
12256   size(2);
12257   ins_encode %{
12258     Label* L = $labl$$label;
12259     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12260   %}
12261   ins_pipe( pipe_jcc );
12262   ins_short_branch(1);
12263 %}
12264 
12265 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12266   match(CountedLoopEnd cop cmp);
12267   effect(USE labl);
12268 
12269   ins_cost(300);
12270   format %{ "J$cop,us $labl\t# Loop end" %}
12271   size(2);
12272   ins_encode %{
12273     Label* L = $labl$$label;
12274     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12275   %}
12276   ins_pipe( pipe_jcc );
12277   ins_short_branch(1);
12278 %}
12279 
12280 // Jump Direct Conditional - using unsigned comparison
12281 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12282   match(If cop cmp);
12283   effect(USE labl);
12284 
12285   ins_cost(300);
12286   format %{ "J$cop,us $labl" %}
12287   size(2);
12288   ins_encode %{
12289     Label* L = $labl$$label;
12290     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12291   %}
12292   ins_pipe( pipe_jcc );
12293   ins_short_branch(1);
12294 %}
12295 
12296 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12297   match(If cop cmp);
12298   effect(USE labl);
12299 
12300   ins_cost(300);
12301   format %{ "J$cop,us $labl" %}
12302   size(2);
12303   ins_encode %{
12304     Label* L = $labl$$label;
12305     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12306   %}
12307   ins_pipe( pipe_jcc );
12308   ins_short_branch(1);
12309 %}
12310 
12311 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12312   match(If cop cmp);
12313   effect(USE labl);
12314 
12315   ins_cost(300);
12316   format %{ $$template
12317     if ($cop$$cmpcode == Assembler::notEqual) {
12318       $$emit$$"JP,u,s   $labl\n\t"
12319       $$emit$$"J$cop,u,s   $labl"
12320     } else {
12321       $$emit$$"JP,u,s   done\n\t"
12322       $$emit$$"J$cop,u,s  $labl\n\t"
12323       $$emit$$"done:"
12324     }
12325   %}
12326   size(4);
12327   ins_encode %{
12328     Label* l = $labl$$label;
12329     if ($cop$$cmpcode == Assembler::notEqual) {
12330       __ jccb(Assembler::parity, *l);
12331       __ jccb(Assembler::notEqual, *l);
12332     } else if ($cop$$cmpcode == Assembler::equal) {
12333       Label done;
12334       __ jccb(Assembler::parity, done);
12335       __ jccb(Assembler::equal, *l);
12336       __ bind(done);
12337     } else {
12338        ShouldNotReachHere();
12339     }
12340   %}
12341   ins_pipe(pipe_jcc);
12342   ins_short_branch(1);
12343 %}
12344 
12345 // ============================================================================
12346 // Long Compare
12347 //
12348 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12349 // is tricky.  The flavor of compare used depends on whether we are testing
12350 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12351 // The GE test is the negated LT test.  The LE test can be had by commuting
12352 // the operands (yielding a GE test) and then negating; negate again for the
12353 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12354 // NE test is negated from that.
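//
// As a reference for the decompositions above, a minimal C sketch with the
// long held as explicit hi/lo halves (names are illustrative, not generated
// code):
//
//   static int long_lt(int a_hi, unsigned a_lo, int b_hi, unsigned b_lo) {
//     // signed compare on the high halves, unsigned compare on the low halves
//     return (a_hi < b_hi) || (a_hi == b_hi && a_lo < b_lo);
//   }
//   static int long_ge(int a_hi, unsigned a_lo, int b_hi, unsigned b_lo) {
//     return !long_lt(a_hi, a_lo, b_hi, b_lo);      // GE is the negated LT
//   }
//   static int long_eq(int a_hi, unsigned a_lo, int b_hi, unsigned b_lo) {
//     return ((a_hi ^ b_hi) | (a_lo ^ b_lo)) == 0;  // OR the half differences
//   }
//
// For a compare against zero, long_lt(x_hi, x_lo, 0, 0) reduces to (x_hi < 0),
// i.e. just the sign bit of the high half.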
12355 
12356 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12357 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12358 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12359 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12360 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12361 // foo match ends up with the wrong leaf.  One fix is to not match both
12362 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12363 // both forms beat the trinary form of long-compare and both are very useful
12364 // on Intel which has so few registers.
12365 
12366 // Manifest a CmpL result in an integer register.  Very painful.
12367 // This is the test to avoid.
12368 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12369   match(Set dst (CmpL3 src1 src2));
12370   effect( KILL flags );
12371   ins_cost(1000);
12372   format %{ "XOR    $dst,$dst\n\t"
12373             "CMP    $src1.hi,$src2.hi\n\t"
12374             "JLT,s  m_one\n\t"
12375             "JGT,s  p_one\n\t"
12376             "CMP    $src1.lo,$src2.lo\n\t"
12377             "JB,s   m_one\n\t"
12378             "JEQ,s  done\n"
12379     "p_one:\tINC    $dst\n\t"
12380             "JMP,s  done\n"
12381     "m_one:\tDEC    $dst\n"
12382      "done:" %}
12383   ins_encode %{
12384     Label p_one, m_one, done;
12385     __ xorptr($dst$$Register, $dst$$Register);
12386     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12387     __ jccb(Assembler::less,    m_one);
12388     __ jccb(Assembler::greater, p_one);
12389     __ cmpl($src1$$Register, $src2$$Register);
12390     __ jccb(Assembler::below,   m_one);
12391     __ jccb(Assembler::equal,   done);
12392     __ bind(p_one);
12393     __ incrementl($dst$$Register);
12394     __ jmpb(done);
12395     __ bind(m_one);
12396     __ decrementl($dst$$Register);
12397     __ bind(done);
12398   %}
12399   ins_pipe( pipe_slow );
12400 %}
12401 
12402 //======
12403 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12404 // compares.  Can be used for LE or GT compares by reversing arguments.
12405 // NOT GOOD FOR EQ/NE tests.
12406 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12407   match( Set flags (CmpL src zero ));
12408   ins_cost(100);
12409   format %{ "TEST   $src.hi,$src.hi" %}
12410   opcode(0x85);
12411   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12412   ins_pipe( ialu_cr_reg_reg );
12413 %}
12414 
12415 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12416 // compares.  Can be used for LE or GT compares by reversing arguments.
12417 // NOT GOOD FOR EQ/NE tests.
12418 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12419   match( Set flags (CmpL src1 src2 ));
12420   effect( TEMP tmp );
12421   ins_cost(300);
12422   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12423             "MOV    $tmp,$src1.hi\n\t"
12424             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12425   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12426   ins_pipe( ialu_cr_reg_reg );
12427 %}
12428 
12429 // Long compares reg < zero/req OR reg >= zero/req.
12430 // Just a wrapper for a normal branch, plus the predicate test.
12431 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12432   match(If cmp flags);
12433   effect(USE labl);
12434   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12435   expand %{
12436     jmpCon(cmp,flags,labl);    // JLT or JGE...
12437   %}
12438 %}
12439 
12440 // Compare 2 longs and CMOVE longs.
12441 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12442   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12443   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12444   ins_cost(400);
12445   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12446             "CMOV$cmp $dst.hi,$src.hi" %}
12447   opcode(0x0F,0x40);
12448   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12449   ins_pipe( pipe_cmov_reg_long );
12450 %}
12451 
12452 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12453   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12454   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12455   ins_cost(500);
12456   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12457             "CMOV$cmp $dst.hi,$src.hi" %}
12458   opcode(0x0F,0x40);
12459   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12460   ins_pipe( pipe_cmov_reg_long );
12461 %}
12462 
12463 // Compare 2 longs and CMOVE ints.
12464 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12465   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12466   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12467   ins_cost(200);
12468   format %{ "CMOV$cmp $dst,$src" %}
12469   opcode(0x0F,0x40);
12470   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12471   ins_pipe( pipe_cmov_reg );
12472 %}
12473 
12474 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12475   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12476   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12477   ins_cost(250);
12478   format %{ "CMOV$cmp $dst,$src" %}
12479   opcode(0x0F,0x40);
12480   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12481   ins_pipe( pipe_cmov_mem );
12482 %}
12483 
12484 // Compare 2 longs and CMOVE ptrs.
12485 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12486   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12487   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12488   ins_cost(200);
12489   format %{ "CMOV$cmp $dst,$src" %}
12490   opcode(0x0F,0x40);
12491   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12492   ins_pipe( pipe_cmov_reg );
12493 %}
12494 
12495 // Compare 2 longs and CMOVE doubles
12496 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12497   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12498   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12499   ins_cost(200);
12500   expand %{
12501     fcmovDPR_regS(cmp,flags,dst,src);
12502   %}
12503 %}
12504 
12505 // Compare 2 longs and CMOVE doubles
12506 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12507   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12508   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12509   ins_cost(200);
12510   expand %{
12511     fcmovD_regS(cmp,flags,dst,src);
12512   %}
12513 %}
12514 
12515 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12516   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12517   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12518   ins_cost(200);
12519   expand %{
12520     fcmovFPR_regS(cmp,flags,dst,src);
12521   %}
12522 %}
12523 
12524 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12525   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12526   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12527   ins_cost(200);
12528   expand %{
12529     fcmovF_regS(cmp,flags,dst,src);
12530   %}
12531 %}
12532 
12533 //======
12534 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12535 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12536   match( Set flags (CmpL src zero ));
12537   effect(TEMP tmp);
12538   ins_cost(200);
12539   format %{ "MOV    $tmp,$src.lo\n\t"
12540             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12541   ins_encode( long_cmp_flags0( src, tmp ) );
12542   ins_pipe( ialu_reg_reg_long );
12543 %}
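
// Illustrative note: a long is zero exactly when the OR of its two halves is
// zero, so the MOV/OR pair above sets ZF correctly for EQ/NE against zero.
// Hedged C++ sketch:
//
//   static bool long_is_zero(int64_t v) {
//     return ((uint32_t)v | (uint32_t)(v >> 32)) == 0;
//   }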
12544 
12545 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12546 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12547   match( Set flags (CmpL src1 src2 ));
12548   ins_cost(200+300);
12549   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12550             "JNE,s  skip\n\t"
12551             "CMP    $src1.hi,$src2.hi\n\t"
12552      "skip:\t" %}
12553   ins_encode( long_cmp_flags1( src1, src2 ) );
12554   ins_pipe( ialu_cr_reg_reg );
12555 %}
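
// Illustrative sketch only: for EQ/NE the sequence above short-circuits on
// the low halves; if they already differ the answer is "not equal" and the
// high compare is skipped, otherwise the high halves decide.  Roughly:
//
//   static bool long_eq(int64_t a, int64_t b) {
//     if ((uint32_t)a != (uint32_t)b) return false;     // CMP lo; JNE,s skip
//     return (int32_t)(a >> 32) == (int32_t)(b >> 32);  // CMP hi
//   }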
12556 
12557 // Long compare reg == zero/reg OR reg != zero/reg
12558 // Just a wrapper for a normal branch, plus the predicate test.
12559 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12560   match(If cmp flags);
12561   effect(USE labl);
12562   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12563   expand %{
12564     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12565   %}
12566 %}
12567 
12568 // Compare 2 longs and CMOVE longs.
12569 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12570   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12571   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12572   ins_cost(400);
12573   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12574             "CMOV$cmp $dst.hi,$src.hi" %}
12575   opcode(0x0F,0x40);
12576   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12577   ins_pipe( pipe_cmov_reg_long );
12578 %}
12579 
12580 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12581   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12582   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12583   ins_cost(500);
12584   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12585             "CMOV$cmp $dst.hi,$src.hi" %}
12586   opcode(0x0F,0x40);
12587   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12588   ins_pipe( pipe_cmov_reg_long );
12589 %}
12590 
12591 // Compare 2 longs and CMOVE ints.
12592 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12593   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12594   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12595   ins_cost(200);
12596   format %{ "CMOV$cmp $dst,$src" %}
12597   opcode(0x0F,0x40);
12598   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12599   ins_pipe( pipe_cmov_reg );
12600 %}
12601 
12602 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12603   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12604   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12605   ins_cost(250);
12606   format %{ "CMOV$cmp $dst,$src" %}
12607   opcode(0x0F,0x40);
12608   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12609   ins_pipe( pipe_cmov_mem );
12610 %}
12611 
12612 // Compare 2 longs and CMOVE ptrs.
12613 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12614   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12615   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12616   ins_cost(200);
12617   format %{ "CMOV$cmp $dst,$src" %}
12618   opcode(0x0F,0x40);
12619   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12620   ins_pipe( pipe_cmov_reg );
12621 %}
12622 
12623 // Compare 2 longs and CMOVE doubles
12624 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
12625   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
12626   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12627   ins_cost(200);
12628   expand %{
12629     fcmovDPR_regS(cmp,flags,dst,src);
12630   %}
12631 %}
12632 
12633 // Compare 2 longs and CMOVE doubles
12634 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
12635   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
12636   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12637   ins_cost(200);
12638   expand %{
12639     fcmovD_regS(cmp,flags,dst,src);
12640   %}
12641 %}
12642 
12643 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
12644   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
12645   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12646   ins_cost(200);
12647   expand %{
12648     fcmovFPR_regS(cmp,flags,dst,src);
12649   %}
12650 %}
12651 
12652 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
12653   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne) );
12654   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12655   ins_cost(200);
12656   expand %{
12657     fcmovF_regS(cmp,flags,dst,src);
12658   %}
12659 %}
12660 
12661 //======
12662 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12663 // Same as cmpL_reg_flags_LEGT except must negate src
12664 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12665   match( Set flags (CmpL src zero ));
12666   effect( TEMP tmp );
12667   ins_cost(300);
12668   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12669             "CMP    $tmp,$src.lo\n\t"
12670             "SBB    $tmp,$src.hi" %}
12671   ins_encode( long_cmp_flags3(src, tmp) );
12672   ins_pipe( ialu_reg_reg_long );
12673 %}
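
// Illustrative note: the XOR/CMP/SBB sequence above computes the flags of
// (0 - $src), so the LE/GT question about $src is answered by the commuted
// condition on that difference.  Hedged C++ sketch of the equivalence:
//
//   #include <cstdint>
//   static bool long_le_zero(int64_t v) {
//     uint32_t lo = (uint32_t)v;
//     int32_t  hi = (int32_t)(v >> 32);
//     int borrow  = 0u < lo;                    // CMP $tmp,$src.lo  ($tmp == 0)
//     int64_t r   = (int64_t)0 - hi - borrow;   // SBB $tmp,$src.hi
//     return r >= 0;                            // commuted test: "GE" answers "v <= 0"
//   }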
12674 
12675 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12676 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12677 // requires a commuted test to get the same result.
12678 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12679   match( Set flags (CmpL src1 src2 ));
12680   effect( TEMP tmp );
12681   ins_cost(300);
12682   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12683             "MOV    $tmp,$src2.hi\n\t"
12684             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12685   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12686   ins_pipe( ialu_cr_reg_reg );
12687 %}
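
// Illustrative note: swapping the operands turns the LE/GT question into the
// LT/GE flag computation shown earlier, because a <= b is the same question
// as b >= a; the cmpOp_commute operand then emits the commuted condition
// code.  Hedged sketch:
//
//   static bool long_le(int64_t a, int64_t b) {
//     return b >= a;   // flags computed for (b - a), tested with "GE"
//   }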
12688 
12689 // Long compares reg <= zero/reg OR reg > zero/reg.
12690 // Just a wrapper for a normal branch, plus the predicate test
12691 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12692   match(If cmp flags);
12693   effect(USE labl);
12694   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12695   ins_cost(300);
12696   expand %{
12697     jmpCon(cmp,flags,labl);    // JGT or JLE...
12698   %}
12699 %}
12700 
12701 // Compare 2 longs and CMOVE longs.
12702 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12703   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12704   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12705   ins_cost(400);
12706   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12707             "CMOV$cmp $dst.hi,$src.hi" %}
12708   opcode(0x0F,0x40);
12709   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12710   ins_pipe( pipe_cmov_reg_long );
12711 %}
12712 
12713 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12714   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12715   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12716   ins_cost(500);
12717   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12718             "CMOV$cmp $dst.hi,$src.hi" %}
12719   opcode(0x0F,0x40);
12720   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12721   ins_pipe( pipe_cmov_reg_long );
12722 %}
12723 
12724 // Compare 2 longs and CMOVE ints.
12725 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12726   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12727   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12728   ins_cost(200);
12729   format %{ "CMOV$cmp $dst,$src" %}
12730   opcode(0x0F,0x40);
12731   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12732   ins_pipe( pipe_cmov_reg );
12733 %}
12734 
12735 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12736   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12737   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12738   ins_cost(250);
12739   format %{ "CMOV$cmp $dst,$src" %}
12740   opcode(0x0F,0x40);
12741   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12742   ins_pipe( pipe_cmov_mem );
12743 %}
12744 
12745 // Compare 2 longs and CMOVE ptrs.
12746 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12747   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12748   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12749   ins_cost(200);
12750   format %{ "CMOV$cmp $dst,$src" %}
12751   opcode(0x0F,0x40);
12752   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12753   ins_pipe( pipe_cmov_reg );
12754 %}
12755 
12756 // Compare 2 longs and CMOVE doubles
12757 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
12758   predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
12759   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12760   ins_cost(200);
12761   expand %{
12762     fcmovDPR_regS(cmp,flags,dst,src);
12763   %}
12764 %}
12765 
12766 // Compare 2 longs and CMOVE doubles
12767 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
12768   predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
12769   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12770   ins_cost(200);
12771   expand %{
12772     fcmovD_regS(cmp,flags,dst,src);
12773   %}
12774 %}
12775 
12776 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
12777   predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
12778   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12779   ins_cost(200);
12780   expand %{
12781     fcmovFPR_regS(cmp,flags,dst,src);
12782   %}
12783 %}
12784 
12785 
12786 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
12787   predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt) );
12788   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12789   ins_cost(200);
12790   expand %{
12791     fcmovF_regS(cmp,flags,dst,src);
12792   %}
12793 %}
12794 
12795 
12796 // ============================================================================
12797 // Procedure Call/Return Instructions
12798 // Call Java Static Instruction
12799 // Note: If this code changes, the corresponding ret_addr_offset() and
12800 //       compute_padding() functions will have to be adjusted.
12801 instruct CallStaticJavaDirect(method meth) %{
12802   match(CallStaticJava);
12803   effect(USE meth);
12804 
12805   ins_cost(300);
12806   format %{ "CALL,static " %}
12807   opcode(0xE8); /* E8 cd */
12808   ins_encode( pre_call_resets,
12809               Java_Static_Call( meth ),
12810               call_epilog,
12811               post_call_FPU );
12812   ins_pipe( pipe_slow );
12813   ins_alignment(4);
12814 %}
12815 
12816 // Call Java Dynamic Instruction
12817 // Note: If this code changes, the corresponding ret_addr_offset() and
12818 //       compute_padding() functions will have to be adjusted.
12819 instruct CallDynamicJavaDirect(method meth) %{
12820   match(CallDynamicJava);
12821   effect(USE meth);
12822 
12823   ins_cost(300);
12824   format %{ "MOV    EAX,(oop)-1\n\t"
12825             "CALL,dynamic" %}
12826   opcode(0xE8); /* E8 cd */
12827   ins_encode( pre_call_resets,
12828               Java_Dynamic_Call( meth ),
12829               call_epilog,
12830               post_call_FPU );
12831   ins_pipe( pipe_slow );
12832   ins_alignment(4);
12833 %}
12834 
12835 // Call Runtime Instruction
12836 instruct CallRuntimeDirect(method meth) %{
12837   match(CallRuntime);
12838   effect(USE meth);
12839 
12840   ins_cost(300);
12841   format %{ "CALL,runtime " %}
12842   opcode(0xE8); /* E8 cd */
12843   // Use FFREEs to clear entries in float stack
12844   ins_encode( pre_call_resets,
12845               FFree_Float_Stack_All,
12846               Java_To_Runtime( meth ),
12847               post_call_FPU );
12848   ins_pipe( pipe_slow );
12849 %}
12850 
12851 // Call runtime without safepoint
12852 instruct CallLeafDirect(method meth) %{
12853   match(CallLeaf);
12854   effect(USE meth);
12855 
12856   ins_cost(300);
12857   format %{ "CALL_LEAF,runtime " %}
12858   opcode(0xE8); /* E8 cd */
12859   ins_encode( pre_call_resets,
12860               FFree_Float_Stack_All,
12861               Java_To_Runtime( meth ),
12862               Verify_FPU_For_Leaf, post_call_FPU );
12863   ins_pipe( pipe_slow );
12864 %}
12865 
12866 instruct CallLeafNoFPDirect(method meth) %{
12867   match(CallLeafNoFP);
12868   effect(USE meth);
12869 
12870   ins_cost(300);
12871   format %{ "CALL_LEAF_NOFP,runtime " %}
12872   opcode(0xE8); /* E8 cd */
12873   ins_encode(Java_To_Runtime(meth));
12874   ins_pipe( pipe_slow );
12875 %}
12876 
12877 
12878 // Return Instruction
12879 // Remove the return address & jump to it.
12880 instruct Ret() %{
12881   match(Return);
12882   format %{ "RET" %}
12883   opcode(0xC3);
12884   ins_encode(OpcP);
12885   ins_pipe( pipe_jmp );
12886 %}
12887 
12888 // Tail Call; Jump from runtime stub to Java code.
12889 // Also known as an 'interprocedural jump'.
12890 // Target of jump will eventually return to caller.
12891 // TailJump below removes the return address.
12892 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12893   match(TailCall jump_target method_oop );
12894   ins_cost(300);
12895   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12896   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12897   ins_encode( OpcP, RegOpc(jump_target) );
12898   ins_pipe( pipe_jmp );
12899 %}
12900 
12901 
12902 // Tail Jump; remove the return address; jump to target.
12903 // TailCall above leaves the return address around.
12904 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12905   match( TailJump jump_target ex_oop );
12906   ins_cost(300);
12907   format %{ "POP    EDX\t# pop return address into dummy\n\t"
12908             "JMP    $jump_target " %}
12909   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12910   ins_encode( enc_pop_rdx,
12911               OpcP, RegOpc(jump_target) );
12912   ins_pipe( pipe_jmp );
12913 %}
12914 
12915 // Create exception oop: created by stack-crawling runtime code.
12916 // Created exception is now available to this handler, and is setup
12917 // just prior to jumping to this handler.  No code emitted.
12918 instruct CreateException( eAXRegP ex_oop )
12919 %{
12920   match(Set ex_oop (CreateEx));
12921 
12922   size(0);
12923   // use the following format syntax
12924   format %{ "# exception oop is in EAX; no code emitted" %}
12925   ins_encode();
12926   ins_pipe( empty );
12927 %}
12928 
12929 
12930 // Rethrow exception:
12931 // The exception oop will come in the first argument position.
12932 // Then JUMP (not call) to the rethrow stub code.
12933 instruct RethrowException()
12934 %{
12935   match(Rethrow);
12936 
12937   // use the following format syntax
12938   format %{ "JMP    rethrow_stub" %}
12939   ins_encode(enc_rethrow);
12940   ins_pipe( pipe_jmp );
12941 %}
12942 
12943 // inlined locking and unlocking
12944 
12945 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
12946   predicate(Compile::current()->use_rtm());
12947   match(Set cr (FastLock object box));
12948   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
12949   ins_cost(300);
12950   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
12951   ins_encode %{
12952     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12953                  $scr$$Register, $cx1$$Register, $cx2$$Register,
12954                  _counters, _rtm_counters, _stack_rtm_counters,
12955                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12956                  true, ra_->C->profile_rtm());
12957   %}
12958   ins_pipe(pipe_slow);
12959 %}
12960 
12961 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
12962   predicate(!Compile::current()->use_rtm());
12963   match(Set cr (FastLock object box));
12964   effect(TEMP tmp, TEMP scr, USE_KILL box);
12965   ins_cost(300);
12966   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
12967   ins_encode %{
12968     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12969                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
12970   %}
12971   ins_pipe(pipe_slow);
12972 %}
12973 
12974 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
12975   match(Set cr (FastUnlock object box));
12976   effect(TEMP tmp, USE_KILL box);
12977   ins_cost(300);
12978   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
12979   ins_encode %{
12980     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
12981   %}
12982   ins_pipe(pipe_slow);
12983 %}
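
// Conceptual sketch only -- the real work is MacroAssembler::fast_lock() /
// fast_unlock(), which also handles biased locking, RTM and the inflated
// cases; the simplified C++ below (names and layout are ours) just shows the
// common thin-lock idea of CAS-ing a pointer to the on-stack lock record into
// the object header, falling back to the runtime on contention.
//
//   #include <atomic>
//   #include <cstdint>
//   struct ObjHeader { std::atomic<intptr_t> mark; };
//   static bool try_thin_lock(ObjHeader* obj, intptr_t* lock_record, intptr_t unlocked_mark) {
//     *lock_record = unlocked_mark;      // save displaced mark word in the box
//     intptr_t expected = unlocked_mark;
//     return obj->mark.compare_exchange_strong(expected, (intptr_t)lock_record);
//   }                                    // false => take the slow path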
12984 
12985 
12986 
12987 // ============================================================================
12988 // Safepoint Instruction
12989 instruct safePoint_poll(eFlagsReg cr) %{
12990   match(SafePoint);
12991   effect(KILL cr);
12992 
12993   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
12994   // On SPARC that might be acceptable as we can generate the address with
12995   // just a sethi, saving an or.  By polling at offset 0 we can end up
12996   // putting additional pressure on the index-0 in the D$.  Because of
12997   // alignment (just like the situation at hand) the lower indices tend
12998   // to see more traffic.  It'd be better to change the polling address
12999   // to offset 0 of the last $line in the polling page.
13000 
13001   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13002   ins_cost(125);
13003   size(6);
13004   ins_encode( Safepoint_Poll() );
13005   ins_pipe( ialu_reg_mem );
13006 %}
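
// Conceptual sketch only -- the mechanism below lives in the runtime, not in
// this file, and the helper names are ours: the poll is just a read of a
// dedicated polling page; to bring threads to a safepoint the VM protects
// that page, so the next poll in compiled code faults and the signal handler
// routes the thread to the safepoint blob.
//
//   #include <sys/mman.h>
//   static void arm_safepoint(void* polling_page, size_t page_size) {
//     mprotect(polling_page, page_size, PROT_NONE);   // next poll read faults
//   }
//   static void disarm_safepoint(void* polling_page, size_t page_size) {
//     mprotect(polling_page, page_size, PROT_READ);   // polls become harmless again
//   }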
13007 
13008 
13009 // ============================================================================
13010 // This name is KNOWN by the ADLC and cannot be changed.
13011 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13012 // for this guy.
13013 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13014   match(Set dst (ThreadLocal));
13015   effect(DEF dst, KILL cr);
13016 
13017   format %{ "MOV    $dst, Thread::current()" %}
13018   ins_encode %{
13019     Register dstReg = as_Register($dst$$reg);
13020     __ get_thread(dstReg);
13021   %}
13022   ins_pipe( ialu_reg_fat );
13023 %}
13024 
13025 
13026 
13027 //----------PEEPHOLE RULES-----------------------------------------------------
13028 // These must follow all instruction definitions as they use the names
13029 // defined in the instructions definitions.
13030 //
13031 // peepmatch ( root_instr_name [preceding_instruction]* );
13032 //
13033 // peepconstraint %{
13034 // (instruction_number.operand_name relational_op instruction_number.operand_name
13035 //  [, ...] );
13036 // // instruction numbers are zero-based using left to right order in peepmatch
13037 //
13038 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13039 // // provide an instruction_number.operand_name for each operand that appears
13040 // // in the replacement instruction's match rule
13041 //
13042 // ---------VM FLAGS---------------------------------------------------------
13043 //
13044 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13045 //
13046 // Each peephole rule is given an identifying number starting with zero and
13047 // increasing by one in the order seen by the parser.  An individual peephole
13048 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13049 // on the command-line.
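//
// For example (hedged; availability of these switches can depend on the
// build flavor):
//
//   java -XX:-OptoPeephole ...        # disable all peephole rules
//   java -XX:OptoPeepholeAt=3 ...     # enable only the rule numbered 3, disable the rest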
13050 //
13051 // ---------CURRENT LIMITATIONS----------------------------------------------
13052 //
13053 // Only match adjacent instructions in same basic block
13054 // Only equality constraints
13055 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13056 // Only one replacement instruction
13057 //
13058 // ---------EXAMPLE----------------------------------------------------------
13059 //
13060 // // pertinent parts of existing instructions in architecture description
13061 // instruct movI(rRegI dst, rRegI src) %{
13062 //   match(Set dst (CopyI src));
13063 // %}
13064 //
13065 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13066 //   match(Set dst (AddI dst src));
13067 //   effect(KILL cr);
13068 // %}
13069 //
13070 // // Change (inc mov) to lea
13071 // peephole %{
13072 //   // increment preceded by register-register move
13073 //   peepmatch ( incI_eReg movI );
13074 //   // require that the destination register of the increment
13075 //   // match the destination register of the move
13076 //   peepconstraint ( 0.dst == 1.dst );
13077 //   // construct a replacement instruction that sets
13078 //   // the destination to ( move's source register + one )
13079 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13080 // %}
13081 //
13082 // Implementation no longer uses movX instructions since
13083 // machine-independent system no longer uses CopyX nodes.
13084 //
13085 // peephole %{
13086 //   peepmatch ( incI_eReg movI );
13087 //   peepconstraint ( 0.dst == 1.dst );
13088 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13089 // %}
13090 //
13091 // peephole %{
13092 //   peepmatch ( decI_eReg movI );
13093 //   peepconstraint ( 0.dst == 1.dst );
13094 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13095 // %}
13096 //
13097 // peephole %{
13098 //   peepmatch ( addI_eReg_imm movI );
13099 //   peepconstraint ( 0.dst == 1.dst );
13100 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13101 // %}
13102 //
13103 // peephole %{
13104 //   peepmatch ( addP_eReg_imm movP );
13105 //   peepconstraint ( 0.dst == 1.dst );
13106 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13107 // %}
13108 
13109 // // Change load of spilled value to only a spill
13110 // instruct storeI(memory mem, rRegI src) %{
13111 //   match(Set mem (StoreI mem src));
13112 // %}
13113 //
13114 // instruct loadI(rRegI dst, memory mem) %{
13115 //   match(Set dst (LoadI mem));
13116 // %}
13117 //
13118 peephole %{
13119   peepmatch ( loadI storeI );
13120   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13121   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13122 %}
13123 
13124 //----------SMARTSPILL RULES---------------------------------------------------
13125 // These must follow all instruction definitions as they use the names
13126 // defined in the instructions definitions.