1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
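//
// For example, the entry
//   reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
// below reads: ECX is save-on-call for both the register allocator and the C
// calling convention, is spilled/reloaded as an int (Op_RegI), has hardware
// encoding 1 (the bit pattern placed in ModRM bytes), and is backed by the
// VMReg for rcx.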
  61 
  62 // General Registers
// EBX, ESI, and EDI were previously save-on-entry for Java code.  SOE was
// then turned off in Java code because of frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are SOE registers again.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// Now that adapter frames are gone, EBP is always saved and restored by the prolog/epilog code.
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
// Float registers.  We treat TOS/FPR0 specially: it is invisible to the
// allocator and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Here's the trick: FPR1 is really st(0), except in the midst of emitting
// assembly for a machnode.  During emission the FPU stack is pushed, making
// FPR1 == st(1) temporarily.  However, at any safepoint the stack will not
// have this element, so FPR1 == st(0) from the oopMap viewpoint.  This
// numbering quirk forces the instruction encoding to play games with the
// register encode to correct for the 0/1 issue.  See
// MachSpillCopyNode::implementation, where it does flt->flt moves, for an
// example.
//
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
 108 reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs (excluding EBP and EDI)
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (and neither EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
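// A minimal illustration, based on the encodings and Long pairs listed in the
// register block above: the pairs are laid out so that the high half's
// encoding is always the low half's encoding plus 2, e.g.
//   HIGH_FROM_LOW(0 /* EAX */) == 2 /* EDX */   (pair EDX:EAX)
//   HIGH_FROM_LOW(1 /* ECX */) == 3 /* EBX */   (pair EBX:ECX)
//   HIGH_FROM_LOW(5 /* EBP */) == 7 /* EDI */   (pair EDI:EBP)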
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value to a 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
// Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
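// Sketch of the alignment arithmetic above (addresses are hypothetical): if
// &fp_signmask_pool[2] were 0x1008, then 0x1008 & ~0xF == 0x1000, which still
// lies inside fp_signmask_pool because the pool reserves one extra 128-bit
// slot to absorb the downward rounding.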
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (C->max_vector_size() > 16) {
 294     size += 3; // vzeroupper
 295   }
 296   return size;
 297 }
 298 
// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
 302 int MachCallStaticJavaNode::ret_addr_offset() {
 303   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 304 }
 305 
 306 int MachCallDynamicJavaNode::ret_addr_offset() {
 307   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 308 }
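// Worked example (illustrative): if the compiled method runs in 24-bit FP
// mode (+6 bytes for the fldcw) and uses vectors wider than 16 bytes (+3 for
// vzeroupper), the return address of a static Java call ends up
// 5 + 6 + 3 = 14 bytes from the start of the sequence, and that of a dynamic
// call 10 + 6 + 3 = 19 bytes.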
 309 
 310 static int sizeof_FFree_Float_Stack_All = -1;
 311 
 312 int MachCallRuntimeNode::ret_addr_offset() {
 313   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 314   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 315 }
 316 
 317 // Indicate if the safepoint node needs the polling page as an input.
 318 // Since x86 does have absolute addressing, it doesn't.
 319 bool SafePointNode::needs_polling_address_input() {
 320   return false;
 321 }
 322 
 323 //
 324 // Compute padding required for nodes which need alignment
 325 //
 326 
 327 // The address of the call instruction needs to be 4-byte aligned to
 328 // ensure that it does not span a cache line so that it can be patched.
 329 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 330   current_offset += pre_call_resets_size();  // skip fldcw, if any
 331   current_offset += 1;      // skip call opcode byte
 332   return round_to(current_offset, alignment_required()) - current_offset;
 333 }
 334 
 335 // The address of the call instruction needs to be 4-byte aligned to
 336 // ensure that it does not span a cache line so that it can be patched.
 337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 338   current_offset += pre_call_resets_size();  // skip fldcw, if any
 339   current_offset += 5;      // skip MOV instruction
 340   current_offset += 1;      // skip call opcode byte
 341   return round_to(current_offset, alignment_required()) - current_offset;
 342 }
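// Illustrative padding computation, assuming no pre-call resets and
// alignment_required() == 4: for a dynamic call at current_offset == 7 the
// offset just past the MOV and the call opcode byte is 7 + 5 + 1 = 13, so
// round_to(13, 4) - 13 = 3 bytes of padding are inserted; at
// current_offset == 6 the result (12) is already aligned and no padding is
// needed.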
 343 
 344 // EMIT_RM()
 345 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 346   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 347   cbuf.insts()->emit_int8(c);
 348 }
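// For example, the epilog's emit_rm(cbuf, 0x3, 0x00, ESP_enc) packs mod=11,
// reg=000 (the /0 opcode extension) and r/m=100 (ESP) into the single byte
// 0xC4.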
 349 
 350 // EMIT_CC()
 351 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 352   unsigned char c = (unsigned char)( f1 | f2 );
 353   cbuf.insts()->emit_int8(c);
 354 }
 355 
 356 // EMIT_OPCODE()
 357 void emit_opcode(CodeBuffer &cbuf, int code) {
 358   cbuf.insts()->emit_int8((unsigned char) code);
 359 }
 360 
 361 // EMIT_OPCODE() w/ relocation information
 362 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 363   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 364   emit_opcode(cbuf, code);
 365 }
 366 
 367 // EMIT_D8()
 368 void emit_d8(CodeBuffer &cbuf, int d8) {
 369   cbuf.insts()->emit_int8((unsigned char) d8);
 370 }
 371 
 372 // EMIT_D16()
 373 void emit_d16(CodeBuffer &cbuf, int d16) {
 374   cbuf.insts()->emit_int16(d16);
 375 }
 376 
 377 // EMIT_D32()
 378 void emit_d32(CodeBuffer &cbuf, int d32) {
 379   cbuf.insts()->emit_int32(d32);
 380 }
 381 
 382 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 383 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 384         int format) {
 385   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 386   cbuf.insts()->emit_int32(d32);
 387 }
 388 
 389 // emit 32 bit value and construct relocation entry from RelocationHolder
 390 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 391         int format) {
 392 #ifdef ASSERT
 393   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 394     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 395   }
 396 #endif
 397   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 398   cbuf.insts()->emit_int32(d32);
 399 }
 400 
 401 // Access stack slot for load or store
 402 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 403   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 404   if( -128 <= disp && disp <= 127 ) {
 405     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 406     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 407     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 408   } else {
 409     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 410     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 411     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 412   }
 413 }
 414 
// Encodes a register-memory operand (rRegI ereg, memory mem), i.e. emit_reg_mem.
 416 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 417   // There is no index & no scale, use form without SIB byte
 418   if ((index == 0x4) &&
 419       (scale == 0) && (base != ESP_enc)) {
 420     // If no displacement, mode is 0x0; unless base is [EBP]
 421     if ( (displace == 0) && (base != EBP_enc) ) {
 422       emit_rm(cbuf, 0x0, reg_encoding, base);
 423     }
 424     else {                    // If 8-bit displacement, mode 0x1
 425       if ((displace >= -128) && (displace <= 127)
 426           && (disp_reloc == relocInfo::none) ) {
 427         emit_rm(cbuf, 0x1, reg_encoding, base);
 428         emit_d8(cbuf, displace);
 429       }
 430       else {                  // If 32-bit displacement
 431         if (base == -1) { // Special flag for absolute address
 432           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 433           // (manual lies; no SIB needed here)
 434           if ( disp_reloc != relocInfo::none ) {
 435             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 436           } else {
 437             emit_d32      (cbuf, displace);
 438           }
 439         }
 440         else {                // Normal base + offset
 441           emit_rm(cbuf, 0x2, reg_encoding, base);
 442           if ( disp_reloc != relocInfo::none ) {
 443             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 444           } else {
 445             emit_d32      (cbuf, displace);
 446           }
 447         }
 448       }
 449     }
 450   }
 451   else {                      // Else, encode with the SIB byte
 452     // If no displacement, mode is 0x0; unless base is [EBP]
 453     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 454       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 455       emit_rm(cbuf, scale, index, base);
 456     }
 457     else {                    // If 8-bit displacement, mode 0x1
 458       if ((displace >= -128) && (displace <= 127)
 459           && (disp_reloc == relocInfo::none) ) {
 460         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 461         emit_rm(cbuf, scale, index, base);
 462         emit_d8(cbuf, displace);
 463       }
 464       else {                  // If 32-bit displacement
 465         if (base == 0x04 ) {
 466           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 467           emit_rm(cbuf, scale, index, 0x04);
 468         } else {
 469           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 470           emit_rm(cbuf, scale, index, base);
 471         }
 472         if ( disp_reloc != relocInfo::none ) {
 473           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 474         } else {
 475           emit_d32      (cbuf, displace);
 476         }
 477       }
 478     }
 479   }
 480 }
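// Illustrative example of the encoding above: MOV EAX,[ESP+8], i.e.
// reg_encoding=0 (EAX), base=ESP_enc, index=0x4 (none), scale=0, displace=8,
// takes the SIB path with an 8-bit displacement and emits (after the 0x8B
// opcode supplied by the caller):
//   emit_rm(cbuf, 0x1, 0x0, 0x4) -> 0x44   (mod=01, reg=EAX, r/m=SIB follows)
//   emit_rm(cbuf, 0x0, 0x4, 0x4) -> 0x24   (scale=0, index=none, base=ESP)
//   emit_d8(cbuf, 8)             -> 0x08   (8-bit displacement)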
 481 
 482 
 483 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 484   if( dst_encoding == src_encoding ) {
 485     // reg-reg copy, use an empty encoding
 486   } else {
 487     emit_opcode( cbuf, 0x8B );
 488     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 489   }
 490 }
 491 
 492 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 493   Label exit;
 494   __ jccb(Assembler::noParity, exit);
 495   __ pushf();
 496   //
 497   // comiss/ucomiss instructions set ZF,PF,CF flags and
 498   // zero OF,AF,SF for NaN values.
 499   // Fixup flags by zeroing ZF,PF so that compare of NaN
 500   // values returns 'less than' result (CF is set).
 501   // Leave the rest of flags unchanged.
 502   //
 503   //    7 6 5 4 3 2 1 0
 504   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 505   //    0 0 1 0 1 0 1 1   (0x2B)
 506   //
 507   __ andl(Address(rsp, 0), 0xffffff2b);
 508   __ popf();
 509   __ bind(exit);
 510 }
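// Worked example (sketch): for an unordered compare comiss/ucomiss leaves
// ZF=PF=CF=1 and the low EFLAGS byte is roughly 0x47; AND-ing with 0x2B keeps
// CF and the reserved bits but clears ZF and PF, leaving CF=1, ZF=0 -- the
// 'less than' outcome described above.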
 511 
 512 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 513   Label done;
 514   __ movl(dst, -1);
 515   __ jcc(Assembler::parity, done);
 516   __ jcc(Assembler::below, done);
 517   __ setb(Assembler::notEqual, dst);
 518   __ movzbl(dst, dst);
 519   __ bind(done);
 520 }
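// Net effect (sketch): dst is preloaded with -1, which survives the unordered
// (parity) and 'below' cases; otherwise setb(notEqual)/movzbl leave 0 for an
// equal compare and 1 for 'above'.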
 521 
 522 
 523 //=============================================================================
 524 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 525 
 526 int Compile::ConstantTable::calculate_table_base_offset() const {
 527   return 0;  // absolute addressing, no offset
 528 }
 529 
 530 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 531 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 532   ShouldNotReachHere();
 533 }
 534 
 535 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 536   // Empty encoding
 537 }
 538 
 539 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 540   return 0;
 541 }
 542 
 543 #ifndef PRODUCT
 544 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 545   st->print("# MachConstantBaseNode (empty encoding)");
 546 }
 547 #endif
 548 
 549 
 550 //=============================================================================
 551 #ifndef PRODUCT
 552 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 553   Compile* C = ra_->C;
 554 
 555   int framesize = C->frame_size_in_bytes();
 556   int bangsize = C->bang_size_in_bytes();
 557   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 558   // Remove wordSize for return addr which is already pushed.
 559   framesize -= wordSize;
 560 
 561   if (C->need_stack_bang(bangsize)) {
 562     framesize -= wordSize;
 563     st->print("# stack bang (%d bytes)", bangsize);
 564     st->print("\n\t");
 565     st->print("PUSH   EBP\t# Save EBP");
 566     if (PreserveFramePointer) {
 567       st->print("\n\t");
 568       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 569     }
 570     if (framesize) {
 571       st->print("\n\t");
 572       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 573     }
 574   } else {
 575     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 576     st->print("\n\t");
 577     framesize -= wordSize;
 578     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 579     if (PreserveFramePointer) {
 580       st->print("\n\t");
 581       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 582       if (framesize > 0) {
 583         st->print("\n\t");
 584         st->print("ADD    EBP, #%d", framesize);
 585       }
 586     }
 587   }
 588 
 589   if (VerifyStackAtCalls) {
 590     st->print("\n\t");
 591     framesize -= wordSize;
 592     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 593   }
 594 
 595   if( C->in_24_bit_fp_mode() ) {
 596     st->print("\n\t");
 597     st->print("FLDCW  \t# load 24 bit fpu control word");
 598   }
 599   if (UseSSE >= 2 && VerifyFPU) {
 600     st->print("\n\t");
 601     st->print("# verify FPU stack (must be clean on entry)");
 602   }
 603 
 604 #ifdef ASSERT
 605   if (VerifyStackAtCalls) {
 606     st->print("\n\t");
 607     st->print("# stack alignment check");
 608   }
 609 #endif
 610   st->cr();
 611 }
 612 #endif
 613 
 614 
 615 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 616   Compile* C = ra_->C;
 617   MacroAssembler _masm(&cbuf);
 618 
 619   int framesize = C->frame_size_in_bytes();
 620   int bangsize = C->bang_size_in_bytes();
 621 
 622   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 623 
 624   C->set_frame_complete(cbuf.insts_size());
 625 
 626   if (C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users of the constant
    // table might be emitted before MachConstantBaseNode.
 629     Compile::ConstantTable& constant_table = C->constant_table();
 630     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 631   }
 632 }
 633 
 634 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 635   return MachNode::size(ra_); // too many variables; just compute it the hard way
 636 }
 637 
 638 int MachPrologNode::reloc() const {
 639   return 0; // a large enough number
 640 }
 641 
 642 //=============================================================================
 643 #ifndef PRODUCT
 644 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 645   Compile *C = ra_->C;
 646   int framesize = C->frame_size_in_bytes();
 647   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and rbp
 649   framesize -= 2*wordSize;
 650 
 651   if (C->max_vector_size() > 16) {
 652     st->print("VZEROUPPER");
 653     st->cr(); st->print("\t");
 654   }
 655   if (C->in_24_bit_fp_mode()) {
 656     st->print("FLDCW  standard control word");
 657     st->cr(); st->print("\t");
 658   }
 659   if (framesize) {
 660     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 661     st->cr(); st->print("\t");
 662   }
 663   st->print_cr("POPL   EBP"); st->print("\t");
 664   if (do_polling() && C->is_method_compilation()) {
 665     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 666     st->cr(); st->print("\t");
 667   }
 668 }
 669 #endif
 670 
 671 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 672   Compile *C = ra_->C;
 673   MacroAssembler _masm(&cbuf);
 674 
 675   if (C->max_vector_size() > 16) {
 676     // Clear upper bits of YMM registers when current compiled code uses
 677     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 678     _masm.vzeroupper();
 679   }
 680   // If method set FPU control word, restore to standard control word
 681   if (C->in_24_bit_fp_mode()) {
 682     _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 683   }
 684 
 685   int framesize = C->frame_size_in_bytes();
 686   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and rbp
 688   framesize -= 2*wordSize;
 689 
 690   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 691 
 692   if (framesize >= 128) {
 693     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 694     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 695     emit_d32(cbuf, framesize);
 696   } else if (framesize) {
 697     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 698     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 699     emit_d8(cbuf, framesize);
 700   }
 701 
 702   emit_opcode(cbuf, 0x58 | EBP_enc);
 703 
 704   if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
 705     __ reserved_stack_check();
 706   }
 707 
 708   if (do_polling() && C->is_method_compilation()) {
 709     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 710     emit_opcode(cbuf,0x85);
 711     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 712     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 713   }
 714 }
 715 
 716 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 717   Compile *C = ra_->C;
 718   // If method set FPU control word, restore to standard control word
 719   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 720   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 721   if (do_polling() && C->is_method_compilation()) size += 6;
 722 
 723   int framesize = C->frame_size_in_bytes();
 724   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for the return addr and rbp
 726   framesize -= 2*wordSize;
 727 
 728   size++; // popl rbp,
 729 
 730   if (framesize >= 128) {
 731     size += 6;
 732   } else {
 733     size += framesize ? 3 : 0;
 734   }
 735   size += 64; // added to support ReservedStackAccess
 736   return size;
 737 }
 738 
 739 int MachEpilogNode::reloc() const {
 740   return 0; // a large enough number
 741 }
 742 
 743 const Pipeline * MachEpilogNode::pipeline() const {
 744   return MachNode::pipeline_class();
 745 }
 746 
 747 int MachEpilogNode::safepoint_offset() const { return 0; }
 748 
 749 //=============================================================================
 750 
 751 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 752 static enum RC rc_class( OptoReg::Name reg ) {
 753 
 754   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 755   if (OptoReg::is_stack(reg)) return rc_stack;
 756 
 757   VMReg r = OptoReg::as_VMReg(reg);
 758   if (r->is_Register()) return rc_int;
 759   if (r->is_FloatRegister()) {
 760     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 761     return rc_float;
 762   }
 763   assert(r->is_XMMRegister(), "must be");
 764   return rc_xmm;
 765 }
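// For example (sketch): EAX falls in rc_int, FPR2L in rc_float (legal only
// when UseSSE < 2), any XMM register in rc_xmm, and any stack slot in
// rc_stack.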
 766 
 767 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 768                         int opcode, const char *op_str, int size, outputStream* st ) {
 769   if( cbuf ) {
 770     emit_opcode  (*cbuf, opcode );
 771     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 772 #ifndef PRODUCT
 773   } else if( !do_size ) {
 774     if( size != 0 ) st->print("\n\t");
 775     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 776       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 777       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 778     } else { // FLD, FST, PUSH, POP
 779       st->print("%s [ESP + #%d]",op_str,offset);
 780     }
 781 #endif
 782   }
 783   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 784   return size+3+offset_size;
 785 }
 786 
 787 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 788 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 789                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 790   int in_size_in_bits = Assembler::EVEX_32bit;
 791   int evex_encoding = 0;
 792   if (reg_lo+1 == reg_hi) {
 793     in_size_in_bits = Assembler::EVEX_64bit;
 794     evex_encoding = Assembler::VEX_W;
 795   }
 796   if (cbuf) {
 797     MacroAssembler _masm(cbuf);
 798     if (reg_lo+1 == reg_hi) { // double move?
 799       if (is_load) {
 800         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 801       } else {
 802         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 803       }
 804     } else {
 805       if (is_load) {
 806         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 807       } else {
 808         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 809       }
 810     }
 811 #ifndef PRODUCT
 812   } else if (!do_size) {
 813     if (size != 0) st->print("\n\t");
 814     if (reg_lo+1 == reg_hi) { // double move?
 815       if (is_load) st->print("%s %s,[ESP + #%d]",
 816                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 817                               Matcher::regName[reg_lo], offset);
 818       else         st->print("MOVSD  [ESP + #%d],%s",
 819                               offset, Matcher::regName[reg_lo]);
 820     } else {
 821       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 822                               Matcher::regName[reg_lo], offset);
 823       else         st->print("MOVSS  [ESP + #%d],%s",
 824                               offset, Matcher::regName[reg_lo]);
 825     }
 826 #endif
 827   }
 828   bool is_single_byte = false;
 829   if ((UseAVX > 2) && (offset != 0)) {
 830     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 831   }
 832   int offset_size = 0;
 833   if (UseAVX > 2 ) {
 834     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 835   } else {
 836     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 837   }
 838   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 839   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 840   return size+5+offset_size;
 841 }
 842 
 843 
 844 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 845                             int src_hi, int dst_hi, int size, outputStream* st ) {
 846   if (cbuf) {
 847     MacroAssembler _masm(cbuf);
 848     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 849       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 850                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 851     } else {
 852       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 853                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 854     }
 855 #ifndef PRODUCT
 856   } else if (!do_size) {
 857     if (size != 0) st->print("\n\t");
 858     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 859       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 860         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 861       } else {
 862         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 863       }
 864     } else {
 865       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 866         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 867       } else {
 868         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 869       }
 870     }
 871 #endif
 872   }
 873   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 874   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 875   int sz = (UseAVX > 2) ? 6 : 4;
 876   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 877       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 878   return size + sz;
 879 }
 880 
 881 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 882                             int src_hi, int dst_hi, int size, outputStream* st ) {
 883   // 32-bit
 884   if (cbuf) {
 885     MacroAssembler _masm(cbuf);
 886     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 887              as_Register(Matcher::_regEncode[src_lo]));
 888 #ifndef PRODUCT
 889   } else if (!do_size) {
 890     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 891 #endif
 892   }
 893   return (UseAVX> 2) ? 6 : 4;
 894 }
 895 
 896 
 897 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 898                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 899   // 32-bit
 900   if (cbuf) {
 901     MacroAssembler _masm(cbuf);
 902     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 903              as_XMMRegister(Matcher::_regEncode[src_lo]));
 904 #ifndef PRODUCT
 905   } else if (!do_size) {
 906     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 907 #endif
 908   }
 909   return (UseAVX> 2) ? 6 : 4;
 910 }
 911 
 912 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 913   if( cbuf ) {
 914     emit_opcode(*cbuf, 0x8B );
 915     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 916 #ifndef PRODUCT
 917   } else if( !do_size ) {
 918     if( size != 0 ) st->print("\n\t");
 919     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 920 #endif
 921   }
 922   return size+2;
 923 }
 924 
 925 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 926                                  int offset, int size, outputStream* st ) {
 927   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 928     if( cbuf ) {
 929       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 930       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 931 #ifndef PRODUCT
 932     } else if( !do_size ) {
 933       if( size != 0 ) st->print("\n\t");
 934       st->print("FLD    %s",Matcher::regName[src_lo]);
 935 #endif
 936     }
 937     size += 2;
 938   }
 939 
 940   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 941   const char *op_str;
 942   int op;
 943   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 944     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 945     op = 0xDD;
 946   } else {                   // 32-bit store
 947     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 948     op = 0xD9;
 949     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 950   }
 951 
 952   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 953 }
 954 
 955 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 956 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 957                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 958 
 959 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 960                             int stack_offset, int reg, uint ireg, outputStream* st);
 961 
 962 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 963                                      int dst_offset, uint ireg, outputStream* st) {
 964   int calc_size = 0;
 965   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 966   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 967   switch (ireg) {
 968   case Op_VecS:
 969     calc_size = 3+src_offset_size + 3+dst_offset_size;
 970     break;
 971   case Op_VecD: {
 972     calc_size = 3+src_offset_size + 3+dst_offset_size;
 973     int tmp_src_offset = src_offset + 4;
 974     int tmp_dst_offset = dst_offset + 4;
 975     src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
 976     dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
 977     calc_size += 3+src_offset_size + 3+dst_offset_size;
 978     break;
 979   }   
 980   case Op_VecX:
 981   case Op_VecY:
 982   case Op_VecZ:
 983     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 984     break;
 985   default:
 986     ShouldNotReachHere();
 987   }
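  // Worked size example (Op_VecX, src_offset = 8, dst_offset = 0x80): the two
  // scratch movdqu's at [rsp - #16] are 6 bytes each, the load from [rsp + #8]
  // is 5+1 bytes and the store to [rsp + #0x80] is 5+4 bytes, so
  // calc_size == 6 + 6 + 6 + 9 == 27.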
 988   if (cbuf) {
 989     MacroAssembler _masm(cbuf);
 990     int offset = __ offset();
 991     switch (ireg) {
 992     case Op_VecS:
 993       __ pushl(Address(rsp, src_offset));
 994       __ popl (Address(rsp, dst_offset));
 995       break;
 996     case Op_VecD:
 997       __ pushl(Address(rsp, src_offset));
 998       __ popl (Address(rsp, dst_offset));
 999       __ pushl(Address(rsp, src_offset+4));
1000       __ popl (Address(rsp, dst_offset+4));
1001       break;
1002     case Op_VecX:
1003       __ movdqu(Address(rsp, -16), xmm0);
1004       __ movdqu(xmm0, Address(rsp, src_offset));
1005       __ movdqu(Address(rsp, dst_offset), xmm0);
1006       __ movdqu(xmm0, Address(rsp, -16));
1007       break;
1008     case Op_VecY:
1009       __ vmovdqu(Address(rsp, -32), xmm0);
1010       __ vmovdqu(xmm0, Address(rsp, src_offset));
1011       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1012       __ vmovdqu(xmm0, Address(rsp, -32));
1013       break;
1014     case Op_VecZ:
1015       __ evmovdqul(Address(rsp, -64), xmm0, 2);
1016       __ evmovdqul(xmm0, Address(rsp, src_offset), 2);
1017       __ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
1018       __ evmovdqul(xmm0, Address(rsp, -64), 2);
1019       break;
1020     default:
1021       ShouldNotReachHere();
1022     }
1023     int size = __ offset() - offset;
1024     assert(size == calc_size, "incorrect size calculation");
1025     return size;
1026 #ifndef PRODUCT
1027   } else if (!do_size) {
1028     switch (ireg) {
1029     case Op_VecS:
1030       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1031                 "popl    [rsp + #%d]",
1032                 src_offset, dst_offset);
1033       break;
1034     case Op_VecD:
1035       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1036                 "popq    [rsp + #%d]\n\t"
1037                 "pushl   [rsp + #%d]\n\t"
1038                 "popq    [rsp + #%d]",
1039                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1040       break;
1041      case Op_VecX:
1042       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1043                 "movdqu  xmm0, [rsp + #%d]\n\t"
1044                 "movdqu  [rsp + #%d], xmm0\n\t"
1045                 "movdqu  xmm0, [rsp - #16]",
1046                 src_offset, dst_offset);
1047       break;
1048     case Op_VecY:
1049       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1050                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1051                 "vmovdqu [rsp + #%d], xmm0\n\t"
1052                 "vmovdqu xmm0, [rsp - #32]",
1053                 src_offset, dst_offset);
1054       break;
1055     case Op_VecZ:
1056       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1057                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1058                 "vmovdqu [rsp + #%d], xmm0\n\t"
1059                 "vmovdqu xmm0, [rsp - #64]",
1060                 src_offset, dst_offset);
1061       break;
1062     default:
1063       ShouldNotReachHere();
1064     }
1065 #endif
1066   }
1067   return calc_size;
1068 }
1069 
1070 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1071   // Get registers to move
1072   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1073   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1074   OptoReg::Name dst_second = ra_->get_reg_second(this );
1075   OptoReg::Name dst_first = ra_->get_reg_first(this );
1076 
1077   enum RC src_second_rc = rc_class(src_second);
1078   enum RC src_first_rc = rc_class(src_first);
1079   enum RC dst_second_rc = rc_class(dst_second);
1080   enum RC dst_first_rc = rc_class(dst_first);
1081 
1082   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1083 
1084   // Generate spill code!
1085   int size = 0;
1086 
1087   if( src_first == dst_first && src_second == dst_second )
1088     return size;            // Self copy, no move
1089 
1090   if (bottom_type()->isa_vect() != NULL) {
1091     uint ireg = ideal_reg();
1092     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1093     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1094     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1095     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1096       // mem -> mem
1097       int src_offset = ra_->reg2offset(src_first);
1098       int dst_offset = ra_->reg2offset(dst_first);
1099       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1100     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1101       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1102     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1103       int stack_offset = ra_->reg2offset(dst_first);
1104       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1105     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1106       int stack_offset = ra_->reg2offset(src_first);
1107       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1108     } else {
1109       ShouldNotReachHere();
1110     }
1111   }
1112 
1113   // --------------------------------------
1114   // Check for mem-mem move.  push/pop to move.
1115   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1116     if( src_second == dst_first ) { // overlapping stack copy ranges
1117       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1118       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1119       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1120       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1121     }
1122     // move low bits
1123     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1124     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1125     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1126       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1127       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1128     }
1129     return size;
1130   }
1131 
1132   // --------------------------------------
1133   // Check for integer reg-reg copy
1134   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1135     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1136 
1137   // Check for integer store
1138   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1139     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1140 
1141   // Check for integer load
1142   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1143     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1144 
1145   // Check for integer reg-xmm reg copy
1146   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1147     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1148             "no 64 bit integer-float reg moves" );
1149     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1150   }
1151   // --------------------------------------
1152   // Check for float reg-reg copy
1153   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1154     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1155             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1156     if( cbuf ) {
1157 
1158       // Note the mucking with the register encode to compensate for the 0/1
1159       // indexing issue mentioned in a comment in the reg_def sections
1160       // for FPR registers many lines above here.
1161 
1162       if( src_first != FPR1L_num ) {
1163         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1164         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1165         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1166         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1167      } else {
1168         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1169         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1170      }
1171 #ifndef PRODUCT
1172     } else if( !do_size ) {
1173       if( size != 0 ) st->print("\n\t");
1174       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1175       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1176 #endif
1177     }
1178     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1179   }
1180 
1181   // Check for float store
1182   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1183     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1184   }
1185 
1186   // Check for float load
1187   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1188     int offset = ra_->reg2offset(src_first);
1189     const char *op_str;
1190     int op;
1191     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1192       op_str = "FLD_D";
1193       op = 0xDD;
1194     } else {                   // 32-bit load
1195       op_str = "FLD_S";
1196       op = 0xD9;
1197       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1198     }
1199     if( cbuf ) {
1200       emit_opcode  (*cbuf, op );
1201       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1202       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1203       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1204 #ifndef PRODUCT
1205     } else if( !do_size ) {
1206       if( size != 0 ) st->print("\n\t");
1207       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1208 #endif
1209     }
1210     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1211     return size + 3+offset_size+2;
1212   }
1213 
1214   // Check for xmm reg-reg copy
1215   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1216     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1217             (src_first+1 == src_second && dst_first+1 == dst_second),
1218             "no non-adjacent float-moves" );
1219     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1220   }
1221 
1222   // Check for xmm reg-integer reg copy
1223   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1224     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1225             "no 64 bit float-integer reg moves" );
1226     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1227   }
1228 
1229   // Check for xmm store
1230   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1231     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1232   }
1233 
1234   // Check for float xmm load
1235   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1236     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1237   }
1238 
1239   // Copy from float reg to xmm reg
1240   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1241     // copy to the top of stack from floating point reg
1242     // and use LEA to preserve flags
1243     if( cbuf ) {
1244       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1245       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1246       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1247       emit_d8(*cbuf,0xF8);
1248 #ifndef PRODUCT
1249     } else if( !do_size ) {
1250       if( size != 0 ) st->print("\n\t");
1251       st->print("LEA    ESP,[ESP-8]");
1252 #endif
1253     }
1254     size += 4;
1255 
1256     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1257 
1258     // Copy from the temp memory to the xmm reg.
1259     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1260 
1261     if( cbuf ) {
1262       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1263       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1264       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1265       emit_d8(*cbuf,0x08);
1266 #ifndef PRODUCT
1267     } else if( !do_size ) {
1268       if( size != 0 ) st->print("\n\t");
1269       st->print("LEA    ESP,[ESP+8]");
1270 #endif
1271     }
1272     size += 4;
1273     return size;
1274   }
1275 
1276   assert( size > 0, "missed a case" );
1277 
1278   // --------------------------------------------------------------------
1279   // Check for second bits still needing moving.
1280   if( src_second == dst_second )
1281     return size;               // Self copy; no move
1282   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1283 
1284   // Check for second word int-int move
1285   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1286     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1287 
1288   // Check for second word integer store
1289   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1290     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1291 
1292   // Check for second word integer load
1293   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1294     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1295 
1296 
1297   Unimplemented();
1298   return 0; // Mute compiler
1299 }
1300 
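// The three MachSpillCopyNode entry points below all funnel into
// implementation() above: format() passes no CodeBuffer (print only),
// emit() passes the CodeBuffer to generate code, and size() passes
// do_size=true to compute the length without emitting anything.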
1301 #ifndef PRODUCT
1302 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1303   implementation( NULL, ra_, false, st );
1304 }
1305 #endif
1306 
1307 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1308   implementation( &cbuf, ra_, false, NULL );
1309 }
1310 
1311 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1312   return implementation( NULL, ra_, true, NULL );
1313 }
1314 
1315 
1316 //=============================================================================
1317 #ifndef PRODUCT
1318 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1319   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1320   int reg = ra_->get_reg_first(this);
1321   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1322 }
1323 #endif
1324 
1325 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1326   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1327   int reg = ra_->get_encode(this);
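  // Pick the LEA form by displacement size: offsets of 128 or more need the
  // 32-bit displacement encoding, smaller (non-negative) offsets fit in the
  // signed 8-bit form.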
1328   if( offset >= 128 ) {
1329     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1330     emit_rm(cbuf, 0x2, reg, 0x04);
1331     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1332     emit_d32(cbuf, offset);
1333   }
1334   else {
1335     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1336     emit_rm(cbuf, 0x1, reg, 0x04);
1337     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1338     emit_d8(cbuf, offset);
1339   }
1340 }
1341 
1342 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1343   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
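  // LEA reg,[ESP+disp32] is opcode + ModRM + SIB + disp32 = 7 bytes;
  // the disp8 form is 4 bytes.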
1344   if( offset >= 128 ) {
1345     return 7;
1346   }
1347   else {
1348     return 4;
1349   }
1350 }
1351 
1352 //=============================================================================
1353 #ifndef PRODUCT
1354 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1355   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1356   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1357   st->print_cr("\tNOP");
1358   st->print_cr("\tNOP");
1359   if( !OptoBreakpoint )
1360     st->print_cr("\tNOP");
1361 }
1362 #endif
1363 
1364 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1365   MacroAssembler masm(&cbuf);
1366 #ifdef ASSERT
1367   uint insts_size = cbuf.insts_size();
1368 #endif
1369   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1370   masm.jump_cc(Assembler::notEqual,
1371                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1372   /* WARNING these NOPs are critical so that verified entry point is properly
1373      aligned for patching by NativeJump::patch_verified_entry() */
1374   int nops_cnt = 2;
1375   if( !OptoBreakpoint ) // Leave space for int3
1376      nops_cnt += 1;
1377   masm.nop(nops_cnt);
1378 
1379   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1380 }
1381 
1382 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
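  // 3-byte CMP + 6-byte JNE (rel32 to the IC miss stub) + 2 or 3 NOPs,
  // matching emit() above.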
1383   return OptoBreakpoint ? 11 : 12;
1384 }
1385 
1386 
1387 //=============================================================================
1388 
1389 int Matcher::regnum_to_fpu_offset(int regnum) {
1390   return regnum - 32; // The FP registers are in the second chunk
1391 }
1392 
// This query is UltraSPARC-specific; returning true just means we have a fast l2f conversion
1394 const bool Matcher::convL2FSupported(void) {
1395   return true;
1396 }
1397 
1398 // Is this branch offset short enough that a short branch can be used?
1399 //
1400 // NOTE: If the platform does not provide any short branch variants, then
1401 //       this method should return false for offset 0.
1402 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1403   // The passed offset is relative to address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1406   offset -= br_size;
1407 
1408   // the short version of jmpConUCF2 contains multiple branches,
1409   // making the reach slightly less
1410   if (rule == jmpConUCF2_rule)
1411     return (-126 <= offset && offset <= 125);
1412   return (-128 <= offset && offset <= 127);
1413 }
1414 
1415 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1417   return false;
1418 }
1419 
1420 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1421 const bool Matcher::init_array_count_is_in_bytes = false;
1422 
1423 // Threshold size for cleararray.
1424 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1425 
1426 // Needs 2 CMOV's for longs.
1427 const int Matcher::long_cmove_cost() { return 1; }
1428 
1429 // No CMOVF/CMOVD with SSE/SSE2
1430 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1431 
1432 // Does the CPU require late expand (see block.cpp for description of late expand)?
1433 const bool Matcher::require_postalloc_expand = false;
1434 
1435 // Should the Matcher clone shifts on addressing modes, expecting them to
1436 // be subsumed into complex addressing expressions or compute them into
1437 // registers?  True for Intel but false for most RISCs
1438 const bool Matcher::clone_shift_expressions = true;
1439 
1440 // Do we need to mask the count passed to shift instructions or does
1441 // the cpu only look at the lower 5/6 bits anyway?
1442 const bool Matcher::need_masked_shift_count = false;
1443 
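// Compressed oops/klasses are a 64-bit-only feature, so these queries should
// never be reached on x86_32.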
1444 bool Matcher::narrow_oop_use_complex_address() {
1445   ShouldNotCallThis();
1446   return true;
1447 }
1448 
1449 bool Matcher::narrow_klass_use_complex_address() {
1450   ShouldNotCallThis();
1451   return true;
1452 }
1453 
1454 
1455 // Is it better to copy float constants, or load them directly from memory?
1456 // Intel can load a float constant from a direct address, requiring no
1457 // extra registers.  Most RISCs will have to materialize an address into a
1458 // register first, so they would do better to copy the constant from stack.
1459 const bool Matcher::rematerialize_float_constants = true;
1460 
1461 // If CPU can load and store mis-aligned doubles directly then no fixup is
1462 // needed.  Else we split the double into 2 integer pieces and move it
1463 // piece-by-piece.  Only happens when passing doubles into C code as the
1464 // Java calling convention forces doubles to be aligned.
1465 const bool Matcher::misaligned_doubles_ok = true;
1466 
1467 
1468 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1469   // Get the memory operand from the node
1470   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1471   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1472   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1473   uint opcnt     = 1;                 // First operand
1474   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1475   while( idx >= skipped+num_edges ) {
1476     skipped += num_edges;
1477     opcnt++;                          // Bump operand count
1478     assert( opcnt < numopnds, "Accessing non-existent operand" );
1479     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1480   }
1481 
1482   MachOper *memory = node->_opnds[opcnt];
1483   MachOper *new_memory = NULL;
1484   switch (memory->opcode()) {
1485   case DIRECT:
1486   case INDOFFSET32X:
1487     // No transformation necessary.
1488     return;
1489   case INDIRECT:
1490     new_memory = new indirect_win95_safeOper( );
1491     break;
1492   case INDOFFSET8:
1493     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1494     break;
1495   case INDOFFSET32:
1496     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1497     break;
1498   case INDINDEXOFFSET:
1499     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1500     break;
1501   case INDINDEXSCALE:
1502     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1503     break;
1504   case INDINDEXSCALEOFFSET:
1505     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1506     break;
1507   case LOAD_LONG_INDIRECT:
1508   case LOAD_LONG_INDOFFSET32:
1509     // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1510     return;
1511   default:
1512     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1513     return;
1514   }
1515   node->_opnds[opcnt] = new_memory;
1516 }
1517 
1518 // Advertise here if the CPU requires explicit rounding operations
1519 // to implement the UseStrictFP mode.
1520 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1521 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On x86_32 they are stored with conversion only when the FPU is used for floats.
1524 bool Matcher::float_in_double() { return (UseSSE == 0); }
1525 
1526 // Do ints take an entire long register or just half?
1527 const bool Matcher::int_in_long = false;
1528 
1529 // Return whether or not this register is ever used as an argument.  This
1530 // function is used on startup to build the trampoline stubs in generateOptoStub.
1531 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1533 bool Matcher::can_be_java_arg( int reg ) {
1534   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1535   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1536   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1537   return false;
1538 }
1539 
1540 bool Matcher::is_spillable_arg( int reg ) {
1541   return can_be_java_arg(reg);
1542 }
1543 
1544 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded so that negating a negative divisor
  // always yields a correct positive 32-bit value).
1550   return VM_Version::has_fast_idiv() &&
1551          (divisor == (int)divisor && divisor != min_jint);
1552 }
1553 
1554 // Register for DIVI projection of divmodI
1555 RegMask Matcher::divI_proj_mask() {
1556   return EAX_REG_mask();
1557 }
1558 
1559 // Register for MODI projection of divmodI
1560 RegMask Matcher::modI_proj_mask() {
1561   return EDX_REG_mask();
1562 }
1563 
1564 // Register for DIVL projection of divmodL
1565 RegMask Matcher::divL_proj_mask() {
1566   ShouldNotReachHere();
1567   return RegMask();
1568 }
1569 
1570 // Register for MODL projection of divmodL
1571 RegMask Matcher::modL_proj_mask() {
1572   ShouldNotReachHere();
1573   return RegMask();
1574 }
1575 
1576 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1577   return NO_REG_mask();
1578 }
1579 
1580 // Returns true if the high 32 bits of the value is known to be zero.
1581 bool is_operand_hi32_zero(Node* n) {
1582   int opc = n->Opcode();
1583   if (opc == Op_AndL) {
1584     Node* o2 = n->in(2);
1585     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1586       return true;
1587     }
1588   }
1589   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1590     return true;
1591   }
1592   return false;
1593 }
1594 
1595 %}
1596 
1597 //----------ENCODING BLOCK-----------------------------------------------------
1598 // This block specifies the encoding classes used by the compiler to output
1599 // byte streams.  Encoding classes generate functions which are called by
1600 // Machine Instruction Nodes in order to generate the bit encoding of the
1601 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1604 // operand to generate a function which returns its register number when
1605 // queried.   CONST_INTER causes an operand to generate a function which
1606 // returns the value of the constant when queried.  MEMORY_INTER causes an
1607 // operand to generate four functions which return the Base Register, the
1608 // Index Register, the Scale Value, and the Offset Value of the operand when
1609 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1611 // associated with each basic boolean condition for a conditional instruction.
1612 // Instructions specify two basic values for encoding.  They use the
1613 // ins_encode keyword to specify their encoding class (which must be one of
1614 // the class names specified in the encoding block), and they use the
1615 // opcode keyword to specify, in order, their primary, secondary, and
1616 // tertiary opcode.  Only the opcode sections which a particular instruction
1617 // needs for encoding need to be specified.
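// For illustration only (a hypothetical fragment, not one of the instruct
// definitions in this file): an instruction might specify
//   opcode(0x89);                          // primary opcode
//   ins_encode( OpcP, RegMem(src, mem) );
// so that OpcP emits the 0x89 opcode byte and RegMem emits the ModRM/SIB/
// displacement bytes for the memory operand.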
1618 encode %{
  // Build emit functions for each basic byte or larger field in the Intel
1620   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1621   // code in the enc_class source block.  Emit functions will live in the
1622   // main source block for now.  In future, we can generalize this by
1623   // adding a syntax that specifies the sizes of fields in an order,
1624   // so that the adlc can build the emit functions automagically
1625 
1626   // Emit primary opcode
1627   enc_class OpcP %{
1628     emit_opcode(cbuf, $primary);
1629   %}
1630 
1631   // Emit secondary opcode
1632   enc_class OpcS %{
1633     emit_opcode(cbuf, $secondary);
1634   %}
1635 
1636   // Emit opcode directly
1637   enc_class Opcode(immI d8) %{
1638     emit_opcode(cbuf, $d8$$constant);
1639   %}
1640 
1641   enc_class SizePrefix %{
1642     emit_opcode(cbuf,0x66);
1643   %}
1644 
1645   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1646     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1647   %}
1648 
1649   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1650     emit_opcode(cbuf,$opcode$$constant);
1651     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1652   %}
1653 
1654   enc_class mov_r32_imm0( rRegI dst ) %{
1655     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1656     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1657   %}
1658 
1659   enc_class cdq_enc %{
1660     // Full implementation of Java idiv and irem; checks for
1661     // special case as described in JVM spec., p.243 & p.271.
1662     //
1663     //         normal case                           special case
1664     //
    // input : rax: dividend                          min_int
    //         reg: divisor                           -1
    //
    // output: rax: quotient  (= rax idiv reg)        min_int
    //         rdx: remainder (= rax irem reg)        0
1670     //
    //  Code sequence:
1672     //
1673     //  81 F8 00 00 00 80    cmp         rax,80000000h
1674     //  0F 85 0B 00 00 00    jne         normal_case
1675     //  33 D2                xor         rdx,edx
1676     //  83 F9 FF             cmp         rcx,0FFh
1677     //  0F 84 03 00 00 00    je          done
1678     //                  normal_case:
1679     //  99                   cdq
1680     //  F7 F9                idiv        rax,ecx
1681     //                  done:
1682     //
1683     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1684     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1685     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1686     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1687     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1688     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1689     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1690     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1691     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1692     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1693     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1694     // normal_case:
1695     emit_opcode(cbuf,0x99);                                         // cdq
1696     // idiv (note: must be emitted by the user of this rule)
1697     // normal:
1698   %}
1699 
1700   // Dense encoding for older common ops
1701   enc_class Opc_plus(immI opcode, rRegI reg) %{
1702     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1703   %}
1704 
1705 
  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1707   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1708     // Check for 8-bit immediate, and set sign extend bit in opcode
1709     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1710       emit_opcode(cbuf, $primary | 0x02);
1711     }
1712     else {                          // If 32-bit immediate
1713       emit_opcode(cbuf, $primary);
1714     }
1715   %}
1716 
1717   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1718     // Emit primary opcode and set sign-extend bit
1719     // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
1722     else {                          // If 32-bit immediate
1723       emit_opcode(cbuf, $primary);
1724     }
1725     // Emit r/m byte with secondary opcode, after primary opcode.
1726     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1727   %}
1728 
1729   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1730     // Check for 8-bit immediate, and set sign extend bit in opcode
1731     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1732       $$$emit8$imm$$constant;
1733     }
1734     else {                          // If 32-bit immediate
1735       // Output immediate
1736       $$$emit32$imm$$constant;
1737     }
1738   %}
1739 
1740   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1741     // Emit primary opcode and set sign-extend bit
1742     // Check for 8-bit immediate, and set sign extend bit in opcode
1743     int con = (int)$imm$$constant; // Throw away top bits
1744     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1745     // Emit r/m byte with secondary opcode, after primary opcode.
1746     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1747     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1748     else                               emit_d32(cbuf,con);
1749   %}
1750 
1751   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1752     // Emit primary opcode and set sign-extend bit
1753     // Check for 8-bit immediate, and set sign extend bit in opcode
1754     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1755     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1756     // Emit r/m byte with tertiary opcode, after primary opcode.
1757     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1758     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1759     else                               emit_d32(cbuf,con);
1760   %}
1761 
1762   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1763     emit_cc(cbuf, $secondary, $dst$$reg );
1764   %}
1765 
1766   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1767     int destlo = $dst$$reg;
1768     int desthi = HIGH_FROM_LOW(destlo);
1769     // bswap lo
1770     emit_opcode(cbuf, 0x0F);
1771     emit_cc(cbuf, 0xC8, destlo);
1772     // bswap hi
1773     emit_opcode(cbuf, 0x0F);
1774     emit_cc(cbuf, 0xC8, desthi);
1775     // xchg lo and hi
1776     emit_opcode(cbuf, 0x87);
1777     emit_rm(cbuf, 0x3, destlo, desthi);
1778   %}
1779 
1780   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1781     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1782   %}
1783 
1784   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1785     $$$emit8$primary;
1786     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1787   %}
1788 
1789   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1790     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1791     emit_d8(cbuf, op >> 8 );
1792     emit_d8(cbuf, op & 255);
1793   %}
1794 
1795   // emulate a CMOV with a conditional branch around a MOV
1796   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1797     // Invert sense of branch from sense of CMOV
1798     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1799     emit_d8( cbuf, $brOffs$$constant );
1800   %}
1801 
1802   enc_class enc_PartialSubtypeCheck( ) %{
1803     Register Redi = as_Register(EDI_enc); // result register
1804     Register Reax = as_Register(EAX_enc); // super class
1805     Register Recx = as_Register(ECX_enc); // killed
1806     Register Resi = as_Register(ESI_enc); // sub class
1807     Label miss;
1808 
1809     MacroAssembler _masm(&cbuf);
1810     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1811                                      NULL, &miss,
1812                                      /*set_cond_codes:*/ true);
1813     if ($primary) {
1814       __ xorptr(Redi, Redi);
1815     }
1816     __ bind(miss);
1817   %}
1818 
1819   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1820     MacroAssembler masm(&cbuf);
1821     int start = masm.offset();
1822     if (UseSSE >= 2) {
1823       if (VerifyFPU) {
1824         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1825       }
1826     } else {
1827       // External c_calling_convention expects the FPU stack to be 'clean'.
1828       // Compiled code leaves it dirty.  Do cleanup now.
1829       masm.empty_FPU_stack();
1830     }
1831     if (sizeof_FFree_Float_Stack_All == -1) {
1832       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1833     } else {
1834       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1835     }
1836   %}
1837 
1838   enc_class Verify_FPU_For_Leaf %{
1839     if( VerifyFPU ) {
1840       MacroAssembler masm(&cbuf);
1841       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1842     }
1843   %}
1844 
1845   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1846     // This is the instruction starting address for relocation info.
1847     cbuf.set_insts_mark();
1848     $$$emit8$primary;
1849     // CALL directly to the runtime
1850     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1851                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1852 
1853     if (UseSSE >= 2) {
1854       MacroAssembler _masm(&cbuf);
1855       BasicType rt = tf()->return_type();
1856 
1857       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1858         // A C runtime call where the return value is unused.  In SSE2+
1859         // mode the result needs to be removed from the FPU stack.  It's
1860         // likely that this function call could be removed by the
1861         // optimizer if the C function is a pure function.
1862         __ ffree(0);
1863       } else if (rt == T_FLOAT) {
1864         __ lea(rsp, Address(rsp, -4));
1865         __ fstp_s(Address(rsp, 0));
1866         __ movflt(xmm0, Address(rsp, 0));
1867         __ lea(rsp, Address(rsp,  4));
1868       } else if (rt == T_DOUBLE) {
1869         __ lea(rsp, Address(rsp, -8));
1870         __ fstp_d(Address(rsp, 0));
1871         __ movdbl(xmm0, Address(rsp, 0));
1872         __ lea(rsp, Address(rsp,  8));
1873       }
1874     }
1875   %}
1876 
1877 
1878   enc_class pre_call_resets %{
1879     // If method sets FPU control word restore it here
1880     debug_only(int off0 = cbuf.insts_size());
1881     if (ra_->C->in_24_bit_fp_mode()) {
1882       MacroAssembler _masm(&cbuf);
1883       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1884     }
1885     if (ra_->C->max_vector_size() > 16) {
1886       // Clear upper bits of YMM registers when current compiled code uses
1887       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1888       MacroAssembler _masm(&cbuf);
1889       __ vzeroupper();
1890     }
1891     debug_only(int off1 = cbuf.insts_size());
1892     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1893   %}
1894 
1895   enc_class post_call_FPU %{
1896     // If method sets FPU control word do it here also
1897     if (Compile::current()->in_24_bit_fp_mode()) {
1898       MacroAssembler masm(&cbuf);
1899       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1900     }
1901   %}
1902 
1903   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1904     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1905     // who we intended to call.
1906     cbuf.set_insts_mark();
1907     $$$emit8$primary;
1908 
1909     if (!_method) {
1910       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1911                      runtime_call_Relocation::spec(),
1912                      RELOC_IMM32);
1913     } else {
1914       int method_index = resolved_method_index(cbuf);
1915       RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1916                                                   : static_call_Relocation::spec(method_index);
1917       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1918                      rspec, RELOC_DISP32);
1919       // Emit stubs for static call.
1920       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1921       if (stub == NULL) {
1922         ciEnv::current()->record_failure("CodeCache is full");
1923         return;
1924       }
1925     }
1926   %}
1927 
1928   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1929     MacroAssembler _masm(&cbuf);
1930     __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1931   %}
1932 
1933   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1934     int disp = in_bytes(Method::from_compiled_offset());
1935     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1936 
    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1938     cbuf.set_insts_mark();
1939     $$$emit8$primary;
1940     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1941     emit_d8(cbuf, disp);             // Displacement
1942 
1943   %}
1944 
1945 //   Following encoding is no longer used, but may be restored if calling
1946 //   convention changes significantly.
1947 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1948 //
1949 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1950 //     // int ic_reg     = Matcher::inline_cache_reg();
1951 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1952 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1953 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1954 //
1955 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1956 //     // // so we load it immediately before the call
1957 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1958 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1959 //
1960 //     // xor rbp,ebp
1961 //     emit_opcode(cbuf, 0x33);
1962 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1963 //
1964 //     // CALL to interpreter.
1965 //     cbuf.set_insts_mark();
1966 //     $$$emit8$primary;
1967 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1968 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1969 //   %}
1970 
1971   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1972     $$$emit8$primary;
1973     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1974     $$$emit8$shift$$constant;
1975   %}
1976 
1977   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1978     // Load immediate does not have a zero or sign extended version
1979     // for 8-bit immediates
1980     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1981     $$$emit32$src$$constant;
1982   %}
1983 
1984   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1985     // Load immediate does not have a zero or sign extended version
1986     // for 8-bit immediates
1987     emit_opcode(cbuf, $primary + $dst$$reg);
1988     $$$emit32$src$$constant;
1989   %}
1990 
1991   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1992     // Load immediate does not have a zero or sign extended version
1993     // for 8-bit immediates
1994     int dst_enc = $dst$$reg;
1995     int src_con = $src$$constant & 0x0FFFFFFFFL;
1996     if (src_con == 0) {
1997       // xor dst, dst
1998       emit_opcode(cbuf, 0x33);
1999       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2000     } else {
2001       emit_opcode(cbuf, $primary + dst_enc);
2002       emit_d32(cbuf, src_con);
2003     }
2004   %}
2005 
2006   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2007     // Load immediate does not have a zero or sign extended version
2008     // for 8-bit immediates
2009     int dst_enc = $dst$$reg + 2;
2010     int src_con = ((julong)($src$$constant)) >> 32;
2011     if (src_con == 0) {
2012       // xor dst, dst
2013       emit_opcode(cbuf, 0x33);
2014       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2015     } else {
2016       emit_opcode(cbuf, $primary + dst_enc);
2017       emit_d32(cbuf, src_con);
2018     }
2019   %}
2020 
2021 
2022   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2023   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2024     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2025   %}
2026 
2027   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2028     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2029   %}
2030 
2035   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2036     $$$emit8$primary;
2037     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2038   %}
2039 
2040   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2041     $$$emit8$secondary;
2042     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2043   %}
2044 
2045   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2046     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2047   %}
2048 
2049   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2050     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2051   %}
2052 
2053   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2054     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2055   %}
2056 
2057   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2058     // Output immediate
2059     $$$emit32$src$$constant;
2060   %}
2061 
2062   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2063     // Output Float immediate bits
2064     jfloat jf = $src$$constant;
2065     int    jf_as_bits = jint_cast( jf );
2066     emit_d32(cbuf, jf_as_bits);
2067   %}
2068 
2069   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2070     // Output Float immediate bits
2071     jfloat jf = $src$$constant;
2072     int    jf_as_bits = jint_cast( jf );
2073     emit_d32(cbuf, jf_as_bits);
2074   %}
2075 
2076   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2077     // Output immediate
2078     $$$emit16$src$$constant;
2079   %}
2080 
2081   enc_class Con_d32(immI src) %{
2082     emit_d32(cbuf,$src$$constant);
2083   %}
2084 
2085   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2086     // Output immediate memory reference
2087     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2088     emit_d32(cbuf, 0x00);
2089   %}
2090 
2091   enc_class lock_prefix( ) %{
2092     if( os::is_MP() )
2093       emit_opcode(cbuf,0xF0);         // [Lock]
2094   %}
2095 
2096   // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
2101   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2102 
2103     // XCHG  rbx,ecx
2104     emit_opcode(cbuf,0x87);
2105     emit_opcode(cbuf,0xD9);
2106     // [Lock]
2107     if( os::is_MP() )
2108       emit_opcode(cbuf,0xF0);
2109     // CMPXCHG8 [Eptr]
2110     emit_opcode(cbuf,0x0F);
2111     emit_opcode(cbuf,0xC7);
2112     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2113     // XCHG  rbx,ecx
2114     emit_opcode(cbuf,0x87);
2115     emit_opcode(cbuf,0xD9);
2116   %}
2117 
2118   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2119     // [Lock]
2120     if( os::is_MP() )
2121       emit_opcode(cbuf,0xF0);
2122 
2123     // CMPXCHG [Eptr]
2124     emit_opcode(cbuf,0x0F);
2125     emit_opcode(cbuf,0xB1);
2126     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2127   %}
2128 
2129   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2130     int res_encoding = $res$$reg;
2131 
2132     // MOV  res,0
2133     emit_opcode( cbuf, 0xB8 + res_encoding);
2134     emit_d32( cbuf, 0 );
2135     // JNE,s  fail
2136     emit_opcode(cbuf,0x75);
2137     emit_d8(cbuf, 5 );
2138     // MOV  res,1
2139     emit_opcode( cbuf, 0xB8 + res_encoding);
2140     emit_d32( cbuf, 1 );
2141     // fail:
2142   %}
2143 
2144   enc_class set_instruction_start( ) %{
2145     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2146   %}
2147 
2148   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2149     int reg_encoding = $ereg$$reg;
2150     int base  = $mem$$base;
2151     int index = $mem$$index;
2152     int scale = $mem$$scale;
2153     int displace = $mem$$disp;
2154     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2155     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2156   %}
2157 
2158   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2159     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2160     int base  = $mem$$base;
2161     int index = $mem$$index;
2162     int scale = $mem$$scale;
2163     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2164     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2165     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2166   %}
2167 
2168   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2169     int r1, r2;
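    // $tertiary selects the double shift: 0xA4 is SHLD (left shift of the
    // long); otherwise the halves swap roles (presumably SHRD, 0xAC).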
2170     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2171     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2172     emit_opcode(cbuf,0x0F);
2173     emit_opcode(cbuf,$tertiary);
2174     emit_rm(cbuf, 0x3, r1, r2);
2175     emit_d8(cbuf,$cnt$$constant);
2176     emit_d8(cbuf,$primary);
2177     emit_rm(cbuf, 0x3, $secondary, r1);
2178     emit_d8(cbuf,$cnt$$constant);
2179   %}
2180 
2181   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2182     emit_opcode( cbuf, 0x8B ); // Move
2183     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2184     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2185       emit_d8(cbuf,$primary);
2186       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2187       emit_d8(cbuf,$cnt$$constant-32);
2188     }
2189     emit_d8(cbuf,$primary);
2190     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2191     emit_d8(cbuf,31);
2192   %}
2193 
2194   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2195     int r1, r2;
2196     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2197     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2198 
2199     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2200     emit_rm(cbuf, 0x3, r1, r2);
2201     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2202       emit_opcode(cbuf,$primary);
2203       emit_rm(cbuf, 0x3, $secondary, r1);
2204       emit_d8(cbuf,$cnt$$constant-32);
2205     }
2206     emit_opcode(cbuf,0x33);  // XOR r2,r2
2207     emit_rm(cbuf, 0x3, r2, r2);
2208   %}
2209 
2210   // Clone of RegMem but accepts an extra parameter to access each
2211   // half of a double in memory; it never needs relocation info.
2212   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2213     emit_opcode(cbuf,$opcode$$constant);
2214     int reg_encoding = $rm_reg$$reg;
2215     int base     = $mem$$base;
2216     int index    = $mem$$index;
2217     int scale    = $mem$$scale;
2218     int displace = $mem$$disp + $disp_for_half$$constant;
2219     relocInfo::relocType disp_reloc = relocInfo::none;
2220     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2221   %}
2222 
2223   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2224   //
2225   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2226   // and it never needs relocation information.
2227   // Frequently used to move data between FPU's Stack Top and memory.
2228   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2229     int rm_byte_opcode = $rm_opcode$$constant;
2230     int base     = $mem$$base;
2231     int index    = $mem$$index;
2232     int scale    = $mem$$scale;
2233     int displace = $mem$$disp;
2234     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2235     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2236   %}
2237 
2238   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2239     int rm_byte_opcode = $rm_opcode$$constant;
2240     int base     = $mem$$base;
2241     int index    = $mem$$index;
2242     int scale    = $mem$$scale;
2243     int displace = $mem$$disp;
2244     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2245     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2246   %}
2247 
2248   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2249     int reg_encoding = $dst$$reg;
2250     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2251     int index        = 0x04;            // 0x04 indicates no index
2252     int scale        = 0x00;            // 0x00 indicates no scale
2253     int displace     = $src1$$constant; // 0x00 indicates no displacement
2254     relocInfo::relocType disp_reloc = relocInfo::none;
2255     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2256   %}
2257 
2258   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2259     // Compare dst,src
2260     emit_opcode(cbuf,0x3B);
2261     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2262     // jmp dst < src around move
2263     emit_opcode(cbuf,0x7C);
2264     emit_d8(cbuf,2);
2265     // move dst,src
2266     emit_opcode(cbuf,0x8B);
2267     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2268   %}
2269 
2270   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2271     // Compare dst,src
2272     emit_opcode(cbuf,0x3B);
2273     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2274     // jmp dst > src around move
2275     emit_opcode(cbuf,0x7F);
2276     emit_d8(cbuf,2);
2277     // move dst,src
2278     emit_opcode(cbuf,0x8B);
2279     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2280   %}
2281 
2282   enc_class enc_FPR_store(memory mem, regDPR src) %{
2283     // If src is FPR1, we can just FST to store it.
2284     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2285     int reg_encoding = 0x2; // Just store
2286     int base  = $mem$$base;
2287     int index = $mem$$index;
2288     int scale = $mem$$scale;
2289     int displace = $mem$$disp;
2290     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2291     if( $src$$reg != FPR1L_enc ) {
2292       reg_encoding = 0x3;  // Store & pop
2293       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2294       emit_d8( cbuf, 0xC0-1+$src$$reg );
2295     }
2296     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2297     emit_opcode(cbuf,$primary);
2298     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2299   %}
2300 
2301   enc_class neg_reg(rRegI dst) %{
2302     // NEG $dst
2303     emit_opcode(cbuf,0xF7);
2304     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2305   %}
2306 
2307   enc_class setLT_reg(eCXRegI dst) %{
2308     // SETLT $dst
2309     emit_opcode(cbuf,0x0F);
2310     emit_opcode(cbuf,0x9C);
2311     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2312   %}
2313 
2314   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2315     int tmpReg = $tmp$$reg;
2316 
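    // Branch-free conditional add: SBB makes $tmp all ones exactly when the
    // SUB borrowed, so $y is added only in that case.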
2317     // SUB $p,$q
2318     emit_opcode(cbuf,0x2B);
2319     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2320     // SBB $tmp,$tmp
2321     emit_opcode(cbuf,0x1B);
2322     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2323     // AND $tmp,$y
2324     emit_opcode(cbuf,0x23);
2325     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2326     // ADD $p,$tmp
2327     emit_opcode(cbuf,0x03);
2328     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2329   %}
2330 
2331   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2332     // TEST shift,32
2333     emit_opcode(cbuf,0xF7);
2334     emit_rm(cbuf, 0x3, 0, ECX_enc);
2335     emit_d32(cbuf,0x20);
2336     // JEQ,s small
2337     emit_opcode(cbuf, 0x74);
2338     emit_d8(cbuf, 0x04);
2339     // MOV    $dst.hi,$dst.lo
2340     emit_opcode( cbuf, 0x8B );
2341     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2342     // CLR    $dst.lo
2343     emit_opcode(cbuf, 0x33);
2344     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2345 // small:
2346     // SHLD   $dst.hi,$dst.lo,$shift
2347     emit_opcode(cbuf,0x0F);
2348     emit_opcode(cbuf,0xA5);
2349     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2350     // SHL    $dst.lo,$shift"
2351     emit_opcode(cbuf,0xD3);
2352     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2353   %}
2354 
2355   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2356     // TEST shift,32
2357     emit_opcode(cbuf,0xF7);
2358     emit_rm(cbuf, 0x3, 0, ECX_enc);
2359     emit_d32(cbuf,0x20);
2360     // JEQ,s small
2361     emit_opcode(cbuf, 0x74);
2362     emit_d8(cbuf, 0x04);
2363     // MOV    $dst.lo,$dst.hi
2364     emit_opcode( cbuf, 0x8B );
2365     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2366     // CLR    $dst.hi
2367     emit_opcode(cbuf, 0x33);
2368     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2369 // small:
2370     // SHRD   $dst.lo,$dst.hi,$shift
2371     emit_opcode(cbuf,0x0F);
2372     emit_opcode(cbuf,0xAD);
2373     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
2375     emit_opcode(cbuf,0xD3);
2376     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2377   %}
2378 
2379   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2380     // TEST shift,32
2381     emit_opcode(cbuf,0xF7);
2382     emit_rm(cbuf, 0x3, 0, ECX_enc);
2383     emit_d32(cbuf,0x20);
2384     // JEQ,s small
2385     emit_opcode(cbuf, 0x74);
2386     emit_d8(cbuf, 0x05);
2387     // MOV    $dst.lo,$dst.hi
2388     emit_opcode( cbuf, 0x8B );
2389     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2390     // SAR    $dst.hi,31
2391     emit_opcode(cbuf, 0xC1);
2392     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2393     emit_d8(cbuf, 0x1F );
2394 // small:
2395     // SHRD   $dst.lo,$dst.hi,$shift
2396     emit_opcode(cbuf,0x0F);
2397     emit_opcode(cbuf,0xAD);
2398     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
2400     emit_opcode(cbuf,0xD3);
2401     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2402   %}
2403 
2404 
2405   // ----------------- Encodings for floating point unit -----------------
2406   // May leave result in FPU-TOS or FPU reg depending on opcodes
2407   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2408     $$$emit8$primary;
2409     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2410   %}
2411 
2412   // Pop argument in FPR0 with FSTP ST(0)
2413   enc_class PopFPU() %{
2414     emit_opcode( cbuf, 0xDD );
2415     emit_d8( cbuf, 0xD8 );
2416   %}
2417 
2418   // !!!!! equivalent to Pop_Reg_F
2419   enc_class Pop_Reg_DPR( regDPR dst ) %{
2420     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2421     emit_d8( cbuf, 0xD8+$dst$$reg );
2422   %}
2423 
2424   enc_class Push_Reg_DPR( regDPR dst ) %{
2425     emit_opcode( cbuf, 0xD9 );
2426     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2427   %}
2428 
2429   enc_class strictfp_bias1( regDPR dst ) %{
2430     emit_opcode( cbuf, 0xDB );           // FLD m80real
2431     emit_opcode( cbuf, 0x2D );
2432     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2433     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2434     emit_opcode( cbuf, 0xC8+$dst$$reg );
2435   %}
2436 
2437   enc_class strictfp_bias2( regDPR dst ) %{
2438     emit_opcode( cbuf, 0xDB );           // FLD m80real
2439     emit_opcode( cbuf, 0x2D );
2440     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2441     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2442     emit_opcode( cbuf, 0xC8+$dst$$reg );
2443   %}
2444 
2445   // Special case for moving an integer register to a stack slot.
2446   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2447     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2448   %}
2449 
2450   // Special case for moving a register to a stack slot.
2451   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2452     // Opcode already emitted
2453     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2454     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2455     emit_d32(cbuf, $dst$$disp);   // Displacement
2456   %}
2457 
2458   // Push the integer in stackSlot 'src' onto FP-stack
2459   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2460     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2461   %}
2462 
2463   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2464   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2465     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2466   %}
2467 
2468   // Same as Pop_Mem_F except for opcode
2469   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2470   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2471     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2472   %}
2473 
2474   enc_class Pop_Reg_FPR( regFPR dst ) %{
2475     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2476     emit_d8( cbuf, 0xD8+$dst$$reg );
2477   %}
2478 
2479   enc_class Push_Reg_FPR( regFPR dst ) %{
2480     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2481     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2482   %}
2483 
2484   // Push FPU's float to a stack-slot, and pop FPU-stack
2485   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2486     int pop = 0x02;
2487     if ($src$$reg != FPR1L_enc) {
2488       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2489       emit_d8( cbuf, 0xC0-1+$src$$reg );
2490       pop = 0x03;
2491     }
2492     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2493   %}
2494 
2495   // Push FPU's double to a stack-slot, and pop FPU-stack
2496   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2497     int pop = 0x02;
2498     if ($src$$reg != FPR1L_enc) {
2499       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2500       emit_d8( cbuf, 0xC0-1+$src$$reg );
2501       pop = 0x03;
2502     }
2503     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2504   %}
2505 
2506   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2507   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2508     int pop = 0xD0 - 1; // -1 since we skip FLD
2509     if ($src$$reg != FPR1L_enc) {
2510       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2511       emit_d8( cbuf, 0xC0-1+$src$$reg );
2512       pop = 0xD8;
2513     }
2514     emit_opcode( cbuf, 0xDD );
2515     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2516   %}
2517 
2518 
2519   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2520     // load dst in FPR0
2521     emit_opcode( cbuf, 0xD9 );
2522     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2523     if ($src$$reg != FPR1L_enc) {
2524       // fincstp
2525       emit_opcode (cbuf, 0xD9);
2526       emit_opcode (cbuf, 0xF7);
2527       // swap src with FPR1:
2528       // FXCH FPR1 with src
2529       emit_opcode(cbuf, 0xD9);
2530       emit_d8(cbuf, 0xC8-1+$src$$reg );
2531       // fdecstp
2532       emit_opcode (cbuf, 0xD9);
2533       emit_opcode (cbuf, 0xF6);
2534     }
2535   %}
2536 
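  // Spill two XMM doubles through the 8-byte stack temp and push both onto
  // the x87 stack; src1 is pushed first, so src0 ends up in ST(0) and src1
  // in ST(1).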
2537   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2538     MacroAssembler _masm(&cbuf);
2539     __ subptr(rsp, 8);
2540     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2541     __ fld_d(Address(rsp, 0));
2542     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2543     __ fld_d(Address(rsp, 0));
2544   %}
2545 
2546   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2547     MacroAssembler _masm(&cbuf);
2548     __ subptr(rsp, 4);
2549     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2550     __ fld_s(Address(rsp, 0));
2551     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2552     __ fld_s(Address(rsp, 0));
2553   %}
2554 
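  // Pop the x87 result (ST(0)) into an XMM register through the stack temp
  // and release the 8 bytes reserved earlier (e.g. by Push_ModD_encoding or
  // push_stack_temp_qword).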
2555   enc_class Push_ResultD(regD dst) %{
2556     MacroAssembler _masm(&cbuf);
2557     __ fstp_d(Address(rsp, 0));
2558     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2559     __ addptr(rsp, 8);
2560   %}
2561 
2562   enc_class Push_ResultF(regF dst, immI d8) %{
2563     MacroAssembler _masm(&cbuf);
2564     __ fstp_s(Address(rsp, 0));
2565     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2566     __ addptr(rsp, $d8$$constant);
2567   %}
2568 
2569   enc_class Push_SrcD(regD src) %{
2570     MacroAssembler _masm(&cbuf);
2571     __ subptr(rsp, 8);
2572     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2573     __ fld_d(Address(rsp, 0));
2574   %}
2575 
2576   enc_class push_stack_temp_qword() %{
2577     MacroAssembler _masm(&cbuf);
2578     __ subptr(rsp, 8);
2579   %}
2580 
2581   enc_class pop_stack_temp_qword() %{
2582     MacroAssembler _masm(&cbuf);
2583     __ addptr(rsp, 8);
2584   %}
2585 
2586   enc_class push_xmm_to_fpr1(regD src) %{
2587     MacroAssembler _masm(&cbuf);
2588     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2589     __ fld_d(Address(rsp, 0));
2590   %}
2591 
2592   enc_class Push_Result_Mod_DPR( regDPR src) %{
2593     if ($src$$reg != FPR1L_enc) {
2594       // fincstp
2595       emit_opcode (cbuf, 0xD9);
2596       emit_opcode (cbuf, 0xF7);
2597       // FXCH FPR1 with src
2598       emit_opcode(cbuf, 0xD9);
2599       emit_d8(cbuf, 0xC8-1+$src$$reg );
2600       // fdecstp
2601       emit_opcode (cbuf, 0xD9);
2602       emit_opcode (cbuf, 0xF6);
2603     }
2604     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2605     // // FSTP   FPR$dst$$reg
2606     // emit_opcode( cbuf, 0xDD );
2607     // emit_d8( cbuf, 0xD8+$dst$$reg );
2608   %}
2609 
2610   enc_class fnstsw_sahf_skip_parity() %{
2611     // fnstsw ax
2612     emit_opcode( cbuf, 0xDF );
2613     emit_opcode( cbuf, 0xE0 );
2614     // sahf
2615     emit_opcode( cbuf, 0x9E );
2616     // jnp  ::skip
2617     emit_opcode( cbuf, 0x7B );
2618     emit_opcode( cbuf, 0x05 );
2619   %}
2620 
2621   enc_class emitModDPR() %{
2622     // fprem must be iterative
2623     // :: loop
2624     // fprem
2625     emit_opcode( cbuf, 0xD9 );
2626     emit_opcode( cbuf, 0xF8 );
2627     // wait
2628     emit_opcode( cbuf, 0x9b );
2629     // fnstsw ax
2630     emit_opcode( cbuf, 0xDF );
2631     emit_opcode( cbuf, 0xE0 );
2632     // sahf
2633     emit_opcode( cbuf, 0x9E );
2634     // jp  ::loop
2635     emit_opcode( cbuf, 0x0F );
2636     emit_opcode( cbuf, 0x8A );
2637     emit_opcode( cbuf, 0xF4 );
2638     emit_opcode( cbuf, 0xFF );
2639     emit_opcode( cbuf, 0xFF );
2640     emit_opcode( cbuf, 0xFF );
2641   %}
2642 
2643   enc_class fpu_flags() %{
2644     // fnstsw_ax
2645     emit_opcode( cbuf, 0xDF);
2646     emit_opcode( cbuf, 0xE0);
2647     // test ax,0x0400
2648     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2649     emit_opcode( cbuf, 0xA9 );
2650     emit_d16   ( cbuf, 0x0400 );
2651     // // // This sequence works, but stalls for 12-16 cycles on PPro
2652     // // test rax,0x0400
2653     // emit_opcode( cbuf, 0xA9 );
2654     // emit_d32   ( cbuf, 0x00000400 );
2655     //
2656     // jz exit (no unordered comparison)
2657     emit_opcode( cbuf, 0x74 );
2658     emit_d8    ( cbuf, 0x02 );
2659     // mov ah,1 - treat as LT case (set carry flag)
2660     emit_opcode( cbuf, 0xB4 );
2661     emit_d8    ( cbuf, 0x01 );
2662     // sahf
2663     emit_opcode( cbuf, 0x9E);
2664   %}
2665 
2666   enc_class cmpF_P6_fixup() %{
2667     // Fixup the integer flags in case comparison involved a NaN
2668     //
2669     // JNP exit (no unordered comparison, P-flag is set by NaN)
2670     emit_opcode( cbuf, 0x7B );
2671     emit_d8    ( cbuf, 0x03 );
2672     // MOV AH,1 - treat as LT case (set carry flag)
2673     emit_opcode( cbuf, 0xB4 );
2674     emit_d8    ( cbuf, 0x01 );
2675     // SAHF
2676     emit_opcode( cbuf, 0x9E);
2677     // NOP     // target for branch to avoid branch to branch
2678     emit_opcode( cbuf, 0x90);
2679   %}
2680 
2681 //     fnstsw_ax();
2682 //     sahf();
2683 //     movl(dst, nan_result);
2684 //     jcc(Assembler::parity, exit);
2685 //     movl(dst, less_result);
2686 //     jcc(Assembler::below, exit);
2687 //     movl(dst, equal_result);
2688 //     jcc(Assembler::equal, exit);
2689 //     movl(dst, greater_result);
2690 
2691 // less_result     =  1;
2692 // greater_result  = -1;
2693 // equal_result    = 0;
2694 // nan_result      = -1;
2695 
2696   enc_class CmpF_Result(rRegI dst) %{
2697     // fnstsw_ax();
2698     emit_opcode( cbuf, 0xDF);
2699     emit_opcode( cbuf, 0xE0);
2700     // sahf
2701     emit_opcode( cbuf, 0x9E);
2702     // movl(dst, nan_result);
2703     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2704     emit_d32( cbuf, -1 );
2705     // jcc(Assembler::parity, exit);
2706     emit_opcode( cbuf, 0x7A );
2707     emit_d8    ( cbuf, 0x13 );
2708     // movl(dst, less_result);
2709     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2710     emit_d32( cbuf, -1 );
2711     // jcc(Assembler::below, exit);
2712     emit_opcode( cbuf, 0x72 );
2713     emit_d8    ( cbuf, 0x0C );
2714     // movl(dst, equal_result);
2715     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2716     emit_d32( cbuf, 0 );
2717     // jcc(Assembler::equal, exit);
2718     emit_opcode( cbuf, 0x74 );
2719     emit_d8    ( cbuf, 0x05 );
2720     // movl(dst, greater_result);
2721     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2722     emit_d32( cbuf, 1 );
2723   %}
2724 
2725 
2726   // Compare the longs and set flags
2727   // BROKEN!  Do Not use as-is
2728   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2729     // CMP    $src1.hi,$src2.hi
2730     emit_opcode( cbuf, 0x3B );
2731     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2732     // JNE,s  done
2733     emit_opcode(cbuf,0x75);
2734     emit_d8(cbuf, 2 );
2735     // CMP    $src1.lo,$src2.lo
2736     emit_opcode( cbuf, 0x3B );
2737     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2738 // done:
2739   %}
2740 
2741   enc_class convert_int_long( regL dst, rRegI src ) %{
2742     // mov $dst.lo,$src
2743     int dst_encoding = $dst$$reg;
2744     int src_encoding = $src$$reg;
2745     encode_Copy( cbuf, dst_encoding  , src_encoding );
2746     // mov $dst.hi,$src
2747     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2748     // sar $dst.hi,31
2749     emit_opcode( cbuf, 0xC1 );
2750     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2751     emit_d8(cbuf, 0x1F );
2752   %}
2753 
2754   enc_class convert_long_double( eRegL src ) %{
2755     // push $src.hi
2756     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2757     // push $src.lo
2758     emit_opcode(cbuf, 0x50+$src$$reg  );
2759     // fild 64-bits at [SP]
2760     emit_opcode(cbuf,0xdf);
2761     emit_d8(cbuf, 0x6C);
2762     emit_d8(cbuf, 0x24);
2763     emit_d8(cbuf, 0x00);
2764     // pop stack
2765     emit_opcode(cbuf, 0x83); // add  SP, #8
2766     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2767     emit_d8(cbuf, 0x8);
2768   %}
2769 
2770   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
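    // High-multiply-and-shift: for a shift count cnt in [32,63], the arithmetic
    // shift (x * y) >> cnt of the 64-bit product, truncated to 32 bits, equals
    // hi32(x * y) >> (cnt - 32), so a one-operand IMUL (EDX:EAX <- EAX * src1)
    // followed by SAR on EDX is all that is needed.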
2771     // IMUL   EDX:EAX,$src1
2772     emit_opcode( cbuf, 0xF7 );
2773     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2774     // SAR    EDX,$cnt-32
2775     int shift_count = ((int)$cnt$$constant) - 32;
2776     if (shift_count > 0) {
2777       emit_opcode(cbuf, 0xC1);
2778       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2779       emit_d8(cbuf, shift_count);
2780     }
2781   %}
2782 
  // This version doesn't have the trailing ADD ESP,8 (no stack restore)
2784   enc_class convert_long_double2( eRegL src ) %{
2785     // push $src.hi
2786     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2787     // push $src.lo
2788     emit_opcode(cbuf, 0x50+$src$$reg  );
2789     // fild 64-bits at [SP]
2790     emit_opcode(cbuf,0xdf);
2791     emit_d8(cbuf, 0x6C);
2792     emit_d8(cbuf, 0x24);
2793     emit_d8(cbuf, 0x00);
2794   %}
2795 
2796   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2797     // Basic idea: long = (long)int * (long)int
2798     // IMUL EDX:EAX, src
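    // (one-operand IMUL, F7 /5: EDX:EAX <- EAX * src, the full signed 64-bit product)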
2799     emit_opcode( cbuf, 0xF7 );
2800     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2801   %}
2802 
2803   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2804     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2805     // MUL EDX:EAX, src
2806     emit_opcode( cbuf, 0xF7 );
2807     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2808   %}
2809 
2810   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2811     // Basic idea: lo(result) = lo(x_lo * y_lo)
2812     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
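    // This follows from (x_hi*2^32 + x_lo) * (y_hi*2^32 + y_lo): the x_hi*y_hi
    // term lies entirely above bit 63 and drops out of a 64-bit result, and
    // only the low 32 bits of each cross term contribute to the high word.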
2813     // MOV    $tmp,$src.lo
2814     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2815     // IMUL   $tmp,EDX
2816     emit_opcode( cbuf, 0x0F );
2817     emit_opcode( cbuf, 0xAF );
2818     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2819     // MOV    EDX,$src.hi
2820     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2821     // IMUL   EDX,EAX
2822     emit_opcode( cbuf, 0x0F );
2823     emit_opcode( cbuf, 0xAF );
2824     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2825     // ADD    $tmp,EDX
2826     emit_opcode( cbuf, 0x03 );
2827     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2828     // MUL   EDX:EAX,$src.lo
2829     emit_opcode( cbuf, 0xF7 );
2830     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2831     // ADD    EDX,ESI
2832     emit_opcode( cbuf, 0x03 );
2833     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2834   %}
2835 
2836   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2837     // Basic idea: lo(result) = lo(src * y_lo)
2838     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
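    // src is an immL_127 (0..127), so its high word is zero and the cross term
    // involving it vanishes; the value also fits a sign-extended 8-bit
    // immediate, which is why the short IMUL-by-imm8 form (0x6B) can be used.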
2839     // IMUL   $tmp,EDX,$src
2840     emit_opcode( cbuf, 0x6B );
2841     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2842     emit_d8( cbuf, (int)$src$$constant );
2843     // MOV    EDX,$src
2844     emit_opcode(cbuf, 0xB8 + EDX_enc);
2845     emit_d32( cbuf, (int)$src$$constant );
2846     // MUL   EDX:EAX,EDX
2847     emit_opcode( cbuf, 0xF7 );
2848     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2849     // ADD    EDX,ESI
2850     emit_opcode( cbuf, 0x03 );
2851     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2852   %}
2853 
2854   enc_class long_div( eRegL src1, eRegL src2 ) %{
2855     // PUSH src1.hi
2856     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2857     // PUSH src1.lo
2858     emit_opcode(cbuf,               0x50+$src1$$reg  );
2859     // PUSH src2.hi
2860     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2861     // PUSH src2.lo
2862     emit_opcode(cbuf,               0x50+$src2$$reg  );
2863     // CALL directly to the runtime
2864     cbuf.set_insts_mark();
2865     emit_opcode(cbuf,0xE8);       // Call into runtime
2866     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2867     // Restore stack
2868     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2869     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2870     emit_d8(cbuf, 4*4);
2871   %}
2872 
2873   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2874     // PUSH src1.hi
2875     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2876     // PUSH src1.lo
2877     emit_opcode(cbuf,               0x50+$src1$$reg  );
2878     // PUSH src2.hi
2879     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2880     // PUSH src2.lo
2881     emit_opcode(cbuf,               0x50+$src2$$reg  );
2882     // CALL directly to the runtime
2883     cbuf.set_insts_mark();
2884     emit_opcode(cbuf,0xE8);       // Call into runtime
2885     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2886     // Restore stack
2887     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2888     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2889     emit_d8(cbuf, 4*4);
2890   %}
2891 
2892   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2893     // MOV   $tmp,$src.lo
2894     emit_opcode(cbuf, 0x8B);
2895     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2896     // OR    $tmp,$src.hi
2897     emit_opcode(cbuf, 0x0B);
2898     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2899   %}
2900 
2901   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2902     // CMP    $src1.lo,$src2.lo
2903     emit_opcode( cbuf, 0x3B );
2904     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2905     // JNE,s  skip
2906     emit_cc(cbuf, 0x70, 0x5);
2907     emit_d8(cbuf,2);
2908     // CMP    $src1.hi,$src2.hi
2909     emit_opcode( cbuf, 0x3B );
2910     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2911   %}
2912 
2913   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2914     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2915     emit_opcode( cbuf, 0x3B );
2916     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2917     // MOV    $tmp,$src1.hi
2918     emit_opcode( cbuf, 0x8B );
2919     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2920     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2921     emit_opcode( cbuf, 0x1B );
2922     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
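    // The borrow from the low-word CMP feeds the SBB of the high words, so the
    // resulting sign/overflow flags reflect the full signed 64-bit compare;
    // signed LT/GE tests on these flags are valid, EQ/NE are not.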
2923   %}
2924 
2925   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2926     // XOR    $tmp,$tmp
2927     emit_opcode(cbuf,0x33);  // XOR
2928     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2929     // CMP    $tmp,$src.lo
2930     emit_opcode( cbuf, 0x3B );
2931     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2932     // SBB    $tmp,$src.hi
2933     emit_opcode( cbuf, 0x1B );
2934     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2935   %}
2936 
2937  // Sniff, sniff... smells like Gnu Superoptimizer
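  // Negate a two-register long: NEG the high word, NEG the low word (which
  // sets CF if the low word was non-zero), then SBB 0 from the high word to
  // propagate the borrow.  E.g. negating hi:lo = 0x00000001:0x00000000 gives
  // hi = 0xFFFFFFFF after the NEGs (CF clear since lo was zero), and SBB
  // leaves 0xFFFFFFFF:0x00000000, i.e. -2^32 as expected.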
2938   enc_class neg_long( eRegL dst ) %{
2939     emit_opcode(cbuf,0xF7);    // NEG hi
2940     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2941     emit_opcode(cbuf,0xF7);    // NEG lo
2942     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2943     emit_opcode(cbuf,0x83);    // SBB hi,0
2944     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2945     emit_d8    (cbuf,0 );
2946   %}
2947 
2948   enc_class enc_pop_rdx() %{
2949     emit_opcode(cbuf,0x5A);
2950   %}
2951 
2952   enc_class enc_rethrow() %{
2953     cbuf.set_insts_mark();
2954     emit_opcode(cbuf, 0xE9);        // jmp    entry
2955     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2956                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2957   %}
2958 
2959 
  // Convert a double to an int.  Java semantics require we do complex
  // manglelations in the corner cases.  So we set the rounding mode to
  // 'zero', store the darned double down as an int, and reset the
  // rounding mode to 'nearest'.  Corner-case values (NaN, out-of-range)
  // come back as 0x80000000, which the code below detects and patches up
  // with a call into the runtime.
2965   enc_class DPR2I_encoding( regDPR src ) %{
    // Flip to round-to-zero mode.  We attempted to allow invalid-op
    // exceptions here, so that a NaN or other corner-case value would
    // throw an exception (but normal values get converted at full speed).
2969     // However, I2C adapters and other float-stack manglers leave pending
2970     // invalid-op exceptions hanging.  We would have to clear them before
2971     // enabling them and that is more expensive than just testing for the
2972     // invalid value Intel stores down in the corner cases.
2973     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2974     emit_opcode(cbuf,0x2D);
2975     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2976     // Allocate a word
2977     emit_opcode(cbuf,0x83);            // SUB ESP,4
2978     emit_opcode(cbuf,0xEC);
2979     emit_d8(cbuf,0x04);
2980     // Encoding assumes a double has been pushed into FPR0.
2981     // Store down the double as an int, popping the FPU stack
2982     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2983     emit_opcode(cbuf,0x1C);
2984     emit_d8(cbuf,0x24);
2985     // Restore the rounding mode; mask the exception
2986     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2987     emit_opcode(cbuf,0x2D);
2988     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2989         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2990         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2991 
2992     // Load the converted int; adjust CPU stack
2993     emit_opcode(cbuf,0x58);       // POP EAX
2994     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2995     emit_d32   (cbuf,0x80000000); //         0x80000000
2996     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2997     emit_d8    (cbuf,0x07);       // Size of slow_call
2998     // Push src onto stack slow-path
2999     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3000     emit_d8    (cbuf,0xC0-1+$src$$reg );
3001     // CALL directly to the runtime
3002     cbuf.set_insts_mark();
3003     emit_opcode(cbuf,0xE8);       // Call into runtime
3004     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3005     // Carry on here...
3006   %}
3007 
3008   enc_class DPR2L_encoding( regDPR src ) %{
3009     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3010     emit_opcode(cbuf,0x2D);
3011     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3012     // Allocate a word
3013     emit_opcode(cbuf,0x83);            // SUB ESP,8
3014     emit_opcode(cbuf,0xEC);
3015     emit_d8(cbuf,0x08);
3016     // Encoding assumes a double has been pushed into FPR0.
3017     // Store down the double as a long, popping the FPU stack
3018     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3019     emit_opcode(cbuf,0x3C);
3020     emit_d8(cbuf,0x24);
3021     // Restore the rounding mode; mask the exception
3022     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3023     emit_opcode(cbuf,0x2D);
3024     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3025         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3026         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3027 
3028     // Load the converted int; adjust CPU stack
3029     emit_opcode(cbuf,0x58);       // POP EAX
3030     emit_opcode(cbuf,0x5A);       // POP EDX
3031     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3032     emit_d8    (cbuf,0xFA);       // rdx
3033     emit_d32   (cbuf,0x80000000); //         0x80000000
3034     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3035     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3036     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3037     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3038     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3039     emit_d8    (cbuf,0x07);       // Size of slow_call
3040     // Push src onto stack slow-path
3041     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3042     emit_d8    (cbuf,0xC0-1+$src$$reg );
3043     // CALL directly to the runtime
3044     cbuf.set_insts_mark();
3045     emit_opcode(cbuf,0xE8);       // Call into runtime
3046     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3047     // Carry on here...
3048   %}
3049 
3050   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3051     // Operand was loaded from memory into fp ST (stack top)
3052     // FMUL   ST,$src  /* D8 C8+i */
3053     emit_opcode(cbuf, 0xD8);
3054     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3055   %}
3056 
3057   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // could use FADDP  src2,fpST  /* DE C0+i */
3062   %}
3063 
3064   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3065     // FADDP  src2,ST  /* DE C0+i */
3066     emit_opcode(cbuf, 0xDE);
3067     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3068   %}
3069 
3070   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3071     // Operand has been loaded into fp ST (stack top)
    // FSUB   ST,$src1
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xE0 + $src1$$reg);

    // FDIV   ST,$src2
    emit_opcode(cbuf, 0xD8);
    emit_opcode(cbuf, 0xF0 + $src2$$reg);
3079   %}
3080 
3081   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3082     // Operand was loaded from memory into fp ST (stack top)
3083     // FADD   ST,$src  /* D8 C0+i */
3084     emit_opcode(cbuf, 0xD8);
3085     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3086 
    // FMUL   ST,src2  /* D8 C8+i */
3088     emit_opcode(cbuf, 0xD8);
3089     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3090   %}
3091 
3092 
3093   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3094     // Operand was loaded from memory into fp ST (stack top)
3095     // FADD   ST,$src  /* D8 C0+i */
3096     emit_opcode(cbuf, 0xD8);
3097     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3098 
3099     // FMULP  src2,ST  /* DE C8+i */
3100     emit_opcode(cbuf, 0xDE);
3101     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3102   %}
3103 
3104   // Atomically load the volatile long
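  // by pulling it through the x87 unit: FILD (DF /5) performs a single 64-bit
  // read of the source, and FISTP (DF /7) then writes it to the stack slot.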
3105   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3106     emit_opcode(cbuf,0xDF);
3107     int rm_byte_opcode = 0x05;
3108     int base     = $mem$$base;
3109     int index    = $mem$$index;
3110     int scale    = $mem$$scale;
3111     int displace = $mem$$disp;
3112     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3113     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3114     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3115   %}
3116 
3117   // Volatile Store Long.  Must be atomic, so move it into
3118   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3119   // target address before the store (for null-ptr checks)
3120   // so the memory operand is used twice in the encoding.
3121   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3122     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3123     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3124     emit_opcode(cbuf,0xDF);
3125     int rm_byte_opcode = 0x07;
3126     int base     = $mem$$base;
3127     int index    = $mem$$index;
3128     int scale    = $mem$$scale;
3129     int displace = $mem$$disp;
3130     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3131     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3132   %}
3133 
  // Safepoint Poll.  This polls the safepoint page and causes an
  // exception if it is not readable.  Unfortunately, it kills the
  // condition code in the process.
  // We currently use TESTL [spp],EDI.
  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0.
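  // The encoding below is TEST r/m32,r32 (0x85) with mod=00, rm=101, i.e. a
  // 32-bit absolute address: TEST [polling_page],EDI.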
3139 
3140   enc_class Safepoint_Poll() %{
3141     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3142     emit_opcode(cbuf,0x85);
3143     emit_rm (cbuf, 0x0, 0x7, 0x5);
3144     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3145   %}
3146 %}
3147 
3148 
3149 //----------FRAME--------------------------------------------------------------
3150 // Definition of frame structure and management information.
3151 //
3152 //  S T A C K   L A Y O U T    Allocators stack-slot number
3153 //                             |   (to get allocators register number
3154 //  G  Owned by    |        |  v    add OptoReg::stack0())
3155 //  r   CALLER     |        |
3156 //  o     |        +--------+      pad to even-align allocators stack-slot
3157 //  w     V        |  pad0  |        numbers; owned by CALLER
3158 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3159 //  h     ^        |   in   |  5
3160 //        |        |  args  |  4   Holes in incoming args owned by SELF
3161 //  |     |        |        |  3
3162 //  |     |        +--------+
3163 //  V     |        | old out|      Empty on Intel, window on Sparc
3164 //        |    old |preserve|      Must be even aligned.
3165 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3166 //        |        |   in   |  3   area for Intel ret address
3167 //     Owned by    |preserve|      Empty on Sparc.
3168 //       SELF      +--------+
3169 //        |        |  pad2  |  2   pad to align old SP
3170 //        |        +--------+  1
3171 //        |        | locks  |  0
3172 //        |        +--------+----> OptoReg::stack0(), even aligned
3173 //        |        |  pad1  | 11   pad to align new SP
3174 //        |        +--------+
3175 //        |        |        | 10
3176 //        |        | spills |  9   spills
3177 //        V        |        |  8   (pad0 slot for callee)
3178 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3179 //        ^        |  out   |  7
3180 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3181 //     Owned by    +--------+
3182 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3183 //        |    new |preserve|      Must be even-aligned.
3184 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3185 //        |        |        |
3186 //
3187 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3188 //         known from SELF's arguments and the Java calling convention.
3189 //         Region 6-7 is determined per call site.
3190 // Note 2: If the calling convention leaves holes in the incoming argument
3191 //         area, those holes are owned by SELF.  Holes in the outgoing area
3192 //         are owned by the CALLEE.  Holes should not be nessecary in the
3193 //         incoming area, as the Java calling convention is completely under
3194 //         the control of the AD file.  Doubles can be sorted and packed to
3195 //         avoid holes.  Holes in the outgoing arguments may be nessecary for
3196 //         varargs C calling conventions.
3197 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3198 //         even aligned with pad0 as needed.
3199 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3200 //         region 6-11 is even aligned; it may be padded out more so that
3201 //         the region from SP to FP meets the minimum stack alignment.
3202 
3203 frame %{
  // Which direction does the stack grow (assumed to be the same for C & Java)
3205   stack_direction(TOWARDS_LOW);
3206 
3207   // These three registers define part of the calling convention
3208   // between compiled code and the interpreter.
3209   inline_cache_reg(EAX);                // Inline Cache Register
3210   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3211 
3212   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3213   cisc_spilling_operand_name(indOffset32);
3214 
3215   // Number of stack slots consumed by locking an object
3216   sync_stack_slots(1);
3217 
3218   // Compiled code's Frame Pointer
3219   frame_pointer(ESP);
  // The interpreter stores its frame pointer in a register which is
  // stored to the stack by I2C adapters.
  // I2C adapters convert from interpreted Java to compiled Java.
3223   interpreter_frame_pointer(EBP);
3224 
3225   // Stack alignment requirement
3226   // Alignment size in bytes (128-bit -> 16 bytes)
3227   stack_alignment(StackAlignmentInBytes);
3228 
3229   // Number of stack slots between incoming argument block and the start of
3230   // a new frame.  The PROLOG must add this many slots to the stack.  The
3231   // EPILOG must remove this many slots.  Intel needs one slot for
  // the return address and one for EBP (EBP must be saved).
3233   in_preserve_stack_slots(2+VerifyStackAtCalls);
3234 
3235   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3236   // for calls to C.  Supports the var-args backing area for register parms.
3237   varargs_C_out_slots_killed(0);
3238 
3239   // The after-PROLOG location of the return address.  Location of
3240   // return address specifies a type (REG or STACK) and a number
3241   // representing the register number (i.e. - use a register name) or
3242   // stack slot.
3243   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks, verification slot, and alignment word.
3245   return_addr(STACK - 1 +
3246               round_to((Compile::current()->in_preserve_stack_slots() +
3247                         Compile::current()->fixed_slots()),
3248                        stack_alignment_in_slots()));
3249 
3250   // Body of function which returns an integer array locating
3251   // arguments either in registers or in stack slots.  Passed an array
3252   // of ideal registers called "sig" and a "length" count.  Stack-slot
3253   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3254   // arguments for a CALLEE.  Incoming stack arguments are
3255   // automatically biased by the preserve_stack_slots field above.
3256   calling_convention %{
    // No difference between ingoing/outgoing, so just pass false
3258     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3259   %}
3260 
3261 
3262   // Body of function which returns an integer array locating
3263   // arguments either in registers or in stack slots.  Passed an array
3264   // of ideal registers called "sig" and a "length" count.  Stack-slot
3265   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3266   // arguments for a CALLEE.  Incoming stack arguments are
3267   // automatically biased by the preserve_stack_slots field above.
3268   c_calling_convention %{
3269     // This is obviously always outgoing
3270     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3271   %}
3272 
3273   // Location of C & interpreter return values
3274   c_return_value %{
3275     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3276     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3277     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3278 
3279     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3280     // that C functions return float and double results in XMM0.
3281     if( ideal_reg == Op_RegD && UseSSE>=2 )
3282       return OptoRegPair(XMM0b_num,XMM0_num);
3283     if( ideal_reg == Op_RegF && UseSSE>=2 )
3284       return OptoRegPair(OptoReg::Bad,XMM0_num);
3285 
3286     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3287   %}
3288 
3289   // Location of return values
3290   return_value %{
3291     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3292     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3293     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3294     if( ideal_reg == Op_RegD && UseSSE>=2 )
3295       return OptoRegPair(XMM0b_num,XMM0_num);
3296     if( ideal_reg == Op_RegF && UseSSE>=1 )
3297       return OptoRegPair(OptoReg::Bad,XMM0_num);
3298     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3299   %}
3300 
3301 %}
3302 
3303 //----------ATTRIBUTES---------------------------------------------------------
3304 //----------Operand Attributes-------------------------------------------------
3305 op_attrib op_cost(0);        // Required cost attribute
3306 
3307 //----------Instruction Attributes---------------------------------------------
3308 ins_attrib ins_cost(100);       // Required cost attribute
3309 ins_attrib ins_size(8);         // Required size attribute (in bits)
3310 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3311                                 // non-matching short branch variant of some
                                // long branch?
3313 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3314                                 // specifies the alignment that some part of the instruction (not
3315                                 // necessarily the start) requires.  If > 1, a compute_padding()
3316                                 // function must be provided for the instruction
3317 
3318 //----------OPERANDS-----------------------------------------------------------
3319 // Operand definitions must precede instruction definitions for correct parsing
3320 // in the ADLC because operands constitute user defined types which are used in
3321 // instruction definitions.
3322 
3323 //----------Simple Operands----------------------------------------------------
3324 // Immediate Operands
3325 // Integer Immediate
3326 operand immI() %{
3327   match(ConI);
3328 
3329   op_cost(10);
3330   format %{ %}
3331   interface(CONST_INTER);
3332 %}
3333 
3334 // Constant for test vs zero
3335 operand immI0() %{
3336   predicate(n->get_int() == 0);
3337   match(ConI);
3338 
3339   op_cost(0);
3340   format %{ %}
3341   interface(CONST_INTER);
3342 %}
3343 
3344 // Constant for increment
3345 operand immI1() %{
3346   predicate(n->get_int() == 1);
3347   match(ConI);
3348 
3349   op_cost(0);
3350   format %{ %}
3351   interface(CONST_INTER);
3352 %}
3353 
3354 // Constant for decrement
3355 operand immI_M1() %{
3356   predicate(n->get_int() == -1);
3357   match(ConI);
3358 
3359   op_cost(0);
3360   format %{ %}
3361   interface(CONST_INTER);
3362 %}
3363 
3364 // Valid scale values for addressing modes
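// The SIB byte stores the scale as a shift count, so 0..3 selects a scale
// factor of 1, 2, 4 or 8.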
3365 operand immI2() %{
3366   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3367   match(ConI);
3368 
3369   format %{ %}
3370   interface(CONST_INTER);
3371 %}
3372 
3373 operand immI8() %{
3374   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3375   match(ConI);
3376 
3377   op_cost(5);
3378   format %{ %}
3379   interface(CONST_INTER);
3380 %}
3381 
3382 operand immI16() %{
3383   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3384   match(ConI);
3385 
3386   op_cost(10);
3387   format %{ %}
3388   interface(CONST_INTER);
3389 %}
3390 
3391 // Int Immediate non-negative
3392 operand immU31()
3393 %{
3394   predicate(n->get_int() >= 0);
3395   match(ConI);
3396 
3397   op_cost(0);
3398   format %{ %}
3399   interface(CONST_INTER);
3400 %}
3401 
3402 // Constant for long shifts
3403 operand immI_32() %{
3404   predicate( n->get_int() == 32 );
3405   match(ConI);
3406 
3407   op_cost(0);
3408   format %{ %}
3409   interface(CONST_INTER);
3410 %}
3411 
3412 operand immI_1_31() %{
3413   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3414   match(ConI);
3415 
3416   op_cost(0);
3417   format %{ %}
3418   interface(CONST_INTER);
3419 %}
3420 
3421 operand immI_32_63() %{
3422   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3423   match(ConI);
3424   op_cost(0);
3425 
3426   format %{ %}
3427   interface(CONST_INTER);
3428 %}
3429 
3430 operand immI_1() %{
3431   predicate( n->get_int() == 1 );
3432   match(ConI);
3433 
3434   op_cost(0);
3435   format %{ %}
3436   interface(CONST_INTER);
3437 %}
3438 
3439 operand immI_2() %{
3440   predicate( n->get_int() == 2 );
3441   match(ConI);
3442 
3443   op_cost(0);
3444   format %{ %}
3445   interface(CONST_INTER);
3446 %}
3447 
3448 operand immI_3() %{
3449   predicate( n->get_int() == 3 );
3450   match(ConI);
3451 
3452   op_cost(0);
3453   format %{ %}
3454   interface(CONST_INTER);
3455 %}
3456 
3457 // Pointer Immediate
3458 operand immP() %{
3459   match(ConP);
3460 
3461   op_cost(10);
3462   format %{ %}
3463   interface(CONST_INTER);
3464 %}
3465 
3466 // NULL Pointer Immediate
3467 operand immP0() %{
3468   predicate( n->get_ptr() == 0 );
3469   match(ConP);
3470   op_cost(0);
3471 
3472   format %{ %}
3473   interface(CONST_INTER);
3474 %}
3475 
3476 // Long Immediate
3477 operand immL() %{
3478   match(ConL);
3479 
3480   op_cost(20);
3481   format %{ %}
3482   interface(CONST_INTER);
3483 %}
3484 
3485 // Long Immediate zero
3486 operand immL0() %{
3487   predicate( n->get_long() == 0L );
3488   match(ConL);
3489   op_cost(0);
3490 
3491   format %{ %}
3492   interface(CONST_INTER);
3493 %}
3494 
// Long Immediate -1 (all bits set)
3496 operand immL_M1() %{
3497   predicate( n->get_long() == -1L );
3498   match(ConL);
3499   op_cost(0);
3500 
3501   format %{ %}
3502   interface(CONST_INTER);
3503 %}
3504 
3505 // Long immediate from 0 to 127.
3506 // Used for a shorter form of long mul by 10.
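// Values in this range fit a sign-extended 8-bit immediate, so the short
// IMUL r32,r/m32,imm8 form (opcode 0x6B) can be used; see long_multiply_con.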
3507 operand immL_127() %{
3508   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3509   match(ConL);
3510   op_cost(0);
3511 
3512   format %{ %}
3513   interface(CONST_INTER);
3514 %}
3515 
3516 // Long Immediate: low 32-bit mask
3517 operand immL_32bits() %{
3518   predicate(n->get_long() == 0xFFFFFFFFL);
3519   match(ConL);
3520   op_cost(0);
3521 
3522   format %{ %}
3523   interface(CONST_INTER);
3524 %}
3525 
// Long Immediate: value fits in a sign-extended 32-bit immediate
3527 operand immL32() %{
3528   predicate(n->get_long() == (int)(n->get_long()));
3529   match(ConL);
3530   op_cost(20);
3531 
3532   format %{ %}
3533   interface(CONST_INTER);
3534 %}
3535 
// Double Immediate zero
3537 operand immDPR0() %{
  // Do an additional (and counter-intuitive) test against NaN to work around a
  // VC++ bug that generates code such that NaNs compare equal to 0.0.
3540   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3541   match(ConD);
3542 
3543   op_cost(5);
3544   format %{ %}
3545   interface(CONST_INTER);
3546 %}
3547 
3548 // Double Immediate one
3549 operand immDPR1() %{
3550   predicate( UseSSE<=1 && n->getd() == 1.0 );
3551   match(ConD);
3552 
3553   op_cost(5);
3554   format %{ %}
3555   interface(CONST_INTER);
3556 %}
3557 
3558 // Double Immediate
3559 operand immDPR() %{
3560   predicate(UseSSE<=1);
3561   match(ConD);
3562 
3563   op_cost(5);
3564   format %{ %}
3565   interface(CONST_INTER);
3566 %}
3567 
3568 operand immD() %{
3569   predicate(UseSSE>=2);
3570   match(ConD);
3571 
3572   op_cost(5);
3573   format %{ %}
3574   interface(CONST_INTER);
3575 %}
3576 
3577 // Double Immediate zero
3578 operand immD0() %{
  // Do an additional (and counter-intuitive) test against NaN to work around a
  // VC++ bug that generates code such that NaNs compare equal to 0.0 AND do not
3581   // compare equal to -0.0.
3582   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3583   match(ConD);
3584 
3585   format %{ %}
3586   interface(CONST_INTER);
3587 %}
3588 
3589 // Float Immediate zero
3590 operand immFPR0() %{
3591   predicate(UseSSE == 0 && n->getf() == 0.0F);
3592   match(ConF);
3593 
3594   op_cost(5);
3595   format %{ %}
3596   interface(CONST_INTER);
3597 %}
3598 
3599 // Float Immediate one
3600 operand immFPR1() %{
3601   predicate(UseSSE == 0 && n->getf() == 1.0F);
3602   match(ConF);
3603 
3604   op_cost(5);
3605   format %{ %}
3606   interface(CONST_INTER);
3607 %}
3608 
3609 // Float Immediate
3610 operand immFPR() %{
3611   predicate( UseSSE == 0 );
3612   match(ConF);
3613 
3614   op_cost(5);
3615   format %{ %}
3616   interface(CONST_INTER);
3617 %}
3618 
3619 // Float Immediate
3620 operand immF() %{
3621   predicate(UseSSE >= 1);
3622   match(ConF);
3623 
3624   op_cost(5);
3625   format %{ %}
3626   interface(CONST_INTER);
3627 %}
3628 
3629 // Float Immediate zero.  Zero and not -0.0
3630 operand immF0() %{
3631   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3632   match(ConF);
3633 
3634   op_cost(5);
3635   format %{ %}
3636   interface(CONST_INTER);
3637 %}
3638 
3639 // Immediates for special shifts (sign extend)
3640 
3641 // Constants for increment
3642 operand immI_16() %{
3643   predicate( n->get_int() == 16 );
3644   match(ConI);
3645 
3646   format %{ %}
3647   interface(CONST_INTER);
3648 %}
3649 
3650 operand immI_24() %{
3651   predicate( n->get_int() == 24 );
3652   match(ConI);
3653 
3654   format %{ %}
3655   interface(CONST_INTER);
3656 %}
3657 
3658 // Constant for byte-wide masking
3659 operand immI_255() %{
3660   predicate( n->get_int() == 255 );
3661   match(ConI);
3662 
3663   format %{ %}
3664   interface(CONST_INTER);
3665 %}
3666 
3667 // Constant for short-wide masking
3668 operand immI_65535() %{
3669   predicate(n->get_int() == 65535);
3670   match(ConI);
3671 
3672   format %{ %}
3673   interface(CONST_INTER);
3674 %}
3675 
3676 // Register Operands
3677 // Integer Register
3678 operand rRegI() %{
3679   constraint(ALLOC_IN_RC(int_reg));
3680   match(RegI);
3681   match(xRegI);
3682   match(eAXRegI);
3683   match(eBXRegI);
3684   match(eCXRegI);
3685   match(eDXRegI);
3686   match(eDIRegI);
3687   match(eSIRegI);
3688 
3689   format %{ %}
3690   interface(REG_INTER);
3691 %}
3692 
3693 // Subset of Integer Register
3694 operand xRegI(rRegI reg) %{
3695   constraint(ALLOC_IN_RC(int_x_reg));
3696   match(reg);
3697   match(eAXRegI);
3698   match(eBXRegI);
3699   match(eCXRegI);
3700   match(eDXRegI);
3701 
3702   format %{ %}
3703   interface(REG_INTER);
3704 %}
3705 
3706 // Special Registers
3707 operand eAXRegI(xRegI reg) %{
3708   constraint(ALLOC_IN_RC(eax_reg));
3709   match(reg);
3710   match(rRegI);
3711 
3712   format %{ "EAX" %}
3713   interface(REG_INTER);
3714 %}
3715 
3716 // Special Registers
3717 operand eBXRegI(xRegI reg) %{
3718   constraint(ALLOC_IN_RC(ebx_reg));
3719   match(reg);
3720   match(rRegI);
3721 
3722   format %{ "EBX" %}
3723   interface(REG_INTER);
3724 %}
3725 
3726 operand eCXRegI(xRegI reg) %{
3727   constraint(ALLOC_IN_RC(ecx_reg));
3728   match(reg);
3729   match(rRegI);
3730 
3731   format %{ "ECX" %}
3732   interface(REG_INTER);
3733 %}
3734 
3735 operand eDXRegI(xRegI reg) %{
3736   constraint(ALLOC_IN_RC(edx_reg));
3737   match(reg);
3738   match(rRegI);
3739 
3740   format %{ "EDX" %}
3741   interface(REG_INTER);
3742 %}
3743 
3744 operand eDIRegI(xRegI reg) %{
3745   constraint(ALLOC_IN_RC(edi_reg));
3746   match(reg);
3747   match(rRegI);
3748 
3749   format %{ "EDI" %}
3750   interface(REG_INTER);
3751 %}
3752 
3753 operand naxRegI() %{
3754   constraint(ALLOC_IN_RC(nax_reg));
3755   match(RegI);
3756   match(eCXRegI);
3757   match(eDXRegI);
3758   match(eSIRegI);
3759   match(eDIRegI);
3760 
3761   format %{ %}
3762   interface(REG_INTER);
3763 %}
3764 
3765 operand nadxRegI() %{
3766   constraint(ALLOC_IN_RC(nadx_reg));
3767   match(RegI);
3768   match(eBXRegI);
3769   match(eCXRegI);
3770   match(eSIRegI);
3771   match(eDIRegI);
3772 
3773   format %{ %}
3774   interface(REG_INTER);
3775 %}
3776 
3777 operand ncxRegI() %{
3778   constraint(ALLOC_IN_RC(ncx_reg));
3779   match(RegI);
3780   match(eAXRegI);
3781   match(eDXRegI);
3782   match(eSIRegI);
3783   match(eDIRegI);
3784 
3785   format %{ %}
3786   interface(REG_INTER);
3787 %}
3788 
// This operand was used by cmpFastUnlock, but conflicted with the 'object' reg.
operand eSIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  match(rRegI);

  format %{ "ESI" %}
  interface(REG_INTER);
%}
3799 
3800 // Pointer Register
3801 operand anyRegP() %{
3802   constraint(ALLOC_IN_RC(any_reg));
3803   match(RegP);
3804   match(eAXRegP);
3805   match(eBXRegP);
3806   match(eCXRegP);
3807   match(eDIRegP);
3808   match(eRegP);
3809 
3810   format %{ %}
3811   interface(REG_INTER);
3812 %}
3813 
3814 operand eRegP() %{
3815   constraint(ALLOC_IN_RC(int_reg));
3816   match(RegP);
3817   match(eAXRegP);
3818   match(eBXRegP);
3819   match(eCXRegP);
3820   match(eDIRegP);
3821 
3822   format %{ %}
3823   interface(REG_INTER);
3824 %}
3825 
// On Windows 95, EBP is not safe to use for implicit null tests.
3827 operand eRegP_no_EBP() %{
3828   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3829   match(RegP);
3830   match(eAXRegP);
3831   match(eBXRegP);
3832   match(eCXRegP);
3833   match(eDIRegP);
3834 
3835   op_cost(100);
3836   format %{ %}
3837   interface(REG_INTER);
3838 %}
3839 
3840 operand naxRegP() %{
3841   constraint(ALLOC_IN_RC(nax_reg));
3842   match(RegP);
3843   match(eBXRegP);
3844   match(eDXRegP);
3845   match(eCXRegP);
3846   match(eSIRegP);
3847   match(eDIRegP);
3848 
3849   format %{ %}
3850   interface(REG_INTER);
3851 %}
3852 
3853 operand nabxRegP() %{
3854   constraint(ALLOC_IN_RC(nabx_reg));
3855   match(RegP);
3856   match(eCXRegP);
3857   match(eDXRegP);
3858   match(eSIRegP);
3859   match(eDIRegP);
3860 
3861   format %{ %}
3862   interface(REG_INTER);
3863 %}
3864 
3865 operand pRegP() %{
3866   constraint(ALLOC_IN_RC(p_reg));
3867   match(RegP);
3868   match(eBXRegP);
3869   match(eDXRegP);
3870   match(eSIRegP);
3871   match(eDIRegP);
3872 
3873   format %{ %}
3874   interface(REG_INTER);
3875 %}
3876 
3877 // Special Registers
3878 // Return a pointer value
3879 operand eAXRegP(eRegP reg) %{
3880   constraint(ALLOC_IN_RC(eax_reg));
3881   match(reg);
3882   format %{ "EAX" %}
3883   interface(REG_INTER);
3884 %}
3885 
3886 // Used in AtomicAdd
3887 operand eBXRegP(eRegP reg) %{
3888   constraint(ALLOC_IN_RC(ebx_reg));
3889   match(reg);
3890   format %{ "EBX" %}
3891   interface(REG_INTER);
3892 %}
3893 
3894 // Tail-call (interprocedural jump) to interpreter
3895 operand eCXRegP(eRegP reg) %{
3896   constraint(ALLOC_IN_RC(ecx_reg));
3897   match(reg);
3898   format %{ "ECX" %}
3899   interface(REG_INTER);
3900 %}
3901 
3902 operand eSIRegP(eRegP reg) %{
3903   constraint(ALLOC_IN_RC(esi_reg));
3904   match(reg);
3905   format %{ "ESI" %}
3906   interface(REG_INTER);
3907 %}
3908 
3909 // Used in rep stosw
3910 operand eDIRegP(eRegP reg) %{
3911   constraint(ALLOC_IN_RC(edi_reg));
3912   match(reg);
3913   format %{ "EDI" %}
3914   interface(REG_INTER);
3915 %}
3916 
3917 operand eRegL() %{
3918   constraint(ALLOC_IN_RC(long_reg));
3919   match(RegL);
3920   match(eADXRegL);
3921 
3922   format %{ %}
3923   interface(REG_INTER);
3924 %}
3925 
3926 operand eADXRegL( eRegL reg ) %{
3927   constraint(ALLOC_IN_RC(eadx_reg));
3928   match(reg);
3929 
3930   format %{ "EDX:EAX" %}
3931   interface(REG_INTER);
3932 %}
3933 
3934 operand eBCXRegL( eRegL reg ) %{
3935   constraint(ALLOC_IN_RC(ebcx_reg));
3936   match(reg);
3937 
3938   format %{ "EBX:ECX" %}
3939   interface(REG_INTER);
3940 %}
3941 
3942 // Special case for integer high multiply
3943 operand eADXRegL_low_only() %{
3944   constraint(ALLOC_IN_RC(eadx_reg));
3945   match(RegL);
3946 
3947   format %{ "EAX" %}
3948   interface(REG_INTER);
3949 %}
3950 
3951 // Flags register, used as output of compare instructions
3952 operand eFlagsReg() %{
3953   constraint(ALLOC_IN_RC(int_flags));
3954   match(RegFlags);
3955 
3956   format %{ "EFLAGS" %}
3957   interface(REG_INTER);
3958 %}
3959 
3960 // Flags register, used as output of FLOATING POINT compare instructions
3961 operand eFlagsRegU() %{
3962   constraint(ALLOC_IN_RC(int_flags));
3963   match(RegFlags);
3964 
3965   format %{ "EFLAGS_U" %}
3966   interface(REG_INTER);
3967 %}
3968 
3969 operand eFlagsRegUCF() %{
3970   constraint(ALLOC_IN_RC(int_flags));
3971   match(RegFlags);
3972   predicate(false);
3973 
3974   format %{ "EFLAGS_U_CF" %}
3975   interface(REG_INTER);
3976 %}
3977 
3978 // Condition Code Register used by long compare
3979 operand flagsReg_long_LTGE() %{
3980   constraint(ALLOC_IN_RC(int_flags));
3981   match(RegFlags);
3982   format %{ "FLAGS_LTGE" %}
3983   interface(REG_INTER);
3984 %}
3985 operand flagsReg_long_EQNE() %{
3986   constraint(ALLOC_IN_RC(int_flags));
3987   match(RegFlags);
3988   format %{ "FLAGS_EQNE" %}
3989   interface(REG_INTER);
3990 %}
3991 operand flagsReg_long_LEGT() %{
3992   constraint(ALLOC_IN_RC(int_flags));
3993   match(RegFlags);
3994   format %{ "FLAGS_LEGT" %}
3995   interface(REG_INTER);
3996 %}
3997 
3998 // Float register operands
3999 operand regDPR() %{
4000   predicate( UseSSE < 2 );
4001   constraint(ALLOC_IN_RC(fp_dbl_reg));
4002   match(RegD);
4003   match(regDPR1);
4004   match(regDPR2);
4005   format %{ %}
4006   interface(REG_INTER);
4007 %}
4008 
4009 operand regDPR1(regDPR reg) %{
4010   predicate( UseSSE < 2 );
4011   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4012   match(reg);
4013   format %{ "FPR1" %}
4014   interface(REG_INTER);
4015 %}
4016 
4017 operand regDPR2(regDPR reg) %{
4018   predicate( UseSSE < 2 );
4019   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4020   match(reg);
4021   format %{ "FPR2" %}
4022   interface(REG_INTER);
4023 %}
4024 
4025 operand regnotDPR1(regDPR reg) %{
4026   predicate( UseSSE < 2 );
4027   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4028   match(reg);
4029   format %{ %}
4030   interface(REG_INTER);
4031 %}
4032 
4033 // Float register operands
4034 operand regFPR() %{
4035   predicate( UseSSE < 2 );
4036   constraint(ALLOC_IN_RC(fp_flt_reg));
4037   match(RegF);
4038   match(regFPR1);
4039   format %{ %}
4040   interface(REG_INTER);
4041 %}
4042 
4043 // Float register operands
4044 operand regFPR1(regFPR reg) %{
4045   predicate( UseSSE < 2 );
4046   constraint(ALLOC_IN_RC(fp_flt_reg0));
4047   match(reg);
4048   format %{ "FPR1" %}
4049   interface(REG_INTER);
4050 %}
4051 
4052 // XMM Float register operands
4053 operand regF() %{
4054   predicate( UseSSE>=1 );
4055   constraint(ALLOC_IN_RC(float_reg_legacy));
4056   match(RegF);
4057   format %{ %}
4058   interface(REG_INTER);
4059 %}
4060 
4061 // XMM Double register operands
4062 operand regD() %{
4063   predicate( UseSSE>=2 );
4064   constraint(ALLOC_IN_RC(double_reg_legacy));
4065   match(RegD);
4066   format %{ %}
4067   interface(REG_INTER);
4068 %}
4069 
// Vectors: note that we use legacy registers here to avoid the extra runtime
// code generation via reg_class_dynamic (which is unneeded in the 32-bit VM).
4072 operand vecS() %{
4073   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4074   match(VecS);
4075 
4076   format %{ %}
4077   interface(REG_INTER);
4078 %}
4079 
4080 operand vecD() %{
4081   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4082   match(VecD);
4083 
4084   format %{ %}
4085   interface(REG_INTER);
4086 %}
4087 
4088 operand vecX() %{
4089   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4090   match(VecX);
4091 
4092   format %{ %}
4093   interface(REG_INTER);
4094 %}
4095 
4096 operand vecY() %{
4097   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4098   match(VecY);
4099 
4100   format %{ %}
4101   interface(REG_INTER);
4102 %}
4103 
4104 //----------Memory Operands----------------------------------------------------
4105 // Direct Memory Operand
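// In these memory operands index(0x4) means "no index register" (ESP cannot be
// a SIB index); here base(0xFFFFFFFF) likewise stands for "no base register",
// leaving only the absolute 32-bit displacement.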
4106 operand direct(immP addr) %{
4107   match(addr);
4108 
4109   format %{ "[$addr]" %}
4110   interface(MEMORY_INTER) %{
4111     base(0xFFFFFFFF);
4112     index(0x4);
4113     scale(0x0);
4114     disp($addr);
4115   %}
4116 %}
4117 
4118 // Indirect Memory Operand
4119 operand indirect(eRegP reg) %{
4120   constraint(ALLOC_IN_RC(int_reg));
4121   match(reg);
4122 
4123   format %{ "[$reg]" %}
4124   interface(MEMORY_INTER) %{
4125     base($reg);
4126     index(0x4);
4127     scale(0x0);
4128     disp(0x0);
4129   %}
4130 %}
4131 
4132 // Indirect Memory Plus Short Offset Operand
4133 operand indOffset8(eRegP reg, immI8 off) %{
4134   match(AddP reg off);
4135 
4136   format %{ "[$reg + $off]" %}
4137   interface(MEMORY_INTER) %{
4138     base($reg);
4139     index(0x4);
4140     scale(0x0);
4141     disp($off);
4142   %}
4143 %}
4144 
4145 // Indirect Memory Plus Long Offset Operand
4146 operand indOffset32(eRegP reg, immI off) %{
4147   match(AddP reg off);
4148 
4149   format %{ "[$reg + $off]" %}
4150   interface(MEMORY_INTER) %{
4151     base($reg);
4152     index(0x4);
4153     scale(0x0);
4154     disp($off);
4155   %}
4156 %}
4157 
4158 // Indirect Memory Plus Long Offset Operand
4159 operand indOffset32X(rRegI reg, immP off) %{
4160   match(AddP off reg);
4161 
4162   format %{ "[$reg + $off]" %}
4163   interface(MEMORY_INTER) %{
4164     base($reg);
4165     index(0x4);
4166     scale(0x0);
4167     disp($off);
4168   %}
4169 %}
4170 
4171 // Indirect Memory Plus Index Register Plus Offset Operand
4172 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4173   match(AddP (AddP reg ireg) off);
4174 
4175   op_cost(10);
4176   format %{"[$reg + $off + $ireg]" %}
4177   interface(MEMORY_INTER) %{
4178     base($reg);
4179     index($ireg);
4180     scale(0x0);
4181     disp($off);
4182   %}
4183 %}
4184 
4185 // Indirect Memory Plus Index Register Plus Offset Operand
4186 operand indIndex(eRegP reg, rRegI ireg) %{
4187   match(AddP reg ireg);
4188 
4189   op_cost(10);
4190   format %{"[$reg + $ireg]" %}
4191   interface(MEMORY_INTER) %{
4192     base($reg);
4193     index($ireg);
4194     scale(0x0);
4195     disp(0x0);
4196   %}
4197 %}
4198 
4199 // // -------------------------------------------------------------------------
// // The 486 architecture doesn't support "scale * index + offset" without a base
4201 // // -------------------------------------------------------------------------
4202 // // Scaled Memory Operands
4203 // // Indirect Memory Times Scale Plus Offset Operand
4204 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4205 //   match(AddP off (LShiftI ireg scale));
4206 //
4207 //   op_cost(10);
4208 //   format %{"[$off + $ireg << $scale]" %}
4209 //   interface(MEMORY_INTER) %{
4210 //     base(0x4);
4211 //     index($ireg);
4212 //     scale($scale);
4213 //     disp($off);
4214 //   %}
4215 // %}
4216 
4217 // Indirect Memory Times Scale Plus Index Register
4218 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4219   match(AddP reg (LShiftI ireg scale));
4220 
4221   op_cost(10);
4222   format %{"[$reg + $ireg << $scale]" %}
4223   interface(MEMORY_INTER) %{
4224     base($reg);
4225     index($ireg);
4226     scale($scale);
4227     disp(0x0);
4228   %}
4229 %}
4230 
4231 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4232 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4233   match(AddP (AddP reg (LShiftI ireg scale)) off);
4234 
4235   op_cost(10);
4236   format %{"[$reg + $off + $ireg << $scale]" %}
4237   interface(MEMORY_INTER) %{
4238     base($reg);
4239     index($ireg);
4240     scale($scale);
4241     disp($off);
4242   %}
4243 %}
4244 
4245 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
4247 // the first word of the long.  If the load-long destination overlaps with
4248 // registers used in the addressing expression, the 2nd half will be loaded
4249 // from a clobbered address.  Fix this by requiring that load-long use
4250 // address registers that do not overlap with the load-long target.
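// For example, "MOV EAX,[EAX]; MOV EDX,[EAX+4]" would read the second word
// through the already-clobbered base register.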
4251 
4252 // load-long support
4253 operand load_long_RegP() %{
4254   constraint(ALLOC_IN_RC(esi_reg));
4255   match(RegP);
4256   match(eSIRegP);
4257   op_cost(100);
4258   format %{  %}
4259   interface(REG_INTER);
4260 %}
4261 
4262 // Indirect Memory Operand Long
4263 operand load_long_indirect(load_long_RegP reg) %{
4264   constraint(ALLOC_IN_RC(esi_reg));
4265   match(reg);
4266 
4267   format %{ "[$reg]" %}
4268   interface(MEMORY_INTER) %{
4269     base($reg);
4270     index(0x4);
4271     scale(0x0);
4272     disp(0x0);
4273   %}
4274 %}
4275 
4276 // Indirect Memory Plus Long Offset Operand
4277 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4278   match(AddP reg off);
4279 
4280   format %{ "[$reg + $off]" %}
4281   interface(MEMORY_INTER) %{
4282     base($reg);
4283     index(0x4);
4284     scale(0x0);
4285     disp($off);
4286   %}
4287 %}
4288 
4289 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4290 
4291 
4292 //----------Special Memory Operands--------------------------------------------
4293 // Stack Slot Operand - This operand is used for loading and storing temporary
4294 //                      values on the stack where a match requires a value to
4295 //                      flow through memory.
4296 operand stackSlotP(sRegP reg) %{
4297   constraint(ALLOC_IN_RC(stack_slots));
4298   // No match rule because this operand is only generated in matching
4299   format %{ "[$reg]" %}
4300   interface(MEMORY_INTER) %{
4301     base(0x4);   // ESP
4302     index(0x4);  // No Index
4303     scale(0x0);  // No Scale
4304     disp($reg);  // Stack Offset
4305   %}
4306 %}
4307 
4308 operand stackSlotI(sRegI reg) %{
4309   constraint(ALLOC_IN_RC(stack_slots));
4310   // No match rule because this operand is only generated in matching
4311   format %{ "[$reg]" %}
4312   interface(MEMORY_INTER) %{
4313     base(0x4);   // ESP
4314     index(0x4);  // No Index
4315     scale(0x0);  // No Scale
4316     disp($reg);  // Stack Offset
4317   %}
4318 %}
4319 
4320 operand stackSlotF(sRegF reg) %{
4321   constraint(ALLOC_IN_RC(stack_slots));
4322   // No match rule because this operand is only generated in matching
4323   format %{ "[$reg]" %}
4324   interface(MEMORY_INTER) %{
4325     base(0x4);   // ESP
4326     index(0x4);  // No Index
4327     scale(0x0);  // No Scale
4328     disp($reg);  // Stack Offset
4329   %}
4330 %}
4331 
4332 operand stackSlotD(sRegD reg) %{
4333   constraint(ALLOC_IN_RC(stack_slots));
4334   // No match rule because this operand is only generated in matching
4335   format %{ "[$reg]" %}
4336   interface(MEMORY_INTER) %{
4337     base(0x4);   // ESP
4338     index(0x4);  // No Index
4339     scale(0x0);  // No Scale
4340     disp($reg);  // Stack Offset
4341   %}
4342 %}
4343 
4344 operand stackSlotL(sRegL reg) %{
4345   constraint(ALLOC_IN_RC(stack_slots));
4346   // No match rule because this operand is only generated in matching
4347   format %{ "[$reg]" %}
4348   interface(MEMORY_INTER) %{
4349     base(0x4);   // ESP
4350     index(0x4);  // No Index
4351     scale(0x0);  // No Scale
4352     disp($reg);  // Stack Offset
4353   %}
4354 %}
4355 
4356 //----------Memory Operands - Win95 Implicit Null Variants----------------
4357 // Indirect Memory Operand
4358 operand indirect_win95_safe(eRegP_no_EBP reg)
4359 %{
4360   constraint(ALLOC_IN_RC(int_reg));
4361   match(reg);
4362 
4363   op_cost(100);
4364   format %{ "[$reg]" %}
4365   interface(MEMORY_INTER) %{
4366     base($reg);
4367     index(0x4);
4368     scale(0x0);
4369     disp(0x0);
4370   %}
4371 %}
4372 
4373 // Indirect Memory Plus Short Offset Operand
4374 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4375 %{
4376   match(AddP reg off);
4377 
4378   op_cost(100);
4379   format %{ "[$reg + $off]" %}
4380   interface(MEMORY_INTER) %{
4381     base($reg);
4382     index(0x4);
4383     scale(0x0);
4384     disp($off);
4385   %}
4386 %}
4387 
4388 // Indirect Memory Plus Long Offset Operand
4389 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4390 %{
4391   match(AddP reg off);
4392 
4393   op_cost(100);
4394   format %{ "[$reg + $off]" %}
4395   interface(MEMORY_INTER) %{
4396     base($reg);
4397     index(0x4);
4398     scale(0x0);
4399     disp($off);
4400   %}
4401 %}
4402 
4403 // Indirect Memory Plus Index Register Plus Offset Operand
4404 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4405 %{
4406   match(AddP (AddP reg ireg) off);
4407 
4408   op_cost(100);
4409   format %{"[$reg + $off + $ireg]" %}
4410   interface(MEMORY_INTER) %{
4411     base($reg);
4412     index($ireg);
4413     scale(0x0);
4414     disp($off);
4415   %}
4416 %}
4417 
4418 // Indirect Memory Times Scale Plus Index Register
4419 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4420 %{
4421   match(AddP reg (LShiftI ireg scale));
4422 
4423   op_cost(100);
4424   format %{"[$reg + $ireg << $scale]" %}
4425   interface(MEMORY_INTER) %{
4426     base($reg);
4427     index($ireg);
4428     scale($scale);
4429     disp(0x0);
4430   %}
4431 %}
4432 
4433 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4434 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4435 %{
4436   match(AddP (AddP reg (LShiftI ireg scale)) off);
4437 
4438   op_cost(100);
4439   format %{"[$reg + $off + $ireg << $scale]" %}
4440   interface(MEMORY_INTER) %{
4441     base($reg);
4442     index($ireg);
4443     scale($scale);
4444     disp($off);
4445   %}
4446 %}
4447 
4448 //----------Conditional Branch Operands----------------------------------------
4449 // Comparison Op  - This is the operation of the comparison, and is limited to
4450 //                  the following set of codes:
4451 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4452 //
4453 // Other attributes of the comparison, such as unsignedness, are specified
4454 // by the comparison instruction that sets a condition code flags register.
4455 // That result is represented by a flags operand whose subtype is appropriate
4456 // to the unsignedness (etc.) of the comparison.
4457 //
4458 // Later, the instruction which matches both the Comparison Op (a Bool) and
4459 // the flags (produced by the Cmp) specifies the coding of the comparison op
4460 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4461 
// Comparison Code
4463 operand cmpOp() %{
4464   match(Bool);
4465 
4466   format %{ "" %}
4467   interface(COND_INTER) %{
4468     equal(0x4, "e");
4469     not_equal(0x5, "ne");
4470     less(0xC, "l");
4471     greater_equal(0xD, "ge");
4472     less_equal(0xE, "le");
4473     greater(0xF, "g");
4474     overflow(0x0, "o");
4475     no_overflow(0x1, "no");
4476   %}
4477 %}
4478 
4479 // Comparison Code, unsigned compare.  Used by FP also, with
4480 // C2 (unordered) turned into GT or LT already.  The other bits
4481 // C0 and C3 are turned into Carry & Zero flags.
4482 operand cmpOpU() %{
4483   match(Bool);
4484 
4485   format %{ "" %}
4486   interface(COND_INTER) %{
4487     equal(0x4, "e");
4488     not_equal(0x5, "ne");
4489     less(0x2, "b");
4490     greater_equal(0x3, "nb");
4491     less_equal(0x6, "be");
4492     greater(0x7, "nbe");
4493     overflow(0x0, "o");
4494     no_overflow(0x1, "no");
4495   %}
4496 %}
4497 
4498 // Floating comparisons that don't require any fixup for the unordered case
4499 operand cmpOpUCF() %{
4500   match(Bool);
4501   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4502             n->as_Bool()->_test._test == BoolTest::ge ||
4503             n->as_Bool()->_test._test == BoolTest::le ||
4504             n->as_Bool()->_test._test == BoolTest::gt);
4505   format %{ "" %}
4506   interface(COND_INTER) %{
4507     equal(0x4, "e");
4508     not_equal(0x5, "ne");
4509     less(0x2, "b");
4510     greater_equal(0x3, "nb");
4511     less_equal(0x6, "be");
4512     greater(0x7, "nbe");
4513     overflow(0x0, "o");
4514     no_overflow(0x1, "no");
4515   %}
4516 %}
4517 
4518 
4519 // Floating comparisons that can be fixed up with extra conditional jumps
4520 operand cmpOpUCF2() %{
4521   match(Bool);
4522   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4523             n->as_Bool()->_test._test == BoolTest::eq);
4524   format %{ "" %}
4525   interface(COND_INTER) %{
4526     equal(0x4, "e");
4527     not_equal(0x5, "ne");
4528     less(0x2, "b");
4529     greater_equal(0x3, "nb");
4530     less_equal(0x6, "be");
4531     greater(0x7, "nbe");
4532     overflow(0x0, "o");
4533     no_overflow(0x1, "no");
4534   %}
4535 %}
4536 
4537 // Comparison Code for FP conditional move
4538 operand cmpOp_fcmov() %{
4539   match(Bool);
4540 
4541   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4542             n->as_Bool()->_test._test != BoolTest::no_overflow);
4543   format %{ "" %}
4544   interface(COND_INTER) %{
4545     equal        (0x0C8);
4546     not_equal    (0x1C8);
4547     less         (0x0C0);
4548     greater_equal(0x1C0);
4549     less_equal   (0x0D0);
4550     greater      (0x1D0);
4551     overflow(0x0, "o"); // not really supported by the instruction
4552     no_overflow(0x1, "no"); // not really supported by the instruction
4553   %}
4554 %}
4555 
// Comparison Code used in long compares; encodings are for the commuted
// (operand-swapped) test.
4557 operand cmpOp_commute() %{
4558   match(Bool);
4559 
4560   format %{ "" %}
4561   interface(COND_INTER) %{
4562     equal(0x4, "e");
4563     not_equal(0x5, "ne");
4564     less(0xF, "g");
4565     greater_equal(0xE, "le");
4566     less_equal(0xD, "ge");
4567     greater(0xC, "l");
4568     overflow(0x0, "o");
4569     no_overflow(0x1, "no");
4570   %}
4571 %}
4572 
4573 //----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
4575 // instruction definitions by not requiring the AD writer to specify separate
4576 // instructions for every form of operand when the instruction accepts
4577 // multiple operand types with the same basic encoding and format.  The classic
4578 // case of this is memory operands.
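// For example, the loadI instruction further below is written once against
// the "memory" opclass,
//
//   instruct loadI(rRegI dst, memory mem) %{
//     match(Set dst (LoadI mem));
//     ...
//   %}
//
// and the matcher then accepts any addressing form listed in the opclass
// (indirect, indOffset8, indIndexScaleOffset, ...) for $mem.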
4579 
4580 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4581                indIndex, indIndexScale, indIndexScaleOffset);
4582 
4583 // Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required, and you cannot use
// an oop as the offset (as is done when working on static globals).
4586 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4587                     indIndex, indIndexScale, indIndexScaleOffset);
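
// For example, the storeL instruction below emits
//   MOV    $mem,$src.lo
//   MOV    $mem+4,$src.hi
// i.e. two 32-bit moves whose second address is the first plus 4, which is
// why the displacement must be a plain integer and never an oop.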
4588 
4589 
4590 //----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.
4592 pipeline %{
4593 
4594 //----------ATTRIBUTES---------------------------------------------------------
4595 attributes %{
  variable_size_instructions;        // Variable size instructions
4597   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction unit is 1 byte
4599   instruction_fetch_unit_size = 16;  // The processor fetches one line
4600   instruction_fetch_units = 1;       // of 16 bytes
4601 
4602   // List of nop instructions
4603   nops( MachNop );
4604 %}
4605 
4606 //----------RESOURCES----------------------------------------------------------
4607 // Resources are the functional units available to the machine
4608 
4609 // Generic P2/P3 pipeline
4610 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4611 // 3 instructions decoded per cycle.
4612 // 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU ops, only ALU0 handles mul/div instructions.
4614 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4615            MS0, MS1, MEM = MS0 | MS1,
4616            BR, FPU,
4617            ALU0, ALU1, ALU = ALU0 | ALU1 );
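
// The OR'd names act as resource masks: a pipe class that lists DECODE may
// issue on any of D0/D1/D2, while one that lists D0 needs the big decoder;
// likewise MEM means either of MS0/MS1 and ALU either of ALU0/ALU1, whereas
// ALU0 pins the operation to the multiply/divide-capable unit.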
4618 
4619 //----------PIPELINE DESCRIPTION-----------------------------------------------
4620 // Pipeline Description specifies the stages in the machine's pipeline
4621 
4622 // Generic P2/P3 pipeline
4623 pipe_desc(S0, S1, S2, S3, S4, S5);
4624 
4625 //----------PIPELINE CLASSES---------------------------------------------------
4626 // Pipeline Classes describe the stages in which input and output are
4627 // referenced by the hardware pipeline.
4628 
4629 // Naming convention: ialu or fpu
4630 // Then: _reg
4631 // Then: _reg if there is a 2nd register
4632 // Then: _long if it's a pair of instructions implementing a long
4633 // Then: _fat if it requires the big decoder
4634 //   Or: _mem if it requires the big decoder and a memory unit.
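//
// Reading a few of the names below with this convention: ialu_reg_long_fat
// is an integer ALU operation on a long register pair that needs the big
// decoder, and fpu_reg_mem is a floating-point operation with a register
// destination and a memory source.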
4635 
4636 // Integer ALU reg operation
4637 pipe_class ialu_reg(rRegI dst) %{
4638     single_instruction;
4639     dst    : S4(write);
4640     dst    : S3(read);
4641     DECODE : S0;        // any decoder
4642     ALU    : S3;        // any alu
4643 %}
4644 
4645 // Long ALU reg operation
4646 pipe_class ialu_reg_long(eRegL dst) %{
4647     instruction_count(2);
4648     dst    : S4(write);
4649     dst    : S3(read);
4650     DECODE : S0(2);     // any 2 decoders
4651     ALU    : S3(2);     // both alus
4652 %}
4653 
4654 // Integer ALU reg operation using big decoder
4655 pipe_class ialu_reg_fat(rRegI dst) %{
4656     single_instruction;
4657     dst    : S4(write);
4658     dst    : S3(read);
4659     D0     : S0;        // big decoder only
4660     ALU    : S3;        // any alu
4661 %}
4662 
4663 // Long ALU reg operation using big decoder
4664 pipe_class ialu_reg_long_fat(eRegL dst) %{
4665     instruction_count(2);
4666     dst    : S4(write);
4667     dst    : S3(read);
4668     D0     : S0(2);     // big decoder only; twice
4669     ALU    : S3(2);     // any 2 alus
4670 %}
4671 
4672 // Integer ALU reg-reg operation
4673 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4674     single_instruction;
4675     dst    : S4(write);
4676     src    : S3(read);
4677     DECODE : S0;        // any decoder
4678     ALU    : S3;        // any alu
4679 %}
4680 
4681 // Long ALU reg-reg operation
4682 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4683     instruction_count(2);
4684     dst    : S4(write);
4685     src    : S3(read);
4686     DECODE : S0(2);     // any 2 decoders
4687     ALU    : S3(2);     // both alus
4688 %}
4689 
// Integer ALU reg-reg operation using big decoder
4691 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4692     single_instruction;
4693     dst    : S4(write);
4694     src    : S3(read);
4695     D0     : S0;        // big decoder only
4696     ALU    : S3;        // any alu
4697 %}
4698 
// Long ALU reg-reg operation using big decoder
4700 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4701     instruction_count(2);
4702     dst    : S4(write);
4703     src    : S3(read);
4704     D0     : S0(2);     // big decoder only; twice
4705     ALU    : S3(2);     // both alus
4706 %}
4707 
4708 // Integer ALU reg-mem operation
4709 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4710     single_instruction;
4711     dst    : S5(write);
4712     mem    : S3(read);
4713     D0     : S0;        // big decoder only
4714     ALU    : S4;        // any alu
4715     MEM    : S3;        // any mem
4716 %}
4717 
4718 // Long ALU reg-mem operation
4719 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4720     instruction_count(2);
4721     dst    : S5(write);
4722     mem    : S3(read);
4723     D0     : S0(2);     // big decoder only; twice
4724     ALU    : S4(2);     // any 2 alus
4725     MEM    : S3(2);     // both mems
4726 %}
4727 
4728 // Integer mem operation (prefetch)
4729 pipe_class ialu_mem(memory mem)
4730 %{
4731     single_instruction;
4732     mem    : S3(read);
4733     D0     : S0;        // big decoder only
4734     MEM    : S3;        // any mem
4735 %}
4736 
4737 // Integer Store to Memory
4738 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4739     single_instruction;
4740     mem    : S3(read);
4741     src    : S5(read);
4742     D0     : S0;        // big decoder only
4743     ALU    : S4;        // any alu
4744     MEM    : S3;
4745 %}
4746 
4747 // Long Store to Memory
4748 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4749     instruction_count(2);
4750     mem    : S3(read);
4751     src    : S5(read);
4752     D0     : S0(2);     // big decoder only; twice
4753     ALU    : S4(2);     // any 2 alus
4754     MEM    : S3(2);     // Both mems
4755 %}
4756 
4757 // Integer Store to Memory
4758 pipe_class ialu_mem_imm(memory mem) %{
4759     single_instruction;
4760     mem    : S3(read);
4761     D0     : S0;        // big decoder only
4762     ALU    : S4;        // any alu
4763     MEM    : S3;
4764 %}
4765 
4766 // Integer ALU0 reg-reg operation
4767 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4768     single_instruction;
4769     dst    : S4(write);
4770     src    : S3(read);
4771     D0     : S0;        // Big decoder only
4772     ALU0   : S3;        // only alu0
4773 %}
4774 
4775 // Integer ALU0 reg-mem operation
4776 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4777     single_instruction;
4778     dst    : S5(write);
4779     mem    : S3(read);
4780     D0     : S0;        // big decoder only
4781     ALU0   : S4;        // ALU0 only
4782     MEM    : S3;        // any mem
4783 %}
4784 
4785 // Integer ALU reg-reg operation
4786 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4787     single_instruction;
4788     cr     : S4(write);
4789     src1   : S3(read);
4790     src2   : S3(read);
4791     DECODE : S0;        // any decoder
4792     ALU    : S3;        // any alu
4793 %}
4794 
4795 // Integer ALU reg-imm operation
4796 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4797     single_instruction;
4798     cr     : S4(write);
4799     src1   : S3(read);
4800     DECODE : S0;        // any decoder
4801     ALU    : S3;        // any alu
4802 %}
4803 
4804 // Integer ALU reg-mem operation
4805 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4806     single_instruction;
4807     cr     : S4(write);
4808     src1   : S3(read);
4809     src2   : S3(read);
4810     D0     : S0;        // big decoder only
4811     ALU    : S4;        // any alu
4812     MEM    : S3;
4813 %}
4814 
4815 // Conditional move reg-reg
4816 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4817     instruction_count(4);
4818     y      : S4(read);
4819     q      : S3(read);
4820     p      : S3(read);
4821     DECODE : S0(4);     // any decoder
4822 %}
4823 
4824 // Conditional move reg-reg
4825 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4826     single_instruction;
4827     dst    : S4(write);
4828     src    : S3(read);
4829     cr     : S3(read);
4830     DECODE : S0;        // any decoder
4831 %}
4832 
4833 // Conditional move reg-mem
4834 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4835     single_instruction;
4836     dst    : S4(write);
4837     src    : S3(read);
4838     cr     : S3(read);
4839     DECODE : S0;        // any decoder
4840     MEM    : S3;
4841 %}
4842 
4843 // Conditional move reg-reg long
4844 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4845     single_instruction;
4846     dst    : S4(write);
4847     src    : S3(read);
4848     cr     : S3(read);
4849     DECODE : S0(2);     // any 2 decoders
4850 %}
4851 
4852 // Conditional move double reg-reg
4853 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4854     single_instruction;
4855     dst    : S4(write);
4856     src    : S3(read);
4857     cr     : S3(read);
4858     DECODE : S0;        // any decoder
4859 %}
4860 
4861 // Float reg-reg operation
4862 pipe_class fpu_reg(regDPR dst) %{
4863     instruction_count(2);
4864     dst    : S3(read);
4865     DECODE : S0(2);     // any 2 decoders
4866     FPU    : S3;
4867 %}
4868 
4869 // Float reg-reg operation
4870 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4871     instruction_count(2);
4872     dst    : S4(write);
4873     src    : S3(read);
4874     DECODE : S0(2);     // any 2 decoders
4875     FPU    : S3;
4876 %}
4877 
4878 // Float reg-reg operation
4879 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4880     instruction_count(3);
4881     dst    : S4(write);
4882     src1   : S3(read);
4883     src2   : S3(read);
4884     DECODE : S0(3);     // any 3 decoders
4885     FPU    : S3(2);
4886 %}
4887 
4888 // Float reg-reg operation
4889 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4890     instruction_count(4);
4891     dst    : S4(write);
4892     src1   : S3(read);
4893     src2   : S3(read);
4894     src3   : S3(read);
    DECODE : S0(4);     // any decoders, 4 slots
4896     FPU    : S3(2);
4897 %}
4898 
4899 // Float reg-reg operation
4900 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4901     instruction_count(4);
4902     dst    : S4(write);
4903     src1   : S3(read);
4904     src2   : S3(read);
4905     src3   : S3(read);
4906     DECODE : S1(3);     // any 3 decoders
4907     D0     : S0;        // Big decoder only
4908     FPU    : S3(2);
4909     MEM    : S3;
4910 %}
4911 
4912 // Float reg-mem operation
4913 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4914     instruction_count(2);
4915     dst    : S5(write);
4916     mem    : S3(read);
4917     D0     : S0;        // big decoder only
4918     DECODE : S1;        // any decoder for FPU POP
4919     FPU    : S4;
4920     MEM    : S3;        // any mem
4921 %}
4922 
4923 // Float reg-mem operation
4924 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4925     instruction_count(3);
4926     dst    : S5(write);
4927     src1   : S3(read);
4928     mem    : S3(read);
4929     D0     : S0;        // big decoder only
4930     DECODE : S1(2);     // any decoder for FPU POP
4931     FPU    : S4;
4932     MEM    : S3;        // any mem
4933 %}
4934 
4935 // Float mem-reg operation
4936 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4937     instruction_count(2);
4938     src    : S5(read);
4939     mem    : S3(read);
4940     DECODE : S0;        // any decoder for FPU PUSH
4941     D0     : S1;        // big decoder only
4942     FPU    : S4;
4943     MEM    : S3;        // any mem
4944 %}
4945 
4946 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4947     instruction_count(3);
4948     src1   : S3(read);
4949     src2   : S3(read);
4950     mem    : S3(read);
4951     DECODE : S0(2);     // any decoder for FPU PUSH
4952     D0     : S1;        // big decoder only
4953     FPU    : S4;
4954     MEM    : S3;        // any mem
4955 %}
4956 
4957 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4958     instruction_count(3);
4959     src1   : S3(read);
4960     src2   : S3(read);
4961     mem    : S4(read);
4962     DECODE : S0;        // any decoder for FPU PUSH
4963     D0     : S0(2);     // big decoder only
4964     FPU    : S4;
4965     MEM    : S3(2);     // any mem
4966 %}
4967 
4968 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4969     instruction_count(2);
4970     src1   : S3(read);
4971     dst    : S4(read);
4972     D0     : S0(2);     // big decoder only
4973     MEM    : S3(2);     // any mem
4974 %}
4975 
4976 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4977     instruction_count(3);
4978     src1   : S3(read);
4979     src2   : S3(read);
4980     dst    : S4(read);
4981     D0     : S0(3);     // big decoder only
4982     FPU    : S4;
4983     MEM    : S3(3);     // any mem
4984 %}
4985 
4986 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4987     instruction_count(3);
4988     src1   : S4(read);
4989     mem    : S4(read);
4990     DECODE : S0;        // any decoder for FPU PUSH
4991     D0     : S0(2);     // big decoder only
4992     FPU    : S4;
4993     MEM    : S3(2);     // any mem
4994 %}
4995 
4996 // Float load constant
4997 pipe_class fpu_reg_con(regDPR dst) %{
4998     instruction_count(2);
4999     dst    : S5(write);
5000     D0     : S0;        // big decoder only for the load
5001     DECODE : S1;        // any decoder for FPU POP
5002     FPU    : S4;
5003     MEM    : S3;        // any mem
5004 %}
5005 
5006 // Float load constant
5007 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5008     instruction_count(3);
5009     dst    : S5(write);
5010     src    : S3(read);
5011     D0     : S0;        // big decoder only for the load
5012     DECODE : S1(2);     // any decoder for FPU POP
5013     FPU    : S4;
5014     MEM    : S3;        // any mem
5015 %}
5016 
// Unconditional branch
5018 pipe_class pipe_jmp( label labl ) %{
5019     single_instruction;
5020     BR   : S3;
5021 %}
5022 
5023 // Conditional branch
5024 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5025     single_instruction;
5026     cr    : S1(read);
5027     BR    : S3;
5028 %}
5029 
5030 // Allocation idiom
5031 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5032     instruction_count(1); force_serialization;
5033     fixed_latency(6);
5034     heap_ptr : S3(read);
5035     DECODE   : S0(3);
5036     D0       : S2;
5037     MEM      : S3;
5038     ALU      : S3(2);
5039     dst      : S5(write);
5040     BR       : S5;
5041 %}
5042 
5043 // Generic big/slow expanded idiom
5044 pipe_class pipe_slow(  ) %{
5045     instruction_count(10); multiple_bundles; force_serialization;
5046     fixed_latency(100);
5047     D0  : S0(2);
5048     MEM : S3(2);
5049 %}
5050 
5051 // The real do-nothing guy
5052 pipe_class empty( ) %{
5053     instruction_count(0);
5054 %}
5055 
5056 // Define the class for the Nop node
5057 define %{
5058    MachNop = empty;
5059 %}
5060 
5061 %}
5062 
5063 //----------INSTRUCTIONS-------------------------------------------------------
5064 //
5065 // match      -- States which machine-independent subtree may be replaced
5066 //               by this instruction.
5067 // ins_cost   -- The estimated cost of this instruction is used by instruction
5068 //               selection to identify a minimum cost tree of machine
5069 //               instructions that matches a tree of machine-independent
5070 //               instructions.
5071 // format     -- A string providing the disassembly for this instruction.
5072 //               The value of an instruction's operand may be inserted
5073 //               by referring to it with a '$' prefix.
5074 // opcode     -- Three instruction opcodes may be provided.  These are referred
5075 //               to within an encode class as $primary, $secondary, and $tertiary
5076 //               respectively.  The primary opcode is commonly used to
5077 //               indicate the type of machine instruction, while secondary
5078 //               and tertiary are often used for prefix options or addressing
5079 //               modes.
5080 // ins_encode -- A list of encode classes with parameters. The encode class
5081 //               name must have been defined in an 'enc_class' specification
5082 //               in the encode section of the architecture description.
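//
// The bytes_reverse_int definition just below is a compact example: its
// match rule claims the (Set dst (ReverseBytesI dst)) subtree, opcode(0x0F,
// 0xC8) supplies $primary and $secondary, and ins_encode( OpcP, OpcSReg(dst) )
// emits that pair with the register number folded into the second byte
// (the 0F C8+reg form of BSWAP described by the format string).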
5083 
5084 //----------BSWAP-Instruction--------------------------------------------------
5085 instruct bytes_reverse_int(rRegI dst) %{
5086   match(Set dst (ReverseBytesI dst));
5087 
5088   format %{ "BSWAP  $dst" %}
5089   opcode(0x0F, 0xC8);
5090   ins_encode( OpcP, OpcSReg(dst) );
5091   ins_pipe( ialu_reg );
5092 %}
5093 
5094 instruct bytes_reverse_long(eRegL dst) %{
5095   match(Set dst (ReverseBytesL dst));
5096 
5097   format %{ "BSWAP  $dst.lo\n\t"
5098             "BSWAP  $dst.hi\n\t"
5099             "XCHG   $dst.lo $dst.hi" %}
5100 
5101   ins_cost(125);
5102   ins_encode( bswap_long_bytes(dst) );
5103   ins_pipe( ialu_reg_reg);
5104 %}
5105 
5106 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5107   match(Set dst (ReverseBytesUS dst));
5108   effect(KILL cr);
5109 
5110   format %{ "BSWAP  $dst\n\t"
5111             "SHR    $dst,16\n\t" %}
5112   ins_encode %{
5113     __ bswapl($dst$$Register);
5114     __ shrl($dst$$Register, 16);
5115   %}
5116   ins_pipe( ialu_reg );
5117 %}
5118 
5119 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5120   match(Set dst (ReverseBytesS dst));
5121   effect(KILL cr);
5122 
5123   format %{ "BSWAP  $dst\n\t"
5124             "SAR    $dst,16\n\t" %}
5125   ins_encode %{
5126     __ bswapl($dst$$Register);
5127     __ sarl($dst$$Register, 16);
5128   %}
5129   ins_pipe( ialu_reg );
5130 %}
5131 
5132 
5133 //---------- Zeros Count Instructions ------------------------------------------
5134 
5135 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5136   predicate(UseCountLeadingZerosInstruction);
5137   match(Set dst (CountLeadingZerosI src));
5138   effect(KILL cr);
5139 
5140   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5141   ins_encode %{
5142     __ lzcntl($dst$$Register, $src$$Register);
5143   %}
5144   ins_pipe(ialu_reg);
5145 %}
5146 
5147 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5148   predicate(!UseCountLeadingZerosInstruction);
5149   match(Set dst (CountLeadingZerosI src));
5150   effect(KILL cr);
5151 
5152   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5153             "JNZ    skip\n\t"
5154             "MOV    $dst, -1\n"
5155       "skip:\n\t"
5156             "NEG    $dst\n\t"
5157             "ADD    $dst, 31" %}
5158   ins_encode %{
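    // For src != 0, BSR leaves the index i of the highest set bit in Rdst,
    // so NEG followed by ADD 31 yields 31 - i, the leading-zero count.
    // For src == 0, BSR sets ZF and leaves Rdst undefined, so Rdst is forced
    // to -1 and the same NEG/ADD sequence produces 32.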
5159     Register Rdst = $dst$$Register;
5160     Register Rsrc = $src$$Register;
5161     Label skip;
5162     __ bsrl(Rdst, Rsrc);
5163     __ jccb(Assembler::notZero, skip);
5164     __ movl(Rdst, -1);
5165     __ bind(skip);
5166     __ negl(Rdst);
5167     __ addl(Rdst, BitsPerInt - 1);
5168   %}
5169   ins_pipe(ialu_reg);
5170 %}
5171 
5172 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5173   predicate(UseCountLeadingZerosInstruction);
5174   match(Set dst (CountLeadingZerosL src));
5175   effect(TEMP dst, KILL cr);
5176 
5177   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5178             "JNC    done\n\t"
5179             "LZCNT  $dst, $src.lo\n\t"
5180             "ADD    $dst, 32\n"
5181       "done:" %}
5182   ins_encode %{
5183     Register Rdst = $dst$$Register;
5184     Register Rsrc = $src$$Register;
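    // $src is a 64-bit register pair: Rsrc names its low half and
    // HIGH_FROM_LOW(Rsrc) the companion register holding the high half.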
5185     Label done;
5186     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5187     __ jccb(Assembler::carryClear, done);
5188     __ lzcntl(Rdst, Rsrc);
5189     __ addl(Rdst, BitsPerInt);
5190     __ bind(done);
5191   %}
5192   ins_pipe(ialu_reg);
5193 %}
5194 
5195 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5196   predicate(!UseCountLeadingZerosInstruction);
5197   match(Set dst (CountLeadingZerosL src));
5198   effect(TEMP dst, KILL cr);
5199 
5200   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5201             "JZ     msw_is_zero\n\t"
5202             "ADD    $dst, 32\n\t"
5203             "JMP    not_zero\n"
5204       "msw_is_zero:\n\t"
5205             "BSR    $dst, $src.lo\n\t"
5206             "JNZ    not_zero\n\t"
5207             "MOV    $dst, -1\n"
5208       "not_zero:\n\t"
5209             "NEG    $dst\n\t"
5210             "ADD    $dst, 63\n" %}
  ins_encode %{
5212     Register Rdst = $dst$$Register;
5213     Register Rsrc = $src$$Register;
5214     Label msw_is_zero;
5215     Label not_zero;
5216     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5217     __ jccb(Assembler::zero, msw_is_zero);
5218     __ addl(Rdst, BitsPerInt);
5219     __ jmpb(not_zero);
5220     __ bind(msw_is_zero);
5221     __ bsrl(Rdst, Rsrc);
5222     __ jccb(Assembler::notZero, not_zero);
5223     __ movl(Rdst, -1);
5224     __ bind(not_zero);
5225     __ negl(Rdst);
5226     __ addl(Rdst, BitsPerLong - 1);
5227   %}
5228   ins_pipe(ialu_reg);
5229 %}
5230 
5231 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5232   predicate(UseCountTrailingZerosInstruction);
5233   match(Set dst (CountTrailingZerosI src));
5234   effect(KILL cr);
5235 
5236   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5237   ins_encode %{
5238     __ tzcntl($dst$$Register, $src$$Register);
5239   %}
5240   ins_pipe(ialu_reg);
5241 %}
5242 
5243 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5244   predicate(!UseCountTrailingZerosInstruction);
5245   match(Set dst (CountTrailingZerosI src));
5246   effect(KILL cr);
5247 
5248   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5249             "JNZ    done\n\t"
5250             "MOV    $dst, 32\n"
5251       "done:" %}
5252   ins_encode %{
5253     Register Rdst = $dst$$Register;
5254     Label done;
5255     __ bsfl(Rdst, $src$$Register);
5256     __ jccb(Assembler::notZero, done);
5257     __ movl(Rdst, BitsPerInt);
5258     __ bind(done);
5259   %}
5260   ins_pipe(ialu_reg);
5261 %}
5262 
5263 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5264   predicate(UseCountTrailingZerosInstruction);
5265   match(Set dst (CountTrailingZerosL src));
5266   effect(TEMP dst, KILL cr);
5267 
5268   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5269             "JNC    done\n\t"
5270             "TZCNT  $dst, $src.hi\n\t"
5271             "ADD    $dst, 32\n"
5272             "done:" %}
5273   ins_encode %{
5274     Register Rdst = $dst$$Register;
5275     Register Rsrc = $src$$Register;
5276     Label done;
5277     __ tzcntl(Rdst, Rsrc);
5278     __ jccb(Assembler::carryClear, done);
5279     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5280     __ addl(Rdst, BitsPerInt);
5281     __ bind(done);
5282   %}
5283   ins_pipe(ialu_reg);
5284 %}
5285 
5286 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5287   predicate(!UseCountTrailingZerosInstruction);
5288   match(Set dst (CountTrailingZerosL src));
5289   effect(TEMP dst, KILL cr);
5290 
5291   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5292             "JNZ    done\n\t"
5293             "BSF    $dst, $src.hi\n\t"
5294             "JNZ    msw_not_zero\n\t"
5295             "MOV    $dst, 32\n"
5296       "msw_not_zero:\n\t"
5297             "ADD    $dst, 32\n"
5298       "done:" %}
5299   ins_encode %{
5300     Register Rdst = $dst$$Register;
5301     Register Rsrc = $src$$Register;
5302     Label msw_not_zero;
5303     Label done;
5304     __ bsfl(Rdst, Rsrc);
5305     __ jccb(Assembler::notZero, done);
5306     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5307     __ jccb(Assembler::notZero, msw_not_zero);
5308     __ movl(Rdst, BitsPerInt);
5309     __ bind(msw_not_zero);
5310     __ addl(Rdst, BitsPerInt);
5311     __ bind(done);
5312   %}
5313   ins_pipe(ialu_reg);
5314 %}
5315 
5316 
5317 //---------- Population Count Instructions -------------------------------------
5318 
5319 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5320   predicate(UsePopCountInstruction);
5321   match(Set dst (PopCountI src));
5322   effect(KILL cr);
5323 
5324   format %{ "POPCNT $dst, $src" %}
5325   ins_encode %{
5326     __ popcntl($dst$$Register, $src$$Register);
5327   %}
5328   ins_pipe(ialu_reg);
5329 %}
5330 
5331 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5332   predicate(UsePopCountInstruction);
5333   match(Set dst (PopCountI (LoadI mem)));
5334   effect(KILL cr);
5335 
5336   format %{ "POPCNT $dst, $mem" %}
5337   ins_encode %{
5338     __ popcntl($dst$$Register, $mem$$Address);
5339   %}
5340   ins_pipe(ialu_reg);
5341 %}
5342 
5343 // Note: Long.bitCount(long) returns an int.
5344 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5345   predicate(UsePopCountInstruction);
5346   match(Set dst (PopCountL src));
5347   effect(KILL cr, TEMP tmp, TEMP dst);
5348 
5349   format %{ "POPCNT $dst, $src.lo\n\t"
5350             "POPCNT $tmp, $src.hi\n\t"
5351             "ADD    $dst, $tmp" %}
5352   ins_encode %{
5353     __ popcntl($dst$$Register, $src$$Register);
5354     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5355     __ addl($dst$$Register, $tmp$$Register);
5356   %}
5357   ins_pipe(ialu_reg);
5358 %}
5359 
5360 // Note: Long.bitCount(long) returns an int.
5361 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5362   predicate(UsePopCountInstruction);
5363   match(Set dst (PopCountL (LoadL mem)));
5364   effect(KILL cr, TEMP tmp, TEMP dst);
5365 
5366   format %{ "POPCNT $dst, $mem\n\t"
5367             "POPCNT $tmp, $mem+4\n\t"
5368             "ADD    $dst, $tmp" %}
5369   ins_encode %{
5370     //__ popcntl($dst$$Register, $mem$$Address$$first);
5371     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5372     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5373     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5374     __ addl($dst$$Register, $tmp$$Register);
5375   %}
5376   ins_pipe(ialu_reg);
5377 %}
5378 
5379 
5380 //----------Load/Store/Move Instructions---------------------------------------
5381 //----------Load Instructions--------------------------------------------------
5382 // Load Byte (8bit signed)
5383 instruct loadB(xRegI dst, memory mem) %{
5384   match(Set dst (LoadB mem));
5385 
5386   ins_cost(125);
5387   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5388 
5389   ins_encode %{
5390     __ movsbl($dst$$Register, $mem$$Address);
5391   %}
5392 
5393   ins_pipe(ialu_reg_mem);
5394 %}
5395 
5396 // Load Byte (8bit signed) into Long Register
5397 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5398   match(Set dst (ConvI2L (LoadB mem)));
5399   effect(KILL cr);
5400 
5401   ins_cost(375);
5402   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5403             "MOV    $dst.hi,$dst.lo\n\t"
5404             "SAR    $dst.hi,7" %}
5405 
5406   ins_encode %{
5407     __ movsbl($dst$$Register, $mem$$Address);
5408     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign extended.
5410   %}
5411 
5412   ins_pipe(ialu_reg_mem);
5413 %}
5414 
5415 // Load Unsigned Byte (8bit UNsigned)
5416 instruct loadUB(xRegI dst, memory mem) %{
5417   match(Set dst (LoadUB mem));
5418 
5419   ins_cost(125);
5420   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5421 
5422   ins_encode %{
5423     __ movzbl($dst$$Register, $mem$$Address);
5424   %}
5425 
5426   ins_pipe(ialu_reg_mem);
5427 %}
5428 
5429 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5430 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5431   match(Set dst (ConvI2L (LoadUB mem)));
5432   effect(KILL cr);
5433 
5434   ins_cost(250);
5435   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5436             "XOR    $dst.hi,$dst.hi" %}
5437 
5438   ins_encode %{
5439     Register Rdst = $dst$$Register;
5440     __ movzbl(Rdst, $mem$$Address);
5441     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5442   %}
5443 
5444   ins_pipe(ialu_reg_mem);
5445 %}
5446 
5447 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5448 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5449   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5450   effect(KILL cr);
5451 
5452   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5453             "XOR    $dst.hi,$dst.hi\n\t"
5454             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5455   ins_encode %{
5456     Register Rdst = $dst$$Register;
5457     __ movzbl(Rdst, $mem$$Address);
5458     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5459     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5460   %}
5461   ins_pipe(ialu_reg_mem);
5462 %}
5463 
5464 // Load Short (16bit signed)
5465 instruct loadS(rRegI dst, memory mem) %{
5466   match(Set dst (LoadS mem));
5467 
5468   ins_cost(125);
5469   format %{ "MOVSX  $dst,$mem\t# short" %}
5470 
5471   ins_encode %{
5472     __ movswl($dst$$Register, $mem$$Address);
5473   %}
5474 
5475   ins_pipe(ialu_reg_mem);
5476 %}
5477 
5478 // Load Short (16 bit signed) to Byte (8 bit signed)
5479 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5480   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5481 
5482   ins_cost(125);
5483   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5484   ins_encode %{
5485     __ movsbl($dst$$Register, $mem$$Address);
5486   %}
5487   ins_pipe(ialu_reg_mem);
5488 %}
5489 
5490 // Load Short (16bit signed) into Long Register
5491 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5492   match(Set dst (ConvI2L (LoadS mem)));
5493   effect(KILL cr);
5494 
5495   ins_cost(375);
5496   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5497             "MOV    $dst.hi,$dst.lo\n\t"
5498             "SAR    $dst.hi,15" %}
5499 
5500   ins_encode %{
5501     __ movswl($dst$$Register, $mem$$Address);
5502     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign extended.
5504   %}
5505 
5506   ins_pipe(ialu_reg_mem);
5507 %}
5508 
5509 // Load Unsigned Short/Char (16bit unsigned)
5510 instruct loadUS(rRegI dst, memory mem) %{
5511   match(Set dst (LoadUS mem));
5512 
5513   ins_cost(125);
5514   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5515 
5516   ins_encode %{
5517     __ movzwl($dst$$Register, $mem$$Address);
5518   %}
5519 
5520   ins_pipe(ialu_reg_mem);
5521 %}
5522 
5523 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5524 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5525   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5526 
5527   ins_cost(125);
5528   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5529   ins_encode %{
5530     __ movsbl($dst$$Register, $mem$$Address);
5531   %}
5532   ins_pipe(ialu_reg_mem);
5533 %}
5534 
5535 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5536 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5537   match(Set dst (ConvI2L (LoadUS mem)));
5538   effect(KILL cr);
5539 
5540   ins_cost(250);
5541   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5542             "XOR    $dst.hi,$dst.hi" %}
5543 
5544   ins_encode %{
5545     __ movzwl($dst$$Register, $mem$$Address);
5546     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5547   %}
5548 
5549   ins_pipe(ialu_reg_mem);
5550 %}
5551 
5552 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5553 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5554   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5555   effect(KILL cr);
5556 
5557   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5558             "XOR    $dst.hi,$dst.hi" %}
5559   ins_encode %{
5560     Register Rdst = $dst$$Register;
5561     __ movzbl(Rdst, $mem$$Address);
5562     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5563   %}
5564   ins_pipe(ialu_reg_mem);
5565 %}
5566 
5567 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5568 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5569   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5570   effect(KILL cr);
5571 
5572   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5573             "XOR    $dst.hi,$dst.hi\n\t"
5574             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5575   ins_encode %{
5576     Register Rdst = $dst$$Register;
5577     __ movzwl(Rdst, $mem$$Address);
5578     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5579     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5580   %}
5581   ins_pipe(ialu_reg_mem);
5582 %}
5583 
5584 // Load Integer
5585 instruct loadI(rRegI dst, memory mem) %{
5586   match(Set dst (LoadI mem));
5587 
5588   ins_cost(125);
5589   format %{ "MOV    $dst,$mem\t# int" %}
5590 
5591   ins_encode %{
5592     __ movl($dst$$Register, $mem$$Address);
5593   %}
5594 
5595   ins_pipe(ialu_reg_mem);
5596 %}
5597 
5598 // Load Integer (32 bit signed) to Byte (8 bit signed)
5599 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5600   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5601 
5602   ins_cost(125);
5603   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5604   ins_encode %{
5605     __ movsbl($dst$$Register, $mem$$Address);
5606   %}
5607   ins_pipe(ialu_reg_mem);
5608 %}
5609 
5610 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5611 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5612   match(Set dst (AndI (LoadI mem) mask));
5613 
5614   ins_cost(125);
5615   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5616   ins_encode %{
5617     __ movzbl($dst$$Register, $mem$$Address);
5618   %}
5619   ins_pipe(ialu_reg_mem);
5620 %}
5621 
5622 // Load Integer (32 bit signed) to Short (16 bit signed)
5623 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5624   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5625 
5626   ins_cost(125);
5627   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5628   ins_encode %{
5629     __ movswl($dst$$Register, $mem$$Address);
5630   %}
5631   ins_pipe(ialu_reg_mem);
5632 %}
5633 
5634 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5635 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5636   match(Set dst (AndI (LoadI mem) mask));
5637 
5638   ins_cost(125);
5639   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5640   ins_encode %{
5641     __ movzwl($dst$$Register, $mem$$Address);
5642   %}
5643   ins_pipe(ialu_reg_mem);
5644 %}
5645 
5646 // Load Integer into Long Register
5647 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5648   match(Set dst (ConvI2L (LoadI mem)));
5649   effect(KILL cr);
5650 
5651   ins_cost(375);
5652   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5653             "MOV    $dst.hi,$dst.lo\n\t"
5654             "SAR    $dst.hi,31" %}
5655 
5656   ins_encode %{
5657     __ movl($dst$$Register, $mem$$Address);
5658     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5659     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5660   %}
5661 
5662   ins_pipe(ialu_reg_mem);
5663 %}
5664 
5665 // Load Integer with mask 0xFF into Long Register
5666 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5667   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5668   effect(KILL cr);
5669 
5670   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5671             "XOR    $dst.hi,$dst.hi" %}
5672   ins_encode %{
5673     Register Rdst = $dst$$Register;
5674     __ movzbl(Rdst, $mem$$Address);
5675     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5676   %}
5677   ins_pipe(ialu_reg_mem);
5678 %}
5679 
5680 // Load Integer with mask 0xFFFF into Long Register
5681 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5682   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5683   effect(KILL cr);
5684 
5685   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5686             "XOR    $dst.hi,$dst.hi" %}
5687   ins_encode %{
5688     Register Rdst = $dst$$Register;
5689     __ movzwl(Rdst, $mem$$Address);
5690     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5691   %}
5692   ins_pipe(ialu_reg_mem);
5693 %}
5694 
5695 // Load Integer with 31-bit mask into Long Register
5696 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5697   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5698   effect(KILL cr);
5699 
5700   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5701             "XOR    $dst.hi,$dst.hi\n\t"
5702             "AND    $dst.lo,$mask" %}
5703   ins_encode %{
5704     Register Rdst = $dst$$Register;
5705     __ movl(Rdst, $mem$$Address);
5706     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5707     __ andl(Rdst, $mask$$constant);
5708   %}
5709   ins_pipe(ialu_reg_mem);
5710 %}
5711 
5712 // Load Unsigned Integer into Long Register
5713 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5714   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5715   effect(KILL cr);
5716 
5717   ins_cost(250);
5718   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5719             "XOR    $dst.hi,$dst.hi" %}
5720 
5721   ins_encode %{
5722     __ movl($dst$$Register, $mem$$Address);
5723     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5724   %}
5725 
5726   ins_pipe(ialu_reg_mem);
5727 %}
5728 
5729 // Load Long.  Cannot clobber address while loading, so restrict address
5730 // register to ESI
5731 instruct loadL(eRegL dst, load_long_memory mem) %{
5732   predicate(!((LoadLNode*)n)->require_atomic_access());
5733   match(Set dst (LoadL mem));
5734 
5735   ins_cost(250);
5736   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5737             "MOV    $dst.hi,$mem+4" %}
5738 
5739   ins_encode %{
5740     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5741     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5742     __ movl($dst$$Register, Amemlo);
5743     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5744   %}
5745 
5746   ins_pipe(ialu_reg_long_mem);
5747 %}
5748 
5749 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5750 // then store it down to the stack and reload on the int
5751 // side.
5752 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5753   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5754   match(Set dst (LoadL mem));
5755 
5756   ins_cost(200);
5757   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5758             "FISTp  $dst" %}
5759   ins_encode(enc_loadL_volatile(mem,dst));
5760   ins_pipe( fpu_reg_mem );
5761 %}
5762 
5763 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5764   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5765   match(Set dst (LoadL mem));
5766   effect(TEMP tmp);
5767   ins_cost(180);
5768   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5769             "MOVSD  $dst,$tmp" %}
5770   ins_encode %{
5771     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5772     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5773   %}
5774   ins_pipe( pipe_slow );
5775 %}
5776 
5777 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5778   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5779   match(Set dst (LoadL mem));
5780   effect(TEMP tmp);
5781   ins_cost(160);
5782   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5783             "MOVD   $dst.lo,$tmp\n\t"
5784             "PSRLQ  $tmp,32\n\t"
5785             "MOVD   $dst.hi,$tmp" %}
5786   ins_encode %{
5787     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5788     __ movdl($dst$$Register, $tmp$$XMMRegister);
5789     __ psrlq($tmp$$XMMRegister, 32);
5790     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5791   %}
5792   ins_pipe( pipe_slow );
5793 %}
5794 
5795 // Load Range
5796 instruct loadRange(rRegI dst, memory mem) %{
5797   match(Set dst (LoadRange mem));
5798 
5799   ins_cost(125);
5800   format %{ "MOV    $dst,$mem" %}
5801   opcode(0x8B);
5802   ins_encode( OpcP, RegMem(dst,mem));
5803   ins_pipe( ialu_reg_mem );
5804 %}
5805 
5806 
5807 // Load Pointer
5808 instruct loadP(eRegP dst, memory mem) %{
5809   match(Set dst (LoadP mem));
5810 
5811   ins_cost(125);
5812   format %{ "MOV    $dst,$mem" %}
5813   opcode(0x8B);
5814   ins_encode( OpcP, RegMem(dst,mem));
5815   ins_pipe( ialu_reg_mem );
5816 %}
5817 
5818 // Load Klass Pointer
5819 instruct loadKlass(eRegP dst, memory mem) %{
5820   match(Set dst (LoadKlass mem));
5821 
5822   ins_cost(125);
5823   format %{ "MOV    $dst,$mem" %}
5824   opcode(0x8B);
5825   ins_encode( OpcP, RegMem(dst,mem));
5826   ins_pipe( ialu_reg_mem );
5827 %}
5828 
5829 // Load Double
5830 instruct loadDPR(regDPR dst, memory mem) %{
5831   predicate(UseSSE<=1);
5832   match(Set dst (LoadD mem));
5833 
5834   ins_cost(150);
5835   format %{ "FLD_D  ST,$mem\n\t"
5836             "FSTP   $dst" %}
5837   opcode(0xDD);               /* DD /0 */
5838   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5839               Pop_Reg_DPR(dst) );
5840   ins_pipe( fpu_reg_mem );
5841 %}
5842 
5843 // Load Double to XMM
5844 instruct loadD(regD dst, memory mem) %{
5845   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5846   match(Set dst (LoadD mem));
5847   ins_cost(145);
5848   format %{ "MOVSD  $dst,$mem" %}
5849   ins_encode %{
5850     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5851   %}
5852   ins_pipe( pipe_slow );
5853 %}
5854 
5855 instruct loadD_partial(regD dst, memory mem) %{
5856   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5857   match(Set dst (LoadD mem));
5858   ins_cost(145);
5859   format %{ "MOVLPD $dst,$mem" %}
5860   ins_encode %{
5861     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5862   %}
5863   ins_pipe( pipe_slow );
5864 %}
5865 
5866 // Load to XMM register (single-precision floating point)
5867 // MOVSS instruction
5868 instruct loadF(regF dst, memory mem) %{
5869   predicate(UseSSE>=1);
5870   match(Set dst (LoadF mem));
5871   ins_cost(145);
5872   format %{ "MOVSS  $dst,$mem" %}
5873   ins_encode %{
5874     __ movflt ($dst$$XMMRegister, $mem$$Address);
5875   %}
5876   ins_pipe( pipe_slow );
5877 %}
5878 
5879 // Load Float
5880 instruct loadFPR(regFPR dst, memory mem) %{
5881   predicate(UseSSE==0);
5882   match(Set dst (LoadF mem));
5883 
5884   ins_cost(150);
5885   format %{ "FLD_S  ST,$mem\n\t"
5886             "FSTP   $dst" %}
5887   opcode(0xD9);               /* D9 /0 */
5888   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5889               Pop_Reg_FPR(dst) );
5890   ins_pipe( fpu_reg_mem );
5891 %}
5892 
5893 // Load Effective Address
5894 instruct leaP8(eRegP dst, indOffset8 mem) %{
5895   match(Set dst mem);
5896 
5897   ins_cost(110);
5898   format %{ "LEA    $dst,$mem" %}
5899   opcode(0x8D);
5900   ins_encode( OpcP, RegMem(dst,mem));
5901   ins_pipe( ialu_reg_reg_fat );
5902 %}
5903 
5904 instruct leaP32(eRegP dst, indOffset32 mem) %{
5905   match(Set dst mem);
5906 
5907   ins_cost(110);
5908   format %{ "LEA    $dst,$mem" %}
5909   opcode(0x8D);
5910   ins_encode( OpcP, RegMem(dst,mem));
5911   ins_pipe( ialu_reg_reg_fat );
5912 %}
5913 
5914 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5915   match(Set dst mem);
5916 
5917   ins_cost(110);
5918   format %{ "LEA    $dst,$mem" %}
5919   opcode(0x8D);
5920   ins_encode( OpcP, RegMem(dst,mem));
5921   ins_pipe( ialu_reg_reg_fat );
5922 %}
5923 
5924 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5925   match(Set dst mem);
5926 
5927   ins_cost(110);
5928   format %{ "LEA    $dst,$mem" %}
5929   opcode(0x8D);
5930   ins_encode( OpcP, RegMem(dst,mem));
5931   ins_pipe( ialu_reg_reg_fat );
5932 %}
5933 
5934 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5935   match(Set dst mem);
5936 
5937   ins_cost(110);
5938   format %{ "LEA    $dst,$mem" %}
5939   opcode(0x8D);
5940   ins_encode( OpcP, RegMem(dst,mem));
5941   ins_pipe( ialu_reg_reg_fat );
5942 %}
5943 
5944 // Load Constant
5945 instruct loadConI(rRegI dst, immI src) %{
5946   match(Set dst src);
5947 
5948   format %{ "MOV    $dst,$src" %}
5949   ins_encode( LdImmI(dst, src) );
5950   ins_pipe( ialu_reg_fat );
5951 %}
5952 
5953 // Load Constant zero
5954 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5955   match(Set dst src);
5956   effect(KILL cr);
5957 
5958   ins_cost(50);
5959   format %{ "XOR    $dst,$dst" %}
5960   opcode(0x33);  /* + rd */
5961   ins_encode( OpcP, RegReg( dst, dst ) );
5962   ins_pipe( ialu_reg );
5963 %}
5964 
5965 instruct loadConP(eRegP dst, immP src) %{
5966   match(Set dst src);
5967 
5968   format %{ "MOV    $dst,$src" %}
5969   opcode(0xB8);  /* + rd */
5970   ins_encode( LdImmP(dst, src) );
5971   ins_pipe( ialu_reg_fat );
5972 %}
5973 
5974 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5975   match(Set dst src);
5976   effect(KILL cr);
5977   ins_cost(200);
5978   format %{ "MOV    $dst.lo,$src.lo\n\t"
5979             "MOV    $dst.hi,$src.hi" %}
5980   opcode(0xB8);
5981   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5982   ins_pipe( ialu_reg_long_fat );
5983 %}
5984 
5985 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5986   match(Set dst src);
5987   effect(KILL cr);
5988   ins_cost(150);
5989   format %{ "XOR    $dst.lo,$dst.lo\n\t"
5990             "XOR    $dst.hi,$dst.hi" %}
5991   opcode(0x33,0x33);
5992   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5993   ins_pipe( ialu_reg_long );
5994 %}
5995 
5996 // The instruction usage is guarded by predicate in operand immFPR().
5997 instruct loadConFPR(regFPR dst, immFPR con) %{
5998   match(Set dst con);
5999   ins_cost(125);
6000   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6001             "FSTP   $dst" %}
6002   ins_encode %{
6003     __ fld_s($constantaddress($con));
6004     __ fstp_d($dst$$reg);
6005   %}
6006   ins_pipe(fpu_reg_con);
6007 %}
6008 
6009 // The instruction usage is guarded by predicate in operand immFPR0().
6010 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6011   match(Set dst con);
6012   ins_cost(125);
6013   format %{ "FLDZ   ST\n\t"
6014             "FSTP   $dst" %}
6015   ins_encode %{
6016     __ fldz();
6017     __ fstp_d($dst$$reg);
6018   %}
6019   ins_pipe(fpu_reg_con);
6020 %}
6021 
6022 // The instruction usage is guarded by predicate in operand immFPR1().
6023 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6024   match(Set dst con);
6025   ins_cost(125);
6026   format %{ "FLD1   ST\n\t"
6027             "FSTP   $dst" %}
6028   ins_encode %{
6029     __ fld1();
6030     __ fstp_d($dst$$reg);
6031   %}
6032   ins_pipe(fpu_reg_con);
6033 %}
6034 
6035 // The instruction usage is guarded by predicate in operand immF().
6036 instruct loadConF(regF dst, immF con) %{
6037   match(Set dst con);
6038   ins_cost(125);
6039   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6040   ins_encode %{
6041     __ movflt($dst$$XMMRegister, $constantaddress($con));
6042   %}
6043   ins_pipe(pipe_slow);
6044 %}
6045 
6046 // The instruction usage is guarded by predicate in operand immF0().
6047 instruct loadConF0(regF dst, immF0 src) %{
6048   match(Set dst src);
6049   ins_cost(100);
6050   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6051   ins_encode %{
6052     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6053   %}
6054   ins_pipe(pipe_slow);
6055 %}
6056 
6057 // The instruction usage is guarded by predicate in operand immDPR().
6058 instruct loadConDPR(regDPR dst, immDPR con) %{
6059   match(Set dst con);
6060   ins_cost(125);
6061 
6062   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6063             "FSTP   $dst" %}
6064   ins_encode %{
6065     __ fld_d($constantaddress($con));
6066     __ fstp_d($dst$$reg);
6067   %}
6068   ins_pipe(fpu_reg_con);
6069 %}
6070 
6071 // The instruction usage is guarded by predicate in operand immDPR0().
6072 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6073   match(Set dst con);
6074   ins_cost(125);
6075 
6076   format %{ "FLDZ   ST\n\t"
6077             "FSTP   $dst" %}
6078   ins_encode %{
6079     __ fldz();
6080     __ fstp_d($dst$$reg);
6081   %}
6082   ins_pipe(fpu_reg_con);
6083 %}
6084 
6085 // The instruction usage is guarded by predicate in operand immDPR1().
6086 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6087   match(Set dst con);
6088   ins_cost(125);
6089 
6090   format %{ "FLD1   ST\n\t"
6091             "FSTP   $dst" %}
6092   ins_encode %{
6093     __ fld1();
6094     __ fstp_d($dst$$reg);
6095   %}
6096   ins_pipe(fpu_reg_con);
6097 %}
6098 
6099 // The instruction usage is guarded by predicate in operand immD().
6100 instruct loadConD(regD dst, immD con) %{
6101   match(Set dst con);
6102   ins_cost(125);
6103   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6104   ins_encode %{
6105     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6106   %}
6107   ins_pipe(pipe_slow);
6108 %}
6109 
6110 // The instruction usage is guarded by predicate in operand immD0().
6111 instruct loadConD0(regD dst, immD0 src) %{
6112   match(Set dst src);
6113   ins_cost(100);
6114   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6115   ins_encode %{
6116     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6117   %}
6118   ins_pipe( pipe_slow );
6119 %}
6120 
6121 // Load Stack Slot
6122 instruct loadSSI(rRegI dst, stackSlotI src) %{
6123   match(Set dst src);
6124   ins_cost(125);
6125 
6126   format %{ "MOV    $dst,$src" %}
6127   opcode(0x8B);
6128   ins_encode( OpcP, RegMem(dst,src));
6129   ins_pipe( ialu_reg_mem );
6130 %}
6131 
6132 instruct loadSSL(eRegL dst, stackSlotL src) %{
6133   match(Set dst src);
6134 
6135   ins_cost(200);
6136   format %{ "MOV    $dst,$src.lo\n\t"
6137             "MOV    $dst+4,$src.hi" %}
6138   opcode(0x8B, 0x8B);
6139   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6140   ins_pipe( ialu_mem_long_reg );
6141 %}
6142 
6143 // Load Stack Slot
6144 instruct loadSSP(eRegP dst, stackSlotP src) %{
6145   match(Set dst src);
6146   ins_cost(125);
6147 
6148   format %{ "MOV    $dst,$src" %}
6149   opcode(0x8B);
6150   ins_encode( OpcP, RegMem(dst,src));
6151   ins_pipe( ialu_reg_mem );
6152 %}
6153 
6154 // Load Stack Slot
6155 instruct loadSSF(regFPR dst, stackSlotF src) %{
6156   match(Set dst src);
6157   ins_cost(125);
6158 
6159   format %{ "FLD_S  $src\n\t"
6160             "FSTP   $dst" %}
6161   opcode(0xD9);               /* D9 /0, FLD m32real */
6162   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6163               Pop_Reg_FPR(dst) );
6164   ins_pipe( fpu_reg_mem );
6165 %}
6166 
6167 // Load Stack Slot
6168 instruct loadSSD(regDPR dst, stackSlotD src) %{
6169   match(Set dst src);
6170   ins_cost(125);
6171 
6172   format %{ "FLD_D  $src\n\t"
6173             "FSTP   $dst" %}
6174   opcode(0xDD);               /* DD /0, FLD m64real */
6175   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6176               Pop_Reg_DPR(dst) );
6177   ins_pipe( fpu_reg_mem );
6178 %}
6179 
6180 // Prefetch instructions for allocation.
6181 // Must be safe to execute with invalid address (cannot fault).
6182 
6183 instruct prefetchAlloc0( memory mem ) %{
6184   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6185   match(PrefetchAllocation mem);
6186   ins_cost(0);
6187   size(0);
6188   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6189   ins_encode();
6190   ins_pipe(empty);
6191 %}
6192 
6193 instruct prefetchAlloc( memory mem ) %{
6194   predicate(AllocatePrefetchInstr==3);
6195   match( PrefetchAllocation mem );
6196   ins_cost(100);
6197 
6198   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6199   ins_encode %{
6200     __ prefetchw($mem$$Address);
6201   %}
6202   ins_pipe(ialu_mem);
6203 %}
6204 
6205 instruct prefetchAllocNTA( memory mem ) %{
6206   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6207   match(PrefetchAllocation mem);
6208   ins_cost(100);
6209 
6210   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6211   ins_encode %{
6212     __ prefetchnta($mem$$Address);
6213   %}
6214   ins_pipe(ialu_mem);
6215 %}
6216 
6217 instruct prefetchAllocT0( memory mem ) %{
6218   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6219   match(PrefetchAllocation mem);
6220   ins_cost(100);
6221 
6222   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6223   ins_encode %{
6224     __ prefetcht0($mem$$Address);
6225   %}
6226   ins_pipe(ialu_mem);
6227 %}
6228 
6229 instruct prefetchAllocT2( memory mem ) %{
6230   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6231   match(PrefetchAllocation mem);
6232   ins_cost(100);
6233 
6234   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6235   ins_encode %{
6236     __ prefetcht2($mem$$Address);
6237   %}
6238   ins_pipe(ialu_mem);
6239 %}
6240 
6241 //----------Store Instructions-------------------------------------------------
6242 
6243 // Store Byte
6244 instruct storeB(memory mem, xRegI src) %{
6245   match(Set mem (StoreB mem src));
6246 
6247   ins_cost(125);
6248   format %{ "MOV8   $mem,$src" %}
6249   opcode(0x88);
6250   ins_encode( OpcP, RegMem( src, mem ) );
6251   ins_pipe( ialu_mem_reg );
6252 %}
6253 
6254 // Store Char/Short
6255 instruct storeC(memory mem, rRegI src) %{
6256   match(Set mem (StoreC mem src));
6257 
6258   ins_cost(125);
6259   format %{ "MOV16  $mem,$src" %}
6260   opcode(0x89, 0x66);
6261   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6262   ins_pipe( ialu_mem_reg );
6263 %}
6264 
6265 // Store Integer
6266 instruct storeI(memory mem, rRegI src) %{
6267   match(Set mem (StoreI mem src));
6268 
6269   ins_cost(125);
6270   format %{ "MOV    $mem,$src" %}
6271   opcode(0x89);
6272   ins_encode( OpcP, RegMem( src, mem ) );
6273   ins_pipe( ialu_mem_reg );
6274 %}
6275 
6276 // Store Long
6277 instruct storeL(long_memory mem, eRegL src) %{
6278   predicate(!((StoreLNode*)n)->require_atomic_access());
6279   match(Set mem (StoreL mem src));
6280 
6281   ins_cost(200);
6282   format %{ "MOV    $mem,$src.lo\n\t"
6283             "MOV    $mem+4,$src.hi" %}
6284   opcode(0x89, 0x89);
6285   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6286   ins_pipe( ialu_mem_long_reg );
6287 %}
6288 
6289 // Store Long to Integer
6290 instruct storeL2I(memory mem, eRegL src) %{
6291   match(Set mem (StoreI mem (ConvL2I src)));
6292 
6293   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6294   ins_encode %{
6295     __ movl($mem$$Address, $src$$Register);
6296   %}
6297   ins_pipe(ialu_mem_reg);
6298 %}
6299 
6300 // Volatile Store Long.  Must be atomic, so move it into
6301 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6302 // target address before the store (for null-ptr checks)
6303 // so the memory operand is used twice in the encoding.
6304 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6305   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6306   match(Set mem (StoreL mem src));
6307   effect( KILL cr );
6308   ins_cost(400);
6309   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6310             "FILD   $src\n\t"
6311             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6312   opcode(0x3B);
6313   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6314   ins_pipe( fpu_reg_mem );
6315 %}
6316 
6317 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6318   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6319   match(Set mem (StoreL mem src));
6320   effect( TEMP tmp, KILL cr );
6321   ins_cost(380);
6322   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6323             "MOVSD  $tmp,$src\n\t"
6324             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6325   ins_encode %{
6326     __ cmpl(rax, $mem$$Address);
6327     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6328     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6329   %}
6330   ins_pipe( pipe_slow );
6331 %}
6332 
6333 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6334   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6335   match(Set mem (StoreL mem src));
6336   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6337   ins_cost(360);
6338   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6339             "MOVD   $tmp,$src.lo\n\t"
6340             "MOVD   $tmp2,$src.hi\n\t"
6341             "PUNPCKLDQ $tmp,$tmp2\n\t"
6342             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6343   ins_encode %{
6344     __ cmpl(rax, $mem$$Address);
6345     __ movdl($tmp$$XMMRegister, $src$$Register);
6346     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6347     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6348     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6349   %}
6350   ins_pipe( pipe_slow );
6351 %}
6352 
6353 // Store Pointer; for storing unknown oops and raw pointers
6354 instruct storeP(memory mem, anyRegP src) %{
6355   match(Set mem (StoreP mem src));
6356 
6357   ins_cost(125);
6358   format %{ "MOV    $mem,$src" %}
6359   opcode(0x89);
6360   ins_encode( OpcP, RegMem( src, mem ) );
6361   ins_pipe( ialu_mem_reg );
6362 %}
6363 
6364 // Store Integer Immediate
6365 instruct storeImmI(memory mem, immI src) %{
6366   match(Set mem (StoreI mem src));
6367 
6368   ins_cost(150);
6369   format %{ "MOV    $mem,$src" %}
6370   opcode(0xC7);               /* C7 /0 */
6371   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6372   ins_pipe( ialu_mem_imm );
6373 %}
6374 
6375 // Store Short/Char Immediate
6376 instruct storeImmI16(memory mem, immI16 src) %{
6377   predicate(UseStoreImmI16);
6378   match(Set mem (StoreC mem src));
6379 
6380   ins_cost(150);
6381   format %{ "MOV16  $mem,$src" %}
6382   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6383   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6384   ins_pipe( ialu_mem_imm );
6385 %}
6386 
6387 // Store Pointer Immediate; null pointers or constant oops that do not
6388 // need card-mark barriers.
6389 instruct storeImmP(memory mem, immP src) %{
6390   match(Set mem (StoreP mem src));
6391 
6392   ins_cost(150);
6393   format %{ "MOV    $mem,$src" %}
6394   opcode(0xC7);               /* C7 /0 */
6395   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6396   ins_pipe( ialu_mem_imm );
6397 %}
6398 
6399 // Store Byte Immediate
6400 instruct storeImmB(memory mem, immI8 src) %{
6401   match(Set mem (StoreB mem src));
6402 
6403   ins_cost(150);
6404   format %{ "MOV8   $mem,$src" %}
6405   opcode(0xC6);               /* C6 /0 */
6406   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6407   ins_pipe( ialu_mem_imm );
6408 %}
6409 
6410 // Store CMS card-mark Immediate
6411 instruct storeImmCM(memory mem, immI8 src) %{
6412   match(Set mem (StoreCM mem src));
6413 
6414   ins_cost(150);
6415   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6416   opcode(0xC6);               /* C6 /0 */
6417   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6418   ins_pipe( ialu_mem_imm );
6419 %}
6420 
6421 // Store Double
6422 instruct storeDPR( memory mem, regDPR1 src) %{
6423   predicate(UseSSE<=1);
6424   match(Set mem (StoreD mem src));
6425 
6426   ins_cost(100);
6427   format %{ "FST_D  $mem,$src" %}
6428   opcode(0xDD);       /* DD /2 */
6429   ins_encode( enc_FPR_store(mem,src) );
6430   ins_pipe( fpu_mem_reg );
6431 %}
6432 
6433 // Store double does rounding on x86
6434 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6435   predicate(UseSSE<=1);
6436   match(Set mem (StoreD mem (RoundDouble src)));
6437 
6438   ins_cost(100);
6439   format %{ "FST_D  $mem,$src\t# round" %}
6440   opcode(0xDD);       /* DD /2 */
6441   ins_encode( enc_FPR_store(mem,src) );
6442   ins_pipe( fpu_mem_reg );
6443 %}
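// Descriptive note: an x87 FST/FSTP to a 32-bit or 64-bit memory operand rounds
// the 80-bit register value to the destination precision, which is why the
// RoundDouble above (and RoundFloat further below) can be folded into the plain
// store encoding.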
6444 
6445 // Store XMM register to memory (double-precision floating point)
6446 // MOVSD instruction
6447 instruct storeD(memory mem, regD src) %{
6448   predicate(UseSSE>=2);
6449   match(Set mem (StoreD mem src));
6450   ins_cost(95);
6451   format %{ "MOVSD  $mem,$src" %}
6452   ins_encode %{
6453     __ movdbl($mem$$Address, $src$$XMMRegister);
6454   %}
6455   ins_pipe( pipe_slow );
6456 %}
6457 
6458 // Store XMM register to memory (single-precision floating point)
6459 // MOVSS instruction
6460 instruct storeF(memory mem, regF src) %{
6461   predicate(UseSSE>=1);
6462   match(Set mem (StoreF mem src));
6463   ins_cost(95);
6464   format %{ "MOVSS  $mem,$src" %}
6465   ins_encode %{
6466     __ movflt($mem$$Address, $src$$XMMRegister);
6467   %}
6468   ins_pipe( pipe_slow );
6469 %}
6470 
6471 // Store Float
6472 instruct storeFPR( memory mem, regFPR1 src) %{
6473   predicate(UseSSE==0);
6474   match(Set mem (StoreF mem src));
6475 
6476   ins_cost(100);
6477   format %{ "FST_S  $mem,$src" %}
6478   opcode(0xD9);       /* D9 /2 */
6479   ins_encode( enc_FPR_store(mem,src) );
6480   ins_pipe( fpu_mem_reg );
6481 %}
6482 
6483 // Store Float does rounding on x86
6484 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6485   predicate(UseSSE==0);
6486   match(Set mem (StoreF mem (RoundFloat src)));
6487 
6488   ins_cost(100);
6489   format %{ "FST_S  $mem,$src\t# round" %}
6490   opcode(0xD9);       /* D9 /2 */
6491   ins_encode( enc_FPR_store(mem,src) );
6492   ins_pipe( fpu_mem_reg );
6493 %}
6494 
6495 // Store Float from a Double register does double-to-float rounding on x86
6496 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6497   predicate(UseSSE<=1);
6498   match(Set mem (StoreF mem (ConvD2F src)));
6499 
6500   ins_cost(100);
6501   format %{ "FST_S  $mem,$src\t# D-round" %}
6502   opcode(0xD9);       /* D9 /2 */
6503   ins_encode( enc_FPR_store(mem,src) );
6504   ins_pipe( fpu_mem_reg );
6505 %}
6506 
6507 // Store an immediate Float value (faster than a store from an FPU register)
6508 // The instruction usage is guarded by predicate in operand immFPR().
6509 instruct storeFPR_imm( memory mem, immFPR src) %{
6510   match(Set mem (StoreF mem src));
6511 
6512   ins_cost(50);
6513   format %{ "MOV    $mem,$src\t# store float" %}
6514   opcode(0xC7);               /* C7 /0 */
6515   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6516   ins_pipe( ialu_mem_imm );
6517 %}
6518 
6519 // Store an immediate Float value (faster than a store from an XMM register)
6520 // The instruction usage is guarded by predicate in operand immF().
6521 instruct storeF_imm( memory mem, immF src) %{
6522   match(Set mem (StoreF mem src));
6523 
6524   ins_cost(50);
6525   format %{ "MOV    $mem,$src\t# store float" %}
6526   opcode(0xC7);               /* C7 /0 */
6527   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6528   ins_pipe( ialu_mem_imm );
6529 %}
6530 
6531 // Store Integer to stack slot
6532 instruct storeSSI(stackSlotI dst, rRegI src) %{
6533   match(Set dst src);
6534 
6535   ins_cost(100);
6536   format %{ "MOV    $dst,$src" %}
6537   opcode(0x89);
6538   ins_encode( OpcPRegSS( dst, src ) );
6539   ins_pipe( ialu_mem_reg );
6540 %}
6541 
6542 // Store Pointer to stack slot
6543 instruct storeSSP(stackSlotP dst, eRegP src) %{
6544   match(Set dst src);
6545 
6546   ins_cost(100);
6547   format %{ "MOV    $dst,$src" %}
6548   opcode(0x89);
6549   ins_encode( OpcPRegSS( dst, src ) );
6550   ins_pipe( ialu_mem_reg );
6551 %}
6552 
6553 // Store Long to stack slot
6554 instruct storeSSL(stackSlotL dst, eRegL src) %{
6555   match(Set dst src);
6556 
6557   ins_cost(200);
6558   format %{ "MOV    $dst,$src.lo\n\t"
6559             "MOV    $dst+4,$src.hi" %}
6560   opcode(0x89, 0x89);
6561   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6562   ins_pipe( ialu_mem_long_reg );
6563 %}
6564 
6565 //----------MemBar Instructions-----------------------------------------------
6566 // Memory barrier flavors
6567 
6568 instruct membar_acquire() %{
6569   match(MemBarAcquire);
6570   match(LoadFence);
6571   ins_cost(400);
6572 
6573   size(0);
6574   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6575   ins_encode();
6576   ins_pipe(empty);
6577 %}
6578 
6579 instruct membar_acquire_lock() %{
6580   match(MemBarAcquireLock);
6581   ins_cost(0);
6582 
6583   size(0);
6584   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6585   ins_encode( );
6586   ins_pipe(empty);
6587 %}
6588 
6589 instruct membar_release() %{
6590   match(MemBarRelease);
6591   match(StoreFence);
6592   ins_cost(400);
6593 
6594   size(0);
6595   format %{ "MEMBAR-release ! (empty encoding)" %}
6596   ins_encode( );
6597   ins_pipe(empty);
6598 %}
6599 
6600 instruct membar_release_lock() %{
6601   match(MemBarReleaseLock);
6602   ins_cost(0);
6603 
6604   size(0);
6605   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6606   ins_encode( );
6607   ins_pipe(empty);
6608 %}
6609 
6610 instruct membar_volatile(eFlagsReg cr) %{
6611   match(MemBarVolatile);
6612   effect(KILL cr);
6613   ins_cost(400);
6614 
6615   format %{
6616     $$template
6617     if (os::is_MP()) {
6618       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6619     } else {
6620       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6621     }
6622   %}
6623   ins_encode %{
6624     __ membar(Assembler::StoreLoad);
6625   %}
6626   ins_pipe(pipe_slow);
6627 %}
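// The encoding above relies on any LOCKed read-modify-write being a full
// StoreLoad barrier on IA-32; adding 0 to the top of the stack is a cheap,
// always-mapped target.  Illustrative stand-alone sketch of the same idiom
// (GCC-style inline asm, for exposition only, not part of this file):
//
//   static inline void store_load_fence() {
//     __asm__ volatile("lock; addl $0,(%%esp)" : : : "cc", "memory");
//   }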
6628 
6629 instruct unnecessary_membar_volatile() %{
6630   match(MemBarVolatile);
6631   predicate(Matcher::post_store_load_barrier(n));
6632   ins_cost(0);
6633 
6634   size(0);
6635   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6636   ins_encode( );
6637   ins_pipe(empty);
6638 %}
6639 
6640 instruct membar_storestore() %{
6641   match(MemBarStoreStore);
6642   ins_cost(0);
6643 
6644   size(0);
6645   format %{ "MEMBAR-storestore (empty encoding)" %}
6646   ins_encode( );
6647   ins_pipe(empty);
6648 %}
6649 
6650 //----------Move Instructions--------------------------------------------------
6651 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6652   match(Set dst (CastX2P src));
6653   format %{ "# X2P  $dst, $src" %}
6654   ins_encode( /*empty encoding*/ );
6655   ins_cost(0);
6656   ins_pipe(empty);
6657 %}
6658 
6659 instruct castP2X(rRegI dst, eRegP src ) %{
6660   match(Set dst (CastP2X src));
6661   ins_cost(50);
6662   format %{ "MOV    $dst, $src\t# CastP2X" %}
6663   ins_encode( enc_Copy( dst, src) );
6664   ins_pipe( ialu_reg_reg );
6665 %}
6666 
6667 //----------Conditional Move---------------------------------------------------
6668 // Conditional move
6669 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6670   predicate(!VM_Version::supports_cmov() );
6671   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6672   ins_cost(200);
6673   format %{ "J$cop,us skip\t# signed cmove\n\t"
6674             "MOV    $dst,$src\n"
6675       "skip:" %}
6676   ins_encode %{
6677     Label Lskip;
6678     // Invert sense of branch from sense of CMOV
6679     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6680     __ movl($dst$$Register, $src$$Register);
6681     __ bind(Lskip);
6682   %}
6683   ins_pipe( pipe_cmov_reg );
6684 %}
6685 
6686 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6687   predicate(!VM_Version::supports_cmov() );
6688   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6689   ins_cost(200);
6690   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6691             "MOV    $dst,$src\n"
6692       "skip:" %}
6693   ins_encode %{
6694     Label Lskip;
6695     // Invert sense of branch from sense of CMOV
6696     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6697     __ movl($dst$$Register, $src$$Register);
6698     __ bind(Lskip);
6699   %}
6700   ins_pipe( pipe_cmov_reg );
6701 %}
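// Both jmov rules above emulate CMOVcc on parts without it: "if (cc) dst = src"
// becomes a conditional branch around a plain MOV.  The encodings xor the
// condition code with 1 ($cop$$cmpcode^1) because x86 encodes each condition
// and its negation as an adjacent pair (e.g. JE/JNE), so flipping the low bit
// yields the inverted branch needed to skip the move.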
6702 
6703 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6704   predicate(VM_Version::supports_cmov() );
6705   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6706   ins_cost(200);
6707   format %{ "CMOV$cop $dst,$src" %}
6708   opcode(0x0F,0x40);
6709   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6710   ins_pipe( pipe_cmov_reg );
6711 %}
6712 
6713 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6714   predicate(VM_Version::supports_cmov() );
6715   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6716   ins_cost(200);
6717   format %{ "CMOV$cop $dst,$src" %}
6718   opcode(0x0F,0x40);
6719   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6720   ins_pipe( pipe_cmov_reg );
6721 %}
6722 
6723 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6724   predicate(VM_Version::supports_cmov() );
6725   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6726   ins_cost(200);
6727   expand %{
6728     cmovI_regU(cop, cr, dst, src);
6729   %}
6730 %}
6731 
6732 // Conditional move
6733 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6734   predicate(VM_Version::supports_cmov() );
6735   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6736   ins_cost(250);
6737   format %{ "CMOV$cop $dst,$src" %}
6738   opcode(0x0F,0x40);
6739   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6740   ins_pipe( pipe_cmov_mem );
6741 %}
6742 
6743 // Conditional move
6744 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6745   predicate(VM_Version::supports_cmov() );
6746   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6747   ins_cost(250);
6748   format %{ "CMOV$cop $dst,$src" %}
6749   opcode(0x0F,0x40);
6750   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6751   ins_pipe( pipe_cmov_mem );
6752 %}
6753 
6754 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6755   predicate(VM_Version::supports_cmov() );
6756   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6757   ins_cost(250);
6758   expand %{
6759     cmovI_memU(cop, cr, dst, src);
6760   %}
6761 %}
6762 
6763 // Conditional move
6764 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6765   predicate(VM_Version::supports_cmov() );
6766   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6767   ins_cost(200);
6768   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6769   opcode(0x0F,0x40);
6770   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6771   ins_pipe( pipe_cmov_reg );
6772 %}
6773 
6774 // Conditional move (non-P6 version)
6775 // Note:  a CMoveP is generated for  stubs and native wrappers
6776 //        regardless of whether we are on a P6, so we
6777 //        emulate a cmov here
6778 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6779   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6780   ins_cost(300);
6781   format %{ "Jn$cop   skip\n\t"
6782           "MOV    $dst,$src\t# pointer\n"
6783       "skip:" %}
6784   opcode(0x8b);
6785   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6786   ins_pipe( pipe_cmov_reg );
6787 %}
6788 
6789 // Conditional move
6790 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6791   predicate(VM_Version::supports_cmov() );
6792   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6793   ins_cost(200);
6794   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6795   opcode(0x0F,0x40);
6796   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6797   ins_pipe( pipe_cmov_reg );
6798 %}
6799 
6800 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6801   predicate(VM_Version::supports_cmov() );
6802   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6803   ins_cost(200);
6804   expand %{
6805     cmovP_regU(cop, cr, dst, src);
6806   %}
6807 %}
6808 
6809 // DISABLED: Requires the ADLC to emit a bottom_type call that
6810 // correctly meets the two pointer arguments; one is an incoming
6811 // register but the other is a memory operand.  ALSO appears to
6812 // be buggy with implicit null checks.
6813 //
6814 //// Conditional move
6815 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6816 //  predicate(VM_Version::supports_cmov() );
6817 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6818 //  ins_cost(250);
6819 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6820 //  opcode(0x0F,0x40);
6821 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6822 //  ins_pipe( pipe_cmov_mem );
6823 //%}
6824 //
6825 //// Conditional move
6826 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6827 //  predicate(VM_Version::supports_cmov() );
6828 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6829 //  ins_cost(250);
6830 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6831 //  opcode(0x0F,0x40);
6832 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6833 //  ins_pipe( pipe_cmov_mem );
6834 //%}
6835 
6836 // Conditional move
6837 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6838   predicate(UseSSE<=1);
6839   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6840   ins_cost(200);
6841   format %{ "FCMOV$cop $dst,$src\t# double" %}
6842   opcode(0xDA);
6843   ins_encode( enc_cmov_dpr(cop,src) );
6844   ins_pipe( pipe_cmovDPR_reg );
6845 %}
6846 
6847 // Conditional move
6848 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6849   predicate(UseSSE==0);
6850   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6851   ins_cost(200);
6852   format %{ "FCMOV$cop $dst,$src\t# float" %}
6853   opcode(0xDA);
6854   ins_encode( enc_cmov_dpr(cop,src) );
6855   ins_pipe( pipe_cmovDPR_reg );
6856 %}
6857 
6858 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6859 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6860   predicate(UseSSE<=1);
6861   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6862   ins_cost(200);
6863   format %{ "Jn$cop   skip\n\t"
6864             "MOV    $dst,$src\t# double\n"
6865       "skip:" %}
6866   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6867   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6868   ins_pipe( pipe_cmovDPR_reg );
6869 %}
6870 
6871 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6872 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6873   predicate(UseSSE==0);
6874   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6875   ins_cost(200);
6876   format %{ "Jn$cop    skip\n\t"
6877             "MOV    $dst,$src\t# float\n"
6878       "skip:" %}
6879   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6880   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6881   ins_pipe( pipe_cmovDPR_reg );
6882 %}
6883 
6884 // No CMOV for XMM registers with SSE/SSE2; emulate it with a short branch
6885 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6886   predicate (UseSSE>=1);
6887   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6888   ins_cost(200);
6889   format %{ "Jn$cop   skip\n\t"
6890             "MOVSS  $dst,$src\t# float\n"
6891       "skip:" %}
6892   ins_encode %{
6893     Label skip;
6894     // Invert sense of branch from sense of CMOV
6895     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6896     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6897     __ bind(skip);
6898   %}
6899   ins_pipe( pipe_slow );
6900 %}
6901 
6902 // No CMOV for XMM registers with SSE/SSE2; emulate it with a short branch
6903 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6904   predicate (UseSSE>=2);
6905   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6906   ins_cost(200);
6907   format %{ "Jn$cop   skip\n\t"
6908             "MOVSD  $dst,$src\t# float\n"
6909       "skip:" %}
6910   ins_encode %{
6911     Label skip;
6912     // Invert sense of branch from sense of CMOV
6913     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6914     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6915     __ bind(skip);
6916   %}
6917   ins_pipe( pipe_slow );
6918 %}
6919 
6920 // unsigned version
6921 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6922   predicate (UseSSE>=1);
6923   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6924   ins_cost(200);
6925   format %{ "Jn$cop   skip\n\t"
6926             "MOVSS  $dst,$src\t# float\n"
6927       "skip:" %}
6928   ins_encode %{
6929     Label skip;
6930     // Invert sense of branch from sense of CMOV
6931     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6932     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6933     __ bind(skip);
6934   %}
6935   ins_pipe( pipe_slow );
6936 %}
6937 
6938 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6939   predicate (UseSSE>=1);
6940   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6941   ins_cost(200);
6942   expand %{
6943     fcmovF_regU(cop, cr, dst, src);
6944   %}
6945 %}
6946 
6947 // unsigned version
6948 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6949   predicate (UseSSE>=2);
6950   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6951   ins_cost(200);
6952   format %{ "Jn$cop   skip\n\t"
6953             "MOVSD  $dst,$src\t# float\n"
6954       "skip:" %}
6955   ins_encode %{
6956     Label skip;
6957     // Invert sense of branch from sense of CMOV
6958     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6959     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6960     __ bind(skip);
6961   %}
6962   ins_pipe( pipe_slow );
6963 %}
6964 
6965 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6966   predicate (UseSSE>=2);
6967   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6968   ins_cost(200);
6969   expand %{
6970     fcmovD_regU(cop, cr, dst, src);
6971   %}
6972 %}
6973 
6974 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6975   predicate(VM_Version::supports_cmov() );
6976   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6977   ins_cost(200);
6978   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6979             "CMOV$cop $dst.hi,$src.hi" %}
6980   opcode(0x0F,0x40);
6981   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6982   ins_pipe( pipe_cmov_reg_long );
6983 %}
6984 
6985 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6986   predicate(VM_Version::supports_cmov() );
6987   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6988   ins_cost(200);
6989   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6990             "CMOV$cop $dst.hi,$src.hi" %}
6991   opcode(0x0F,0x40);
6992   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6993   ins_pipe( pipe_cmov_reg_long );
6994 %}
6995 
6996 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6997   predicate(VM_Version::supports_cmov() );
6998   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6999   ins_cost(200);
7000   expand %{
7001     cmovL_regU(cop, cr, dst, src);
7002   %}
7003 %}
7004 
7005 //----------Arithmetic Instructions--------------------------------------------
7006 //----------Addition Instructions----------------------------------------------
7007 
7008 // Integer Addition Instructions
7009 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7010   match(Set dst (AddI dst src));
7011   effect(KILL cr);
7012 
7013   size(2);
7014   format %{ "ADD    $dst,$src" %}
7015   opcode(0x03);
7016   ins_encode( OpcP, RegReg( dst, src) );
7017   ins_pipe( ialu_reg_reg );
7018 %}
7019 
7020 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7021   match(Set dst (AddI dst src));
7022   effect(KILL cr);
7023 
7024   format %{ "ADD    $dst,$src" %}
7025   opcode(0x81, 0x00); /* /0 id */
7026   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7027   ins_pipe( ialu_reg );
7028 %}
7029 
7030 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7031   predicate(UseIncDec);
7032   match(Set dst (AddI dst src));
7033   effect(KILL cr);
7034 
7035   size(1);
7036   format %{ "INC    $dst" %}
7037   opcode(0x40); /*  */
7038   ins_encode( Opc_plus( primary, dst ) );
7039   ins_pipe( ialu_reg );
7040 %}
7041 
7042 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7043   match(Set dst (AddI src0 src1));
7044   ins_cost(110);
7045 
7046   format %{ "LEA    $dst,[$src0 + $src1]" %}
7047   opcode(0x8D); /* 0x8D /r */
7048   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7049   ins_pipe( ialu_reg_reg );
7050 %}
7051 
7052 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7053   match(Set dst (AddP src0 src1));
7054   ins_cost(110);
7055 
7056   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7057   opcode(0x8D); /* 0x8D /r */
7058   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7059   ins_pipe( ialu_reg_reg );
7060 %}
7061 
7062 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7063   predicate(UseIncDec);
7064   match(Set dst (AddI dst src));
7065   effect(KILL cr);
7066 
7067   size(1);
7068   format %{ "DEC    $dst" %}
7069   opcode(0x48); /*  */
7070   ins_encode( Opc_plus( primary, dst ) );
7071   ins_pipe( ialu_reg );
7072 %}
7073 
7074 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7075   match(Set dst (AddP dst src));
7076   effect(KILL cr);
7077 
7078   size(2);
7079   format %{ "ADD    $dst,$src" %}
7080   opcode(0x03);
7081   ins_encode( OpcP, RegReg( dst, src) );
7082   ins_pipe( ialu_reg_reg );
7083 %}
7084 
7085 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7086   match(Set dst (AddP dst src));
7087   effect(KILL cr);
7088 
7089   format %{ "ADD    $dst,$src" %}
7090   opcode(0x81,0x00); /* Opcode 81 /0 id */
7091   // ins_encode( RegImm( dst, src) );
7092   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7093   ins_pipe( ialu_reg );
7094 %}
7095 
7096 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7097   match(Set dst (AddI dst (LoadI src)));
7098   effect(KILL cr);
7099 
7100   ins_cost(125);
7101   format %{ "ADD    $dst,$src" %}
7102   opcode(0x03);
7103   ins_encode( OpcP, RegMem( dst, src) );
7104   ins_pipe( ialu_reg_mem );
7105 %}
7106 
7107 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7108   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7109   effect(KILL cr);
7110 
7111   ins_cost(150);
7112   format %{ "ADD    $dst,$src" %}
7113   opcode(0x01);  /* Opcode 01 /r */
7114   ins_encode( OpcP, RegMem( src, dst ) );
7115   ins_pipe( ialu_mem_reg );
7116 %}
7117 
7118 // Add Memory with Immediate
7119 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7120   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7121   effect(KILL cr);
7122 
7123   ins_cost(125);
7124   format %{ "ADD    $dst,$src" %}
7125   opcode(0x81);               /* Opcode 81 /0 id */
7126   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7127   ins_pipe( ialu_mem_imm );
7128 %}
7129 
7130 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7131   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7132   effect(KILL cr);
7133 
7134   ins_cost(125);
7135   format %{ "INC    $dst" %}
7136   opcode(0xFF);               /* Opcode FF /0 */
7137   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7138   ins_pipe( ialu_mem_imm );
7139 %}
7140 
7141 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7142   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7143   effect(KILL cr);
7144 
7145   ins_cost(125);
7146   format %{ "DEC    $dst" %}
7147   opcode(0xFF);               /* Opcode FF /1 */
7148   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7149   ins_pipe( ialu_mem_imm );
7150 %}
7151 
7152 
7153 instruct checkCastPP( eRegP dst ) %{
7154   match(Set dst (CheckCastPP dst));
7155 
7156   size(0);
7157   format %{ "#checkcastPP of $dst" %}
7158   ins_encode( /*empty encoding*/ );
7159   ins_pipe( empty );
7160 %}
7161 
7162 instruct castPP( eRegP dst ) %{
7163   match(Set dst (CastPP dst));
7164   format %{ "#castPP of $dst" %}
7165   ins_encode( /*empty encoding*/ );
7166   ins_pipe( empty );
7167 %}
7168 
7169 instruct castII( rRegI dst ) %{
7170   match(Set dst (CastII dst));
7171   format %{ "#castII of $dst" %}
7172   ins_encode( /*empty encoding*/ );
7173   ins_cost(0);
7174   ins_pipe( empty );
7175 %}
7176 
7177 
7178 // Load-locked - same as a regular pointer load when used with compare-swap
7179 instruct loadPLocked(eRegP dst, memory mem) %{
7180   match(Set dst (LoadPLocked mem));
7181 
7182   ins_cost(125);
7183   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7184   opcode(0x8B);
7185   ins_encode( OpcP, RegMem(dst,mem));
7186   ins_pipe( ialu_reg_mem );
7187 %}
7188 
7189 // Conditional-store of the updated heap-top.
7190 // Used during allocation of the shared heap.
7191 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7192 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7193   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7194   // EAX is killed if there is contention, but then it's also unused.
7195   // In the common case of no contention, EAX holds the new oop address.
7196   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7197   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7198   ins_pipe( pipe_cmpxchg );
7199 %}
7200 
7201 // Conditional-store of an int value.
7202 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7203 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7204   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7205   effect(KILL oldval);
7206   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7207   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7208   ins_pipe( pipe_cmpxchg );
7209 %}
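// The two conditional-store rules above (and the compare-and-swap rules below)
// rely on the CMPXCHG semantics, summarized here for reference (illustrative
// pseudo-C, not matcher input):
//   if (EAX == *mem) { *mem = newval; ZF = 1; }   // success: flags report "equal"
//   else             { EAX  = *mem;  ZF = 0; }    // failure: EAX gets the current value
// With the LOCK prefix the compare and the store form one atomic operation.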
7210 
7211 // Conditional-store of a long value.
7212 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7213 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7214   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7215   effect(KILL oldval);
7216   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7217             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7218             "XCHG   EBX,ECX"
7219   %}
7220   ins_encode %{
7221     // Note: we need to swap rbx and rcx before and after the
7222     //       cmpxchg8 instruction because the instruction uses
7223     //       rcx as the high order word of the new value to store but
7224     //       our register encoding uses rbx.
7225     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7226     if( os::is_MP() )
7227       __ lock();
7228     __ cmpxchg8($mem$$Address);
7229     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7230   %}
7231   ins_pipe( pipe_cmpxchg );
7232 %}
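// CMPXCHG8B, used above, is the 64-bit analogue (illustrative pseudo-C):
//   if (EDX:EAX == *mem64) { *mem64 = ECX:EBX; ZF = 1; }
//   else                   { EDX:EAX = *mem64; ZF = 0; }
// The new value's high word must sit in ECX, which is why the encoding swaps
// EBX and ECX around the instruction.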
7233 
7234 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7235 
7236 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7237   predicate(VM_Version::supports_cx8());
7238   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7239   effect(KILL cr, KILL oldval);
7240   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7241             "MOV    $res,0\n\t"
7242             "JNE,s  fail\n\t"
7243             "MOV    $res,1\n"
7244           "fail:" %}
7245   ins_encode( enc_cmpxchg8(mem_ptr),
7246               enc_flags_ne_to_boolean(res) );
7247   ins_pipe( pipe_cmpxchg );
7248 %}
7249 
7250 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7251   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7252   effect(KILL cr, KILL oldval);
7253   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7254             "MOV    $res,0\n\t"
7255             "JNE,s  fail\n\t"
7256             "MOV    $res,1\n"
7257           "fail:" %}
7258   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7259   ins_pipe( pipe_cmpxchg );
7260 %}
7261 
7262 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7263   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7264   effect(KILL cr, KILL oldval);
7265   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7266             "MOV    $res,0\n\t"
7267             "JNE,s  fail\n\t"
7268             "MOV    $res,1\n"
7269           "fail:" %}
7270   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7271   ins_pipe( pipe_cmpxchg );
7272 %}
7273 
7274 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7275   predicate(n->as_LoadStore()->result_not_used());
7276   match(Set dummy (GetAndAddI mem add));
7277   effect(KILL cr);
7278   format %{ "ADDL  [$mem],$add" %}
7279   ins_encode %{
7280     if (os::is_MP()) { __ lock(); }
7281     __ addl($mem$$Address, $add$$constant);
7282   %}
7283   ins_pipe( pipe_cmpxchg );
7284 %}
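// When the fetched value is unused (result_not_used() above), a LOCKed ADD is
// sufficient: it gives the same atomicity and fencing as the LOCK XADD used by
// the rule below, without writing the old value back into a register.
// Illustrative C11 equivalent (for exposition only, not part of this file):
//   (void)atomic_fetch_add(&counter, v);   // result discarded -> plain LOCK ADD
//   old =  atomic_fetch_add(&counter, v);  // result needed    -> LOCK XADD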
7285 
7286 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7287   match(Set newval (GetAndAddI mem newval));
7288   effect(KILL cr);
7289   format %{ "XADDL  [$mem],$newval" %}
7290   ins_encode %{
7291     if (os::is_MP()) { __ lock(); }
7292     __ xaddl($mem$$Address, $newval$$Register);
7293   %}
7294   ins_pipe( pipe_cmpxchg );
7295 %}
7296 
7297 instruct xchgI( memory mem, rRegI newval) %{
7298   match(Set newval (GetAndSetI mem newval));
7299   format %{ "XCHGL  $newval,[$mem]" %}
7300   ins_encode %{
7301     __ xchgl($newval$$Register, $mem$$Address);
7302   %}
7303   ins_pipe( pipe_cmpxchg );
7304 %}
7305 
7306 instruct xchgP( memory mem, pRegP newval) %{
7307   match(Set newval (GetAndSetP mem newval));
7308   format %{ "XCHGL  $newval,[$mem]" %}
7309   ins_encode %{
7310     __ xchgl($newval$$Register, $mem$$Address);
7311   %}
7312   ins_pipe( pipe_cmpxchg );
7313 %}
7314 
7315 //----------Subtraction Instructions-------------------------------------------
7316 
7317 // Integer Subtraction Instructions
7318 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7319   match(Set dst (SubI dst src));
7320   effect(KILL cr);
7321 
7322   size(2);
7323   format %{ "SUB    $dst,$src" %}
7324   opcode(0x2B);
7325   ins_encode( OpcP, RegReg( dst, src) );
7326   ins_pipe( ialu_reg_reg );
7327 %}
7328 
7329 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7330   match(Set dst (SubI dst src));
7331   effect(KILL cr);
7332 
7333   format %{ "SUB    $dst,$src" %}
7334   opcode(0x81,0x05);  /* Opcode 81 /5 */
7335   // ins_encode( RegImm( dst, src) );
7336   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7337   ins_pipe( ialu_reg );
7338 %}
7339 
7340 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7341   match(Set dst (SubI dst (LoadI src)));
7342   effect(KILL cr);
7343 
7344   ins_cost(125);
7345   format %{ "SUB    $dst,$src" %}
7346   opcode(0x2B);
7347   ins_encode( OpcP, RegMem( dst, src) );
7348   ins_pipe( ialu_reg_mem );
7349 %}
7350 
7351 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7352   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7353   effect(KILL cr);
7354 
7355   ins_cost(150);
7356   format %{ "SUB    $dst,$src" %}
7357   opcode(0x29);  /* Opcode 29 /r */
7358   ins_encode( OpcP, RegMem( src, dst ) );
7359   ins_pipe( ialu_mem_reg );
7360 %}
7361 
7362 // Subtract from a pointer
7363 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7364   match(Set dst (AddP dst (SubI zero src)));
7365   effect(KILL cr);
7366 
7367   size(2);
7368   format %{ "SUB    $dst,$src" %}
7369   opcode(0x2B);
7370   ins_encode( OpcP, RegReg( dst, src) );
7371   ins_pipe( ialu_reg_reg );
7372 %}
7373 
7374 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7375   match(Set dst (SubI zero dst));
7376   effect(KILL cr);
7377 
7378   size(2);
7379   format %{ "NEG    $dst" %}
7380   opcode(0xF7,0x03);  // Opcode F7 /3
7381   ins_encode( OpcP, RegOpc( dst ) );
7382   ins_pipe( ialu_reg );
7383 %}
7384 
7385 //----------Multiplication/Division Instructions-------------------------------
7386 // Integer Multiplication Instructions
7387 // Multiply Register
7388 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7389   match(Set dst (MulI dst src));
7390   effect(KILL cr);
7391 
7392   size(3);
7393   ins_cost(300);
7394   format %{ "IMUL   $dst,$src" %}
7395   opcode(0xAF, 0x0F);
7396   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7397   ins_pipe( ialu_reg_reg_alu0 );
7398 %}
7399 
7400 // Multiply 32-bit Immediate
7401 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7402   match(Set dst (MulI src imm));
7403   effect(KILL cr);
7404 
7405   ins_cost(300);
7406   format %{ "IMUL   $dst,$src,$imm" %}
7407   opcode(0x69);  /* 69 /r id */
7408   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7409   ins_pipe( ialu_reg_reg_alu0 );
7410 %}
7411 
7412 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7413   match(Set dst src);
7414   effect(KILL cr);
7415 
7416   // Note that this is artificially increased to make it more expensive than loadConL
7417   ins_cost(250);
7418   format %{ "MOV    EAX,$src\t// low word only" %}
7419   opcode(0xB8);
7420   ins_encode( LdImmL_Lo(dst, src) );
7421   ins_pipe( ialu_reg_fat );
7422 %}
7423 
7424 // Multiply by 32-bit Immediate, taking the shifted high order results
7425 //  (special case for shift by 32)
7426 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7427   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7428   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7429              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7430              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7431   effect(USE src1, KILL cr);
7432 
7433   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7434   ins_cost(0*100 + 1*400 - 150);
7435   format %{ "IMUL   EDX:EAX,$src1" %}
7436   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7437   ins_pipe( pipe_slow );
7438 %}
7439 
7440 // Multiply by 32-bit Immediate, taking the shifted high order results
7441 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7442   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7443   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7444              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7445              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7446   effect(USE src1, KILL cr);
7447 
7448   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7449   ins_cost(1*100 + 1*400 - 150);
7450   format %{ "IMUL   EDX:EAX,$src1\n\t"
7451             "SAR    EDX,$cnt-32" %}
7452   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7453   ins_pipe( pipe_slow );
7454 %}
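// Both rules above compute (int)(((long)src1 * con) >> cnt) for cnt in [32,63]
// without building the long value in a register pair: a single IMUL leaves the
// 64-bit product in EDX:EAX, so shifting right by cnt is just EDX shifted by
// cnt-32 (and no shift at all when cnt == 32).  Illustrative C equivalent
// (hypothetical helper name, not part of this file):
//   int32_t mul_con_high(int32_t a, int32_t con, int cnt) {
//     return (int32_t)(((int64_t)a * con) >> cnt);   // 32 <= cnt <= 63
//   }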
7455 
7456 // Multiply Memory 32-bit Immediate
7457 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7458   match(Set dst (MulI (LoadI src) imm));
7459   effect(KILL cr);
7460 
7461   ins_cost(300);
7462   format %{ "IMUL   $dst,$src,$imm" %}
7463   opcode(0x69);  /* 69 /r id */
7464   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7465   ins_pipe( ialu_reg_mem_alu0 );
7466 %}
7467 
7468 // Multiply Memory
7469 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7470   match(Set dst (MulI dst (LoadI src)));
7471   effect(KILL cr);
7472 
7473   ins_cost(350);
7474   format %{ "IMUL   $dst,$src" %}
7475   opcode(0xAF, 0x0F);
7476   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7477   ins_pipe( ialu_reg_mem_alu0 );
7478 %}
7479 
7480 // Multiply Register Int to Long
7481 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7482   // Basic Idea: long = (long)int * (long)int
7483   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7484   effect(DEF dst, USE src, USE src1, KILL flags);
7485 
7486   ins_cost(300);
7487   format %{ "IMUL   $dst,$src1" %}
7488 
7489   ins_encode( long_int_multiply( dst, src1 ) );
7490   ins_pipe( ialu_reg_reg_alu0 );
7491 %}
7492 
7493 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7494   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7495   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7496   effect(KILL flags);
7497 
7498   ins_cost(300);
7499   format %{ "MUL    $dst,$src1" %}
7500 
7501   ins_encode( long_uint_multiply(dst, src1) );
7502   ins_pipe( ialu_reg_reg_alu0 );
7503 %}
7504 
7505 // Multiply Register Long
7506 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7507   match(Set dst (MulL dst src));
7508   effect(KILL cr, TEMP tmp);
7509   ins_cost(4*100+3*400);
7510 // Basic idea: lo(result) = lo(x_lo * y_lo)
7511 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7512   format %{ "MOV    $tmp,$src.lo\n\t"
7513             "IMUL   $tmp,EDX\n\t"
7514             "MOV    EDX,$src.hi\n\t"
7515             "IMUL   EDX,EAX\n\t"
7516             "ADD    $tmp,EDX\n\t"
7517             "MUL    EDX:EAX,$src.lo\n\t"
7518             "ADD    EDX,$tmp" %}
7519   ins_encode( long_multiply( dst, src, tmp ) );
7520   ins_pipe( pipe_slow );
7521 %}
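// Why the sequence above yields the low 64 bits of the product (illustrative C
// with hypothetical names, not part of this file): writing x = x_hi:x_lo and
// y = y_hi:y_lo,
//   uint64_t mul64(uint32_t x_lo, uint32_t x_hi, uint32_t y_lo, uint32_t y_hi) {
//     uint64_t lo_prod = (uint64_t)x_lo * y_lo;              // MUL EDX:EAX,$src.lo
//     uint32_t hi = (uint32_t)(lo_prod >> 32)
//                 + x_hi * y_lo + x_lo * y_hi;               // the two IMULs and ADDs
//     return ((uint64_t)hi << 32) | (uint32_t)lo_prod;       // terms of 2^64 and above drop out
//   }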
7522 
7523 // Multiply Register Long where the left operand's high 32 bits are zero
7524 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7525   predicate(is_operand_hi32_zero(n->in(1)));
7526   match(Set dst (MulL dst src));
7527   effect(KILL cr, TEMP tmp);
7528   ins_cost(2*100+2*400);
7529 // Basic idea: lo(result) = lo(x_lo * y_lo)
7530 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7531   format %{ "MOV    $tmp,$src.hi\n\t"
7532             "IMUL   $tmp,EAX\n\t"
7533             "MUL    EDX:EAX,$src.lo\n\t"
7534             "ADD    EDX,$tmp" %}
7535   ins_encode %{
7536     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7537     __ imull($tmp$$Register, rax);
7538     __ mull($src$$Register);
7539     __ addl(rdx, $tmp$$Register);
7540   %}
7541   ins_pipe( pipe_slow );
7542 %}
7543 
7544 // Multiply Register Long where the right operand's high 32 bits are zero
7545 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7546   predicate(is_operand_hi32_zero(n->in(2)));
7547   match(Set dst (MulL dst src));
7548   effect(KILL cr, TEMP tmp);
7549   ins_cost(2*100+2*400);
7550 // Basic idea: lo(result) = lo(x_lo * y_lo)
7551 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7552   format %{ "MOV    $tmp,$src.lo\n\t"
7553             "IMUL   $tmp,EDX\n\t"
7554             "MUL    EDX:EAX,$src.lo\n\t"
7555             "ADD    EDX,$tmp" %}
7556   ins_encode %{
7557     __ movl($tmp$$Register, $src$$Register);
7558     __ imull($tmp$$Register, rdx);
7559     __ mull($src$$Register);
7560     __ addl(rdx, $tmp$$Register);
7561   %}
7562   ins_pipe( pipe_slow );
7563 %}
7564 
7565 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7566 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7567   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7568   match(Set dst (MulL dst src));
7569   effect(KILL cr);
7570   ins_cost(1*400);
7571 // Basic idea: lo(result) = lo(x_lo * y_lo)
7572 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7573   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7574   ins_encode %{
7575     __ mull($src$$Register);
7576   %}
7577   ins_pipe( pipe_slow );
7578 %}
7579 
7580 // Multiply Register Long by small constant
7581 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7582   match(Set dst (MulL dst src));
7583   effect(KILL cr, TEMP tmp);
7584   ins_cost(2*100+2*400);
7585   size(12);
7586 // Basic idea: lo(result) = lo(src * EAX)
7587 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7588   format %{ "IMUL   $tmp,EDX,$src\n\t"
7589             "MOV    EDX,$src\n\t"
7590             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7591             "ADD    EDX,$tmp" %}
7592   ins_encode( long_multiply_con( dst, src, tmp ) );
7593   ins_pipe( pipe_slow );
7594 %}
7595 
7596 // Integer DIV with Register
7597 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7598   match(Set rax (DivI rax div));
7599   effect(KILL rdx, KILL cr);
7600   size(26);
7601   ins_cost(30*100+10*100);
7602   format %{ "CMP    EAX,0x80000000\n\t"
7603             "JNE,s  normal\n\t"
7604             "XOR    EDX,EDX\n\t"
7605             "CMP    ECX,-1\n\t"
7606             "JE,s   done\n"
7607     "normal: CDQ\n\t"
7608             "IDIV   $div\n\t"
7609     "done:"        %}
7610   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7611   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7612   ins_pipe( ialu_reg_reg_alu0 );
7613 %}
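// The special case above exists because IDIV raises #DE for min_jint / -1,
// while Java requires the quotient min_jint (and remainder 0) to be produced
// silently.  Illustrative C statement of the required semantics (hypothetical
// helper name, not part of this file):
//   int32_t java_idiv(int32_t a, int32_t b) {     // b != 0 is checked elsewhere
//     if (a == INT32_MIN && b == -1) return a;    // would trap in hardware
//     return a / b;                               // CDQ ; IDIV
//   }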
7614 
7615 // Divide Register Long
7616 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7617   match(Set dst (DivL src1 src2));
7618   effect( KILL cr, KILL cx, KILL bx );
7619   ins_cost(10000);
7620   format %{ "PUSH   $src1.hi\n\t"
7621             "PUSH   $src1.lo\n\t"
7622             "PUSH   $src2.hi\n\t"
7623             "PUSH   $src2.lo\n\t"
7624             "CALL   SharedRuntime::ldiv\n\t"
7625             "ADD    ESP,16" %}
7626   ins_encode( long_div(src1,src2) );
7627   ins_pipe( pipe_slow );
7628 %}
7629 
7630 // Integer DIVMOD with Register, both quotient and mod results
7631 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7632   match(DivModI rax div);
7633   effect(KILL cr);
7634   size(26);
7635   ins_cost(30*100+10*100);
7636   format %{ "CMP    EAX,0x80000000\n\t"
7637             "JNE,s  normal\n\t"
7638             "XOR    EDX,EDX\n\t"
7639             "CMP    ECX,-1\n\t"
7640             "JE,s   done\n"
7641     "normal: CDQ\n\t"
7642             "IDIV   $div\n\t"
7643     "done:"        %}
7644   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7645   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7646   ins_pipe( pipe_slow );
7647 %}
7648 
7649 // Integer MOD with Register
7650 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7651   match(Set rdx (ModI rax div));
7652   effect(KILL rax, KILL cr);
7653 
7654   size(26);
7655   ins_cost(300);
7656   format %{ "CDQ\n\t"
7657             "IDIV   $div" %}
7658   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7659   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7660   ins_pipe( ialu_reg_reg_alu0 );
7661 %}
7662 
7663 // Remainder Register Long
7664 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7665   match(Set dst (ModL src1 src2));
7666   effect( KILL cr, KILL cx, KILL bx );
7667   ins_cost(10000);
7668   format %{ "PUSH   $src1.hi\n\t"
7669             "PUSH   $src1.lo\n\t"
7670             "PUSH   $src2.hi\n\t"
7671             "PUSH   $src2.lo\n\t"
7672             "CALL   SharedRuntime::lrem\n\t"
7673             "ADD    ESP,16" %}
7674   ins_encode( long_mod(src1,src2) );
7675   ins_pipe( pipe_slow );
7676 %}
7677 
7678 // Divide Register Long (no special case since divisor != -1)
7679 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7680   match(Set dst (DivL dst imm));
7681   effect( TEMP tmp, TEMP tmp2, KILL cr );
7682   ins_cost(1000);
7683   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7684             "XOR    $tmp2,$tmp2\n\t"
7685             "CMP    $tmp,EDX\n\t"
7686             "JA,s   fast\n\t"
7687             "MOV    $tmp2,EAX\n\t"
7688             "MOV    EAX,EDX\n\t"
7689             "MOV    EDX,0\n\t"
7690             "JLE,s  pos\n\t"
7691             "LNEG   EAX : $tmp2\n\t"
7692             "DIV    $tmp # unsigned division\n\t"
7693             "XCHG   EAX,$tmp2\n\t"
7694             "DIV    $tmp\n\t"
7695             "LNEG   $tmp2 : EAX\n\t"
7696             "JMP,s  done\n"
7697     "pos:\n\t"
7698             "DIV    $tmp\n\t"
7699             "XCHG   EAX,$tmp2\n"
7700     "fast:\n\t"
7701             "DIV    $tmp\n"
7702     "done:\n\t"
7703             "MOV    EDX,$tmp2\n\t"
7704             "NEG    EDX:EAX # if $imm < 0" %}
7705   ins_encode %{
7706     int con = (int)$imm$$constant;
7707     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7708     int pcon = (con > 0) ? con : -con;
7709     Label Lfast, Lpos, Ldone;
7710 
7711     __ movl($tmp$$Register, pcon);
7712     __ xorl($tmp2$$Register,$tmp2$$Register);
7713     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7714     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7715 
7716     __ movl($tmp2$$Register, $dst$$Register); // save
7717     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7718     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7719     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7720 
7721     // Negative dividend.
7722     // convert value to positive to use unsigned division
7723     __ lneg($dst$$Register, $tmp2$$Register);
7724     __ divl($tmp$$Register);
7725     __ xchgl($dst$$Register, $tmp2$$Register);
7726     __ divl($tmp$$Register);
7727     // revert result back to negative
7728     __ lneg($tmp2$$Register, $dst$$Register);
7729     __ jmpb(Ldone);
7730 
7731     __ bind(Lpos);
7732     __ divl($tmp$$Register); // Use unsigned division
7733     __ xchgl($dst$$Register, $tmp2$$Register);
7734     // Fall through to the final divide; tmp2 has the 32-bit high result
7735 
7736     __ bind(Lfast);
7737     // fast path: src is positive
7738     __ divl($tmp$$Register); // Use unsigned division
7739 
7740     __ bind(Ldone);
7741     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7742     if (con < 0) {
7743       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7744     }
7745   %}
7746   ins_pipe( pipe_slow );
7747 %}
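// The fast/slow split above is schoolbook 64/32 division on absolute values:
// if the (unsigned) high word of the dividend is already smaller than the
// divisor, one DIV produces the whole quotient; otherwise divide the high word
// first and then divide remainder:low.  Illustrative C for the slow path
// (hypothetical names, not part of this file):
//   uint32_t q_hi = hi / d,  r = hi % d;                          // first DIV
//   uint32_t q_lo = (uint32_t)((((uint64_t)r << 32) | lo) / d);   // second DIV; r < d so it fits
// The sign is fixed up afterwards (the LNEG / "NEG EDX:EAX" steps).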
7748 
7749 // Remainder Register Long (remainder fits into 32 bits)
7750 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7751   match(Set dst (ModL dst imm));
7752   effect( TEMP tmp, TEMP tmp2, KILL cr );
7753   ins_cost(1000);
7754   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7755             "CMP    $tmp,EDX\n\t"
7756             "JA,s   fast\n\t"
7757             "MOV    $tmp2,EAX\n\t"
7758             "MOV    EAX,EDX\n\t"
7759             "MOV    EDX,0\n\t"
7760             "JLE,s  pos\n\t"
7761             "LNEG   EAX : $tmp2\n\t"
7762             "DIV    $tmp # unsigned division\n\t"
7763             "MOV    EAX,$tmp2\n\t"
7764             "DIV    $tmp\n\t"
7765             "NEG    EDX\n\t"
7766             "JMP,s  done\n"
7767     "pos:\n\t"
7768             "DIV    $tmp\n\t"
7769             "MOV    EAX,$tmp2\n"
7770     "fast:\n\t"
7771             "DIV    $tmp\n"
7772     "done:\n\t"
7773             "MOV    EAX,EDX\n\t"
7774             "SAR    EDX,31\n\t" %}
7775   ins_encode %{
7776     int con = (int)$imm$$constant;
7777     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7778     int pcon = (con > 0) ? con : -con;
7779     Label  Lfast, Lpos, Ldone;
7780 
7781     __ movl($tmp$$Register, pcon);
7782     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7783     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7784 
7785     __ movl($tmp2$$Register, $dst$$Register); // save
7786     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7787     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7788     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7789 
7790     // Negative dividend.
7791     // convert value to positive to use unsigned division
7792     __ lneg($dst$$Register, $tmp2$$Register);
7793     __ divl($tmp$$Register);
7794     __ movl($dst$$Register, $tmp2$$Register);
7795     __ divl($tmp$$Register);
7796     // revert remainder back to negative
7797     __ negl(HIGH_FROM_LOW($dst$$Register));
7798     __ jmpb(Ldone);
7799 
7800     __ bind(Lpos);
7801     __ divl($tmp$$Register);
7802     __ movl($dst$$Register, $tmp2$$Register);
7803 
7804     __ bind(Lfast);
7805     // fast path: src is positive
7806     __ divl($tmp$$Register);
7807 
7808     __ bind(Ldone);
7809     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7810     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7811 
7812   %}
7813   ins_pipe( pipe_slow );
7814 %}
7815 
7816 // Integer Shift Instructions
7817 // Shift Left by one
7818 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7819   match(Set dst (LShiftI dst shift));
7820   effect(KILL cr);
7821 
7822   size(2);
7823   format %{ "SHL    $dst,$shift" %}
7824   opcode(0xD1, 0x4);  /* D1 /4 */
7825   ins_encode( OpcP, RegOpc( dst ) );
7826   ins_pipe( ialu_reg );
7827 %}
7828 
7829 // Shift Left by 8-bit immediate
7830 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7831   match(Set dst (LShiftI dst shift));
7832   effect(KILL cr);
7833 
7834   size(3);
7835   format %{ "SHL    $dst,$shift" %}
7836   opcode(0xC1, 0x4);  /* C1 /4 ib */
7837   ins_encode( RegOpcImm( dst, shift) );
7838   ins_pipe( ialu_reg );
7839 %}
7840 
7841 // Shift Left by variable
7842 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7843   match(Set dst (LShiftI dst shift));
7844   effect(KILL cr);
7845 
7846   size(2);
7847   format %{ "SHL    $dst,$shift" %}
7848   opcode(0xD3, 0x4);  /* D3 /4 */
7849   ins_encode( OpcP, RegOpc( dst ) );
7850   ins_pipe( ialu_reg_reg );
7851 %}
7852 
7853 // Arithmetic shift right by one
7854 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7855   match(Set dst (RShiftI dst shift));
7856   effect(KILL cr);
7857 
7858   size(2);
7859   format %{ "SAR    $dst,$shift" %}
7860   opcode(0xD1, 0x7);  /* D1 /7 */
7861   ins_encode( OpcP, RegOpc( dst ) );
7862   ins_pipe( ialu_reg );
7863 %}
7864 
7865 // Arithmetic shift right by one
7866 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7867   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7868   effect(KILL cr);
7869   format %{ "SAR    $dst,$shift" %}
7870   opcode(0xD1, 0x7);  /* D1 /7 */
7871   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7872   ins_pipe( ialu_mem_imm );
7873 %}
7874 
7875 // Arithmetic Shift Right by 8-bit immediate
7876 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7877   match(Set dst (RShiftI dst shift));
7878   effect(KILL cr);
7879 
7880   size(3);
7881   format %{ "SAR    $dst,$shift" %}
7882   opcode(0xC1, 0x7);  /* C1 /7 ib */
7883   ins_encode( RegOpcImm( dst, shift ) );
7884   ins_pipe( ialu_mem_imm );
7885 %}
7886 
7887 // Arithmetic Shift Right by 8-bit immediate
7888 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7889   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7890   effect(KILL cr);
7891 
7892   format %{ "SAR    $dst,$shift" %}
7893   opcode(0xC1, 0x7);  /* C1 /7 ib */
7894   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7895   ins_pipe( ialu_mem_imm );
7896 %}
7897 
7898 // Arithmetic Shift Right by variable
7899 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7900   match(Set dst (RShiftI dst shift));
7901   effect(KILL cr);
7902 
7903   size(2);
7904   format %{ "SAR    $dst,$shift" %}
7905   opcode(0xD3, 0x7);  /* D3 /7 */
7906   ins_encode( OpcP, RegOpc( dst ) );
7907   ins_pipe( ialu_reg_reg );
7908 %}
7909 
7910 // Logical shift right by one
7911 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7912   match(Set dst (URShiftI dst shift));
7913   effect(KILL cr);
7914 
7915   size(2);
7916   format %{ "SHR    $dst,$shift" %}
7917   opcode(0xD1, 0x5);  /* D1 /5 */
7918   ins_encode( OpcP, RegOpc( dst ) );
7919   ins_pipe( ialu_reg );
7920 %}
7921 
7922 // Logical Shift Right by 8-bit immediate
7923 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7924   match(Set dst (URShiftI dst shift));
7925   effect(KILL cr);
7926 
7927   size(3);
7928   format %{ "SHR    $dst,$shift" %}
7929   opcode(0xC1, 0x5);  /* C1 /5 ib */
7930   ins_encode( RegOpcImm( dst, shift) );
7931   ins_pipe( ialu_reg );
7932 %}
7933 
7934 
7935 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
7936 // This idiom is used by the compiler for the i2b bytecode (sign-extend byte).
7937 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7938   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7939 
7940   size(3);
7941   format %{ "MOVSX  $dst,$src :8" %}
7942   ins_encode %{
7943     __ movsbl($dst$$Register, $src$$Register);
7944   %}
7945   ins_pipe(ialu_reg_reg);
7946 %}
7947 
7948 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
7949 // This idiom is used by the compiler for the i2s bytecode (sign-extend short).
7950 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7951   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7952 
7953   size(3);
7954   format %{ "MOVSX  $dst,$src :16" %}
7955   ins_encode %{
7956     __ movswl($dst$$Register, $src$$Register);
7957   %}
7958   ins_pipe(ialu_reg_reg);
7959 %}
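// Note: a C sketch (illustrative only, assuming arithmetic right shift of
// signed int as on this target) of the sign-extension idioms matched above.
// Shifting left and then arithmetic-shifting right by the same amount
// sign-extends the low byte/short, which MOVSX does in one instruction:
//
//   int i2b(int x) { return (x << 24) >> 24; }   // == (int)(signed char)x -> MOVSBL
//   int i2s(int x) { return (x << 16) >> 16; }   // == (int)(short)x       -> MOVSWL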
7960 
7961 
7962 // Logical Shift Right by variable
7963 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7964   match(Set dst (URShiftI dst shift));
7965   effect(KILL cr);
7966 
7967   size(2);
7968   format %{ "SHR    $dst,$shift" %}
7969   opcode(0xD3, 0x5);  /* D3 /5 */
7970   ins_encode( OpcP, RegOpc( dst ) );
7971   ins_pipe( ialu_reg_reg );
7972 %}
7973 
7974 
7975 //----------Logical Instructions-----------------------------------------------
7976 //----------Integer Logical Instructions---------------------------------------
7977 // And Instructions
7978 // And Register with Register
7979 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7980   match(Set dst (AndI dst src));
7981   effect(KILL cr);
7982 
7983   size(2);
7984   format %{ "AND    $dst,$src" %}
7985   opcode(0x23);
7986   ins_encode( OpcP, RegReg( dst, src) );
7987   ins_pipe( ialu_reg_reg );
7988 %}
7989 
7990 // And Register with Immediate
7991 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7992   match(Set dst (AndI dst src));
7993   effect(KILL cr);
7994 
7995   format %{ "AND    $dst,$src" %}
7996   opcode(0x81,0x04);  /* Opcode 81 /4 */
7997   // ins_encode( RegImm( dst, src) );
7998   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7999   ins_pipe( ialu_reg );
8000 %}
8001 
8002 // And Register with Memory
8003 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8004   match(Set dst (AndI dst (LoadI src)));
8005   effect(KILL cr);
8006 
8007   ins_cost(125);
8008   format %{ "AND    $dst,$src" %}
8009   opcode(0x23);
8010   ins_encode( OpcP, RegMem( dst, src) );
8011   ins_pipe( ialu_reg_mem );
8012 %}
8013 
8014 // And Memory with Register
8015 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8016   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8017   effect(KILL cr);
8018 
8019   ins_cost(150);
8020   format %{ "AND    $dst,$src" %}
8021   opcode(0x21);  /* Opcode 21 /r */
8022   ins_encode( OpcP, RegMem( src, dst ) );
8023   ins_pipe( ialu_mem_reg );
8024 %}
8025 
8026 // And Memory with Immediate
8027 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8028   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8029   effect(KILL cr);
8030 
8031   ins_cost(125);
8032   format %{ "AND    $dst,$src" %}
8033   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8034   // ins_encode( MemImm( dst, src) );
8035   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8036   ins_pipe( ialu_mem_imm );
8037 %}
8038 
8039 // BMI1 instructions
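// Note: the BMI1 patterns below are matched from the usual bit-twiddling
// identities.  A C sketch (illustrative only):
//
//   int andn  (int a, int b) { return ~a & b; }       // ANDN:   matched as (a ^ -1) & b
//   int blsi  (int x)        { return x & -x; }       // BLSI:   isolate lowest set bit
//   int blsmsk(int x)        { return x ^ (x - 1); }  // BLSMSK: mask up to lowest set bit
//   int blsr  (int x)        { return x & (x - 1); }  // BLSR:   clear lowest set bit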
8040 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8041   match(Set dst (AndI (XorI src1 minus_1) src2));
8042   predicate(UseBMI1Instructions);
8043   effect(KILL cr);
8044 
8045   format %{ "ANDNL  $dst, $src1, $src2" %}
8046 
8047   ins_encode %{
8048     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8049   %}
8050   ins_pipe(ialu_reg);
8051 %}
8052 
8053 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8054   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8055   predicate(UseBMI1Instructions);
8056   effect(KILL cr);
8057 
8058   ins_cost(125);
8059   format %{ "ANDNL  $dst, $src1, $src2" %}
8060 
8061   ins_encode %{
8062     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8063   %}
8064   ins_pipe(ialu_reg_mem);
8065 %}
8066 
8067 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8068   match(Set dst (AndI (SubI imm_zero src) src));
8069   predicate(UseBMI1Instructions);
8070   effect(KILL cr);
8071 
8072   format %{ "BLSIL  $dst, $src" %}
8073 
8074   ins_encode %{
8075     __ blsil($dst$$Register, $src$$Register);
8076   %}
8077   ins_pipe(ialu_reg);
8078 %}
8079 
8080 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8081   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8082   predicate(UseBMI1Instructions);
8083   effect(KILL cr);
8084 
8085   ins_cost(125);
8086   format %{ "BLSIL  $dst, $src" %}
8087 
8088   ins_encode %{
8089     __ blsil($dst$$Register, $src$$Address);
8090   %}
8091   ins_pipe(ialu_reg_mem);
8092 %}
8093 
8094 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8095 %{
8096   match(Set dst (XorI (AddI src minus_1) src));
8097   predicate(UseBMI1Instructions);
8098   effect(KILL cr);
8099 
8100   format %{ "BLSMSKL $dst, $src" %}
8101 
8102   ins_encode %{
8103     __ blsmskl($dst$$Register, $src$$Register);
8104   %}
8105 
8106   ins_pipe(ialu_reg);
8107 %}
8108 
8109 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8110 %{
8111   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8112   predicate(UseBMI1Instructions);
8113   effect(KILL cr);
8114 
8115   ins_cost(125);
8116   format %{ "BLSMSKL $dst, $src" %}
8117 
8118   ins_encode %{
8119     __ blsmskl($dst$$Register, $src$$Address);
8120   %}
8121 
8122   ins_pipe(ialu_reg_mem);
8123 %}
8124 
8125 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8126 %{
8127   match(Set dst (AndI (AddI src minus_1) src) );
8128   predicate(UseBMI1Instructions);
8129   effect(KILL cr);
8130 
8131   format %{ "BLSRL  $dst, $src" %}
8132 
8133   ins_encode %{
8134     __ blsrl($dst$$Register, $src$$Register);
8135   %}
8136 
8137   ins_pipe(ialu_reg);
8138 %}
8139 
8140 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8141 %{
8142   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8143   predicate(UseBMI1Instructions);
8144   effect(KILL cr);
8145 
8146   ins_cost(125);
8147   format %{ "BLSRL  $dst, $src" %}
8148 
8149   ins_encode %{
8150     __ blsrl($dst$$Register, $src$$Address);
8151   %}
8152 
8153   ins_pipe(ialu_reg_mem);
8154 %}
8155 
8156 // Or Instructions
8157 // Or Register with Register
8158 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8159   match(Set dst (OrI dst src));
8160   effect(KILL cr);
8161 
8162   size(2);
8163   format %{ "OR     $dst,$src" %}
8164   opcode(0x0B);
8165   ins_encode( OpcP, RegReg( dst, src) );
8166   ins_pipe( ialu_reg_reg );
8167 %}
8168 
8169 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8170   match(Set dst (OrI dst (CastP2X src)));
8171   effect(KILL cr);
8172 
8173   size(2);
8174   format %{ "OR     $dst,$src" %}
8175   opcode(0x0B);
8176   ins_encode( OpcP, RegReg( dst, src) );
8177   ins_pipe( ialu_reg_reg );
8178 %}
8179 
8180 
8181 // Or Register with Immediate
8182 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8183   match(Set dst (OrI dst src));
8184   effect(KILL cr);
8185 
8186   format %{ "OR     $dst,$src" %}
8187   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8188   // ins_encode( RegImm( dst, src) );
8189   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8190   ins_pipe( ialu_reg );
8191 %}
8192 
8193 // Or Register with Memory
8194 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8195   match(Set dst (OrI dst (LoadI src)));
8196   effect(KILL cr);
8197 
8198   ins_cost(125);
8199   format %{ "OR     $dst,$src" %}
8200   opcode(0x0B);
8201   ins_encode( OpcP, RegMem( dst, src) );
8202   ins_pipe( ialu_reg_mem );
8203 %}
8204 
8205 // Or Memory with Register
8206 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8207   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8208   effect(KILL cr);
8209 
8210   ins_cost(150);
8211   format %{ "OR     $dst,$src" %}
8212   opcode(0x09);  /* Opcode 09 /r */
8213   ins_encode( OpcP, RegMem( src, dst ) );
8214   ins_pipe( ialu_mem_reg );
8215 %}
8216 
8217 // Or Memory with Immediate
8218 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8219   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8220   effect(KILL cr);
8221 
8222   ins_cost(125);
8223   format %{ "OR     $dst,$src" %}
8224   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8225   // ins_encode( MemImm( dst, src) );
8226   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8227   ins_pipe( ialu_mem_imm );
8228 %}
8229 
8230 // ROL/ROR
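// Note: the rotate instructions below are matched from the shift/or idiom
// that rotation code (e.g. Integer.rotateLeft-style expressions) reduces to
// in the ideal graph.  A C sketch (illustrative only) of the pattern the
// immediate forms cover, where the two shift counts sum to 32:
//
//   unsigned rol32(unsigned x, int s) {       // 0 < s < 32
//     return (x << s) | (x >> (32 - s));      // OrI(LShiftI x s, URShiftI x 32-s)
//   }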
8231 // ROL expand
8232 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8233   effect(USE_DEF dst, USE shift, KILL cr);
8234 
8235   format %{ "ROL    $dst, $shift" %}
8236   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8237   ins_encode( OpcP, RegOpc( dst ));
8238   ins_pipe( ialu_reg );
8239 %}
8240 
8241 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8242   effect(USE_DEF dst, USE shift, KILL cr);
8243 
8244   format %{ "ROL    $dst, $shift" %}
8245   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8246   ins_encode( RegOpcImm(dst, shift) );
8247   ins_pipe(ialu_reg);
8248 %}
8249 
8250 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8251   effect(USE_DEF dst, USE shift, KILL cr);
8252 
8253   format %{ "ROL    $dst, $shift" %}
8254   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8255   ins_encode(OpcP, RegOpc(dst));
8256   ins_pipe( ialu_reg_reg );
8257 %}
8258 // end of ROL expand
8259 
8260 // ROL 32bit by one once
8261 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8262   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8263 
8264   expand %{
8265     rolI_eReg_imm1(dst, lshift, cr);
8266   %}
8267 %}
8268 
8269 // ROL 32bit var by imm8 once
8270 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8271   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8272   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8273 
8274   expand %{
8275     rolI_eReg_imm8(dst, lshift, cr);
8276   %}
8277 %}
8278 
8279 // ROL 32bit var by var once
8280 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8281   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8282 
8283   expand %{
8284     rolI_eReg_CL(dst, shift, cr);
8285   %}
8286 %}
8287 
8288 // ROL 32bit var by var once
8289 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8290   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8291 
8292   expand %{
8293     rolI_eReg_CL(dst, shift, cr);
8294   %}
8295 %}
8296 
8297 // ROR expand
8298 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8299   effect(USE_DEF dst, USE shift, KILL cr);
8300 
8301   format %{ "ROR    $dst, $shift" %}
8302   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8303   ins_encode( OpcP, RegOpc( dst ) );
8304   ins_pipe( ialu_reg );
8305 %}
8306 
8307 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8308   effect (USE_DEF dst, USE shift, KILL cr);
8309 
8310   format %{ "ROR    $dst, $shift" %}
8311   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8312   ins_encode( RegOpcImm(dst, shift) );
8313   ins_pipe( ialu_reg );
8314 %}
8315 
8316 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8317   effect(USE_DEF dst, USE shift, KILL cr);
8318 
8319   format %{ "ROR    $dst, $shift" %}
8320   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8321   ins_encode(OpcP, RegOpc(dst));
8322   ins_pipe( ialu_reg_reg );
8323 %}
8324 // end of ROR expand
8325 
8326 // ROR right once
8327 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8328   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8329 
8330   expand %{
8331     rorI_eReg_imm1(dst, rshift, cr);
8332   %}
8333 %}
8334 
8335 // ROR 32bit by immI8 once
8336 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8337   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8338   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8339 
8340   expand %{
8341     rorI_eReg_imm8(dst, rshift, cr);
8342   %}
8343 %}
8344 
8345 // ROR 32bit var by var once
8346 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8347   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8348 
8349   expand %{
8350     rorI_eReg_CL(dst, shift, cr);
8351   %}
8352 %}
8353 
8354 // ROR 32bit var by var once
8355 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8356   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8357 
8358   expand %{
8359     rorI_eReg_CL(dst, shift, cr);
8360   %}
8361 %}
8362 
8363 // Xor Instructions
8364 // Xor Register with Register
8365 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8366   match(Set dst (XorI dst src));
8367   effect(KILL cr);
8368 
8369   size(2);
8370   format %{ "XOR    $dst,$src" %}
8371   opcode(0x33);
8372   ins_encode( OpcP, RegReg( dst, src) );
8373   ins_pipe( ialu_reg_reg );
8374 %}
8375 
8376 // Xor Register with Immediate -1
8377 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8378   match(Set dst (XorI dst imm));
8379 
8380   size(2);
8381   format %{ "NOT    $dst" %}
8382   ins_encode %{
8383      __ notl($dst$$Register);
8384   %}
8385   ins_pipe( ialu_reg );
8386 %}
8387 
8388 // Xor Register with Immediate
8389 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8390   match(Set dst (XorI dst src));
8391   effect(KILL cr);
8392 
8393   format %{ "XOR    $dst,$src" %}
8394   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8395   // ins_encode( RegImm( dst, src) );
8396   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8397   ins_pipe( ialu_reg );
8398 %}
8399 
8400 // Xor Register with Memory
8401 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8402   match(Set dst (XorI dst (LoadI src)));
8403   effect(KILL cr);
8404 
8405   ins_cost(125);
8406   format %{ "XOR    $dst,$src" %}
8407   opcode(0x33);
8408   ins_encode( OpcP, RegMem(dst, src) );
8409   ins_pipe( ialu_reg_mem );
8410 %}
8411 
8412 // Xor Memory with Register
8413 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8414   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8415   effect(KILL cr);
8416 
8417   ins_cost(150);
8418   format %{ "XOR    $dst,$src" %}
8419   opcode(0x31);  /* Opcode 31 /r */
8420   ins_encode( OpcP, RegMem( src, dst ) );
8421   ins_pipe( ialu_mem_reg );
8422 %}
8423 
8424 // Xor Memory with Immediate
8425 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8426   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8427   effect(KILL cr);
8428 
8429   ins_cost(125);
8430   format %{ "XOR    $dst,$src" %}
8431   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8432   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8433   ins_pipe( ialu_mem_imm );
8434 %}
8435 
8436 //----------Convert Int to Boolean---------------------------------------------
8437 
8438 instruct movI_nocopy(rRegI dst, rRegI src) %{
8439   effect( DEF dst, USE src );
8440   format %{ "MOV    $dst,$src" %}
8441   ins_encode( enc_Copy( dst, src) );
8442   ins_pipe( ialu_reg_reg );
8443 %}
8444 
8445 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8446   effect( USE_DEF dst, USE src, KILL cr );
8447 
8448   size(4);
8449   format %{ "NEG    $dst\n\t"
8450             "ADC    $dst,$src" %}
8451   ins_encode( neg_reg(dst),
8452               OpcRegReg(0x13,dst,src) );
8453   ins_pipe( ialu_reg_reg_long );
8454 %}
8455 
8456 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8457   match(Set dst (Conv2B src));
8458 
8459   expand %{
8460     movI_nocopy(dst,src);
8461     ci2b(dst,src,cr);
8462   %}
8463 %}
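// Note: a C sketch (illustrative only) of the branch-free NEG/ADC trick used
// by ci2b/cp2b above.  NEG sets the carry flag exactly when its operand is
// non-zero, and ADC then computes (-src) + src + CF, which is just CF:
//
//   int conv2b(int src) {
//     int dst = src;            // MOV  dst,src
//     int cf  = (dst != 0);     // NEG  dst   (CF := dst != 0, dst := -dst)
//     dst = -dst;
//     dst = dst + src + cf;     // ADC  dst,src  ->  dst == cf
//     return dst;               // 0 if src == 0, 1 otherwise
//   }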
8464 
8465 instruct movP_nocopy(rRegI dst, eRegP src) %{
8466   effect( DEF dst, USE src );
8467   format %{ "MOV    $dst,$src" %}
8468   ins_encode( enc_Copy( dst, src) );
8469   ins_pipe( ialu_reg_reg );
8470 %}
8471 
8472 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8473   effect( USE_DEF dst, USE src, KILL cr );
8474   format %{ "NEG    $dst\n\t"
8475             "ADC    $dst,$src" %}
8476   ins_encode( neg_reg(dst),
8477               OpcRegReg(0x13,dst,src) );
8478   ins_pipe( ialu_reg_reg_long );
8479 %}
8480 
8481 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8482   match(Set dst (Conv2B src));
8483 
8484   expand %{
8485     movP_nocopy(dst,src);
8486     cp2b(dst,src,cr);
8487   %}
8488 %}
8489 
8490 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8491   match(Set dst (CmpLTMask p q));
8492   effect(KILL cr);
8493   ins_cost(400);
8494 
8495   // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as its destination
8496   format %{ "XOR    $dst,$dst\n\t"
8497             "CMP    $p,$q\n\t"
8498             "SETlt  $dst\n\t"
8499             "NEG    $dst" %}
8500   ins_encode %{
8501     Register Rp = $p$$Register;
8502     Register Rq = $q$$Register;
8503     Register Rd = $dst$$Register;
8504     Label done;
8505     __ xorl(Rd, Rd);
8506     __ cmpl(Rp, Rq);
8507     __ setb(Assembler::less, Rd);
8508     __ negl(Rd);
8509   %}
8510 
8511   ins_pipe(pipe_slow);
8512 %}
8513 
8514 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8515   match(Set dst (CmpLTMask dst zero));
8516   effect(DEF dst, KILL cr);
8517   ins_cost(100);
8518 
8519   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8520   ins_encode %{
8521   __ sarl($dst$$Register, 31);
8522   %}
8523   ins_pipe(ialu_reg);
8524 %}
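// Note: CmpLTMask produces an all-ones or all-zero mask.  A C sketch of the
// two forms above (illustrative only, assuming arithmetic right shift):
//
//   int cmpLTMask (int p, int q) { return (p < q) ? -1 : 0; }   // XOR/CMP/SETlt/NEG
//   int cmpLTMask0(int x)        { return x >> 31; }            // SAR x,31 (sign mask)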
8525 
8526 /* better to save a register than avoid a branch */
8527 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8528   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8529   effect(KILL cr);
8530   ins_cost(400);
8531   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8532             "JGE    done\n\t"
8533             "ADD    $p,$y\n"
8534             "done:  " %}
8535   ins_encode %{
8536     Register Rp = $p$$Register;
8537     Register Rq = $q$$Register;
8538     Register Ry = $y$$Register;
8539     Label done;
8540     __ subl(Rp, Rq);
8541     __ jccb(Assembler::greaterEqual, done);
8542     __ addl(Rp, Ry);
8543     __ bind(done);
8544   %}
8545 
8546   ins_pipe(pipe_cmplt);
8547 %}
8548 
8549 /* better to save a register than avoid a branch */
8550 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8551   match(Set y (AndI (CmpLTMask p q) y));
8552   effect(KILL cr);
8553 
8554   ins_cost(300);
8555 
8556   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8557             "JLT      done\n\t"
8558             "XORL     $y, $y\n"
8559             "done:  " %}
8560   ins_encode %{
8561     Register Rp = $p$$Register;
8562     Register Rq = $q$$Register;
8563     Register Ry = $y$$Register;
8564     Label done;
8565     __ cmpl(Rp, Rq);
8566     __ jccb(Assembler::less, done);
8567     __ xorl(Ry, Ry);
8568     __ bind(done);
8569   %}
8570 
8571   ins_pipe(pipe_cmplt);
8572 %}
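// Note: a C sketch (illustrative only) of the two branchy CmpLTMask forms
// above; both follow the generated code rather than materializing the mask:
//
//   // cadd_cmpLTMask:  p = (p - q) + ((p - q < 0) ? y : 0)
//   int cadd_cmpLTMask(int p, int q, int y) {
//     p -= q;                   // SUB p,q
//     if (p < 0) p += y;        // JGE done / ADD p,y
//     return p;
//   }
//
//   // and_cmpLTMask:   y = (p < q) ? y : 0
//   int and_cmpLTMask(int p, int q, int y) {
//     if (p >= q) y = 0;        // CMP p,q / JLT done / XOR y,y
//     return y;
//   }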
8573 
8574 /* If I enable this, I encourage spilling in the inner loop of compress.
8575 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8576   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8577 */
8578 //----------Overflow Math Instructions-----------------------------------------
8579 
8580 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8581 %{
8582   match(Set cr (OverflowAddI op1 op2));
8583   effect(DEF cr, USE_KILL op1, USE op2);
8584 
8585   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8586 
8587   ins_encode %{
8588     __ addl($op1$$Register, $op2$$Register);
8589   %}
8590   ins_pipe(ialu_reg_reg);
8591 %}
8592 
8593 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8594 %{
8595   match(Set cr (OverflowAddI op1 op2));
8596   effect(DEF cr, USE_KILL op1, USE op2);
8597 
8598   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8599 
8600   ins_encode %{
8601     __ addl($op1$$Register, $op2$$constant);
8602   %}
8603   ins_pipe(ialu_reg_reg);
8604 %}
8605 
8606 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8607 %{
8608   match(Set cr (OverflowSubI op1 op2));
8609 
8610   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8611   ins_encode %{
8612     __ cmpl($op1$$Register, $op2$$Register);
8613   %}
8614   ins_pipe(ialu_reg_reg);
8615 %}
8616 
8617 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8618 %{
8619   match(Set cr (OverflowSubI op1 op2));
8620 
8621   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8622   ins_encode %{
8623     __ cmpl($op1$$Register, $op2$$constant);
8624   %}
8625   ins_pipe(ialu_reg_reg);
8626 %}
8627 
8628 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8629 %{
8630   match(Set cr (OverflowSubI zero op2));
8631   effect(DEF cr, USE_KILL op2);
8632 
8633   format %{ "NEG    $op2\t# overflow check int" %}
8634   ins_encode %{
8635     __ negl($op2$$Register);
8636   %}
8637   ins_pipe(ialu_reg_reg);
8638 %}
8639 
8640 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8641 %{
8642   match(Set cr (OverflowMulI op1 op2));
8643   effect(DEF cr, USE_KILL op1, USE op2);
8644 
8645   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8646   ins_encode %{
8647     __ imull($op1$$Register, $op2$$Register);
8648   %}
8649   ins_pipe(ialu_reg_reg_alu0);
8650 %}
8651 
8652 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8653 %{
8654   match(Set cr (OverflowMulI op1 op2));
8655   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8656 
8657   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8658   ins_encode %{
8659     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8660   %}
8661   ins_pipe(ialu_reg_reg_alu0);
8662 %}
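// Note: the Overflow* nodes above only set EFLAGS; the consuming branch (not
// shown here) tests the overflow condition.  A C sketch of the check itself
// (illustrative only, using a GCC/Clang builtin as a stand-in for ADD + JO):
//
//   int add_overflows(int a, int b) {
//     int r;
//     return __builtin_add_overflow(a, b, &r);   // true iff signed overflow
//   }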
8663 
8664 //----------Long Instructions------------------------------------------------
8665 // Add Long Register with Register
8666 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8667   match(Set dst (AddL dst src));
8668   effect(KILL cr);
8669   ins_cost(200);
8670   format %{ "ADD    $dst.lo,$src.lo\n\t"
8671             "ADC    $dst.hi,$src.hi" %}
8672   opcode(0x03, 0x13);
8673   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8674   ins_pipe( ialu_reg_reg_long );
8675 %}
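// Note: on 32-bit x86 a long lives in a lo/hi register pair, so 64-bit
// arithmetic is done in two 32-bit steps with the carry chained between them.
// A C sketch (illustrative only) of the ADD/ADC pair above:
//
//   #include <stdint.h>
//   void addL(uint32_t* dlo, uint32_t* dhi, uint32_t slo, uint32_t shi) {
//     uint32_t lo = *dlo + slo;        // ADD dst.lo,src.lo
//     uint32_t carry = (lo < slo);     // carry out of the low-word add
//     *dhi = *dhi + shi + carry;       // ADC dst.hi,src.hi
//     *dlo = lo;
//   }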
8676 
8677 // Add Long Register with Immediate
8678 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8679   match(Set dst (AddL dst src));
8680   effect(KILL cr);
8681   format %{ "ADD    $dst.lo,$src.lo\n\t"
8682             "ADC    $dst.hi,$src.hi" %}
8683   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8684   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8685   ins_pipe( ialu_reg_long );
8686 %}
8687 
8688 // Add Long Register with Memory
8689 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8690   match(Set dst (AddL dst (LoadL mem)));
8691   effect(KILL cr);
8692   ins_cost(125);
8693   format %{ "ADD    $dst.lo,$mem\n\t"
8694             "ADC    $dst.hi,$mem+4" %}
8695   opcode(0x03, 0x13);
8696   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8697   ins_pipe( ialu_reg_long_mem );
8698 %}
8699 
8700 // Subtract Long Register with Register.
8701 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8702   match(Set dst (SubL dst src));
8703   effect(KILL cr);
8704   ins_cost(200);
8705   format %{ "SUB    $dst.lo,$src.lo\n\t"
8706             "SBB    $dst.hi,$src.hi" %}
8707   opcode(0x2B, 0x1B);
8708   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8709   ins_pipe( ialu_reg_reg_long );
8710 %}
8711 
8712 // Subtract Long Register with Immediate
8713 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8714   match(Set dst (SubL dst src));
8715   effect(KILL cr);
8716   format %{ "SUB    $dst.lo,$src.lo\n\t"
8717             "SBB    $dst.hi,$src.hi" %}
8718   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8719   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8720   ins_pipe( ialu_reg_long );
8721 %}
8722 
8723 // Subtract Long Register with Memory
8724 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8725   match(Set dst (SubL dst (LoadL mem)));
8726   effect(KILL cr);
8727   ins_cost(125);
8728   format %{ "SUB    $dst.lo,$mem\n\t"
8729             "SBB    $dst.hi,$mem+4" %}
8730   opcode(0x2B, 0x1B);
8731   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8732   ins_pipe( ialu_reg_long_mem );
8733 %}
8734 
8735 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8736   match(Set dst (SubL zero dst));
8737   effect(KILL cr);
8738   ins_cost(300);
8739   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8740   ins_encode( neg_long(dst) );
8741   ins_pipe( ialu_reg_reg_long );
8742 %}
8743 
8744 // And Long Register with Register
8745 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8746   match(Set dst (AndL dst src));
8747   effect(KILL cr);
8748   format %{ "AND    $dst.lo,$src.lo\n\t"
8749             "AND    $dst.hi,$src.hi" %}
8750   opcode(0x23,0x23);
8751   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8752   ins_pipe( ialu_reg_reg_long );
8753 %}
8754 
8755 // And Long Register with Immediate
8756 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8757   match(Set dst (AndL dst src));
8758   effect(KILL cr);
8759   format %{ "AND    $dst.lo,$src.lo\n\t"
8760             "AND    $dst.hi,$src.hi" %}
8761   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8762   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8763   ins_pipe( ialu_reg_long );
8764 %}
8765 
8766 // And Long Register with Memory
8767 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8768   match(Set dst (AndL dst (LoadL mem)));
8769   effect(KILL cr);
8770   ins_cost(125);
8771   format %{ "AND    $dst.lo,$mem\n\t"
8772             "AND    $dst.hi,$mem+4" %}
8773   opcode(0x23, 0x23);
8774   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8775   ins_pipe( ialu_reg_long_mem );
8776 %}
8777 
8778 // BMI1 instructions
8779 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8780   match(Set dst (AndL (XorL src1 minus_1) src2));
8781   predicate(UseBMI1Instructions);
8782   effect(KILL cr, TEMP dst);
8783 
8784   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8785             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8786          %}
8787 
8788   ins_encode %{
8789     Register Rdst = $dst$$Register;
8790     Register Rsrc1 = $src1$$Register;
8791     Register Rsrc2 = $src2$$Register;
8792     __ andnl(Rdst, Rsrc1, Rsrc2);
8793     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8794   %}
8795   ins_pipe(ialu_reg_reg_long);
8796 %}
8797 
8798 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8799   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8800   predicate(UseBMI1Instructions);
8801   effect(KILL cr, TEMP dst);
8802 
8803   ins_cost(125);
8804   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8805             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8806          %}
8807 
8808   ins_encode %{
8809     Register Rdst = $dst$$Register;
8810     Register Rsrc1 = $src1$$Register;
8811     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8812 
8813     __ andnl(Rdst, Rsrc1, $src2$$Address);
8814     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8815   %}
8816   ins_pipe(ialu_reg_mem);
8817 %}
8818 
8819 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8820   match(Set dst (AndL (SubL imm_zero src) src));
8821   predicate(UseBMI1Instructions);
8822   effect(KILL cr, TEMP dst);
8823 
8824   format %{ "MOVL   $dst.hi, 0\n\t"
8825             "BLSIL  $dst.lo, $src.lo\n\t"
8826             "JNZ    done\n\t"
8827             "BLSIL  $dst.hi, $src.hi\n"
8828             "done:"
8829          %}
8830 
8831   ins_encode %{
8832     Label done;
8833     Register Rdst = $dst$$Register;
8834     Register Rsrc = $src$$Register;
8835     __ movl(HIGH_FROM_LOW(Rdst), 0);
8836     __ blsil(Rdst, Rsrc);
8837     __ jccb(Assembler::notZero, done);
8838     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8839     __ bind(done);
8840   %}
8841   ins_pipe(ialu_reg);
8842 %}
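// Note: a C sketch (illustrative only) of why the 64-bit BLSI above can be
// split into two 32-bit BLSILs: the lowest set bit is in the low word iff the
// low word is non-zero, and in that case the high word of the result is zero.
//
//   #include <stdint.h>
//   uint64_t blsi64(uint32_t lo, uint32_t hi) {
//     uint32_t rlo = lo & (0u - lo);        // BLSIL dst.lo,src.lo
//     if (rlo != 0)                         // JNZ done (dst.hi already 0)
//       return rlo;
//     uint32_t rhi = hi & (0u - hi);        // BLSIL dst.hi,src.hi
//     return (uint64_t)rhi << 32;
//   }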
8843 
8844 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8845   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8846   predicate(UseBMI1Instructions);
8847   effect(KILL cr, TEMP dst);
8848 
8849   ins_cost(125);
8850   format %{ "MOVL   $dst.hi, 0\n\t"
8851             "BLSIL  $dst.lo, $src\n\t"
8852             "JNZ    done\n\t"
8853             "BLSIL  $dst.hi, $src+4\n"
8854             "done:"
8855          %}
8856 
8857   ins_encode %{
8858     Label done;
8859     Register Rdst = $dst$$Register;
8860     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8861 
8862     __ movl(HIGH_FROM_LOW(Rdst), 0);
8863     __ blsil(Rdst, $src$$Address);
8864     __ jccb(Assembler::notZero, done);
8865     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8866     __ bind(done);
8867   %}
8868   ins_pipe(ialu_reg_mem);
8869 %}
8870 
8871 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8872 %{
8873   match(Set dst (XorL (AddL src minus_1) src));
8874   predicate(UseBMI1Instructions);
8875   effect(KILL cr, TEMP dst);
8876 
8877   format %{ "MOVL    $dst.hi, 0\n\t"
8878             "BLSMSKL $dst.lo, $src.lo\n\t"
8879             "JNC     done\n\t"
8880             "BLSMSKL $dst.hi, $src.hi\n"
8881             "done:"
8882          %}
8883 
8884   ins_encode %{
8885     Label done;
8886     Register Rdst = $dst$$Register;
8887     Register Rsrc = $src$$Register;
8888     __ movl(HIGH_FROM_LOW(Rdst), 0);
8889     __ blsmskl(Rdst, Rsrc);
8890     __ jccb(Assembler::carryClear, done);
8891     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8892     __ bind(done);
8893   %}
8894 
8895   ins_pipe(ialu_reg);
8896 %}
8897 
8898 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8899 %{
8900   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8901   predicate(UseBMI1Instructions);
8902   effect(KILL cr, TEMP dst);
8903 
8904   ins_cost(125);
8905   format %{ "MOVL    $dst.hi, 0\n\t"
8906             "BLSMSKL $dst.lo, $src\n\t"
8907             "JNC     done\n\t"
8908             "BLSMSKL $dst.hi, $src+4\n"
8909             "done:"
8910          %}
8911 
8912   ins_encode %{
8913     Label done;
8914     Register Rdst = $dst$$Register;
8915     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8916 
8917     __ movl(HIGH_FROM_LOW(Rdst), 0);
8918     __ blsmskl(Rdst, $src$$Address);
8919     __ jccb(Assembler::carryClear, done);
8920     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8921     __ bind(done);
8922   %}
8923 
8924   ins_pipe(ialu_reg_mem);
8925 %}
8926 
8927 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8928 %{
8929   match(Set dst (AndL (AddL src minus_1) src) );
8930   predicate(UseBMI1Instructions);
8931   effect(KILL cr, TEMP dst);
8932 
8933   format %{ "MOVL   $dst.hi, $src.hi\n\t"
8934             "BLSRL  $dst.lo, $src.lo\n\t"
8935             "JNC    done\n\t"
8936             "BLSRL  $dst.hi, $src.hi\n"
8937             "done:"
8938   %}
8939 
8940   ins_encode %{
8941     Label done;
8942     Register Rdst = $dst$$Register;
8943     Register Rsrc = $src$$Register;
8944     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8945     __ blsrl(Rdst, Rsrc);
8946     __ jccb(Assembler::carryClear, done);
8947     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8948     __ bind(done);
8949   %}
8950 
8951   ins_pipe(ialu_reg);
8952 %}
8953 
8954 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8955 %{
8956   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8957   predicate(UseBMI1Instructions);
8958   effect(KILL cr, TEMP dst);
8959 
8960   ins_cost(125);
8961   format %{ "MOVL   $dst.hi, $src+4\n\t"
8962             "BLSRL  $dst.lo, $src\n\t"
8963             "JNC    done\n\t"
8964             "BLSRL  $dst.hi, $src+4\n"
8965             "done:"
8966   %}
8967 
8968   ins_encode %{
8969     Label done;
8970     Register Rdst = $dst$$Register;
8971     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8972     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
8973     __ blsrl(Rdst, $src$$Address);
8974     __ jccb(Assembler::carryClear, done);
8975     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
8976     __ bind(done);
8977   %}
8978 
8979   ins_pipe(ialu_reg_mem);
8980 %}
8981 
8982 // Or Long Register with Register
8983 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8984   match(Set dst (OrL dst src));
8985   effect(KILL cr);
8986   format %{ "OR     $dst.lo,$src.lo\n\t"
8987             "OR     $dst.hi,$src.hi" %}
8988   opcode(0x0B,0x0B);
8989   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8990   ins_pipe( ialu_reg_reg_long );
8991 %}
8992 
8993 // Or Long Register with Immediate
8994 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8995   match(Set dst (OrL dst src));
8996   effect(KILL cr);
8997   format %{ "OR     $dst.lo,$src.lo\n\t"
8998             "OR     $dst.hi,$src.hi" %}
8999   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9000   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9001   ins_pipe( ialu_reg_long );
9002 %}
9003 
9004 // Or Long Register with Memory
9005 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9006   match(Set dst (OrL dst (LoadL mem)));
9007   effect(KILL cr);
9008   ins_cost(125);
9009   format %{ "OR     $dst.lo,$mem\n\t"
9010             "OR     $dst.hi,$mem+4" %}
9011   opcode(0x0B,0x0B);
9012   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9013   ins_pipe( ialu_reg_long_mem );
9014 %}
9015 
9016 // Xor Long Register with Register
9017 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9018   match(Set dst (XorL dst src));
9019   effect(KILL cr);
9020   format %{ "XOR    $dst.lo,$src.lo\n\t"
9021             "XOR    $dst.hi,$src.hi" %}
9022   opcode(0x33,0x33);
9023   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9024   ins_pipe( ialu_reg_reg_long );
9025 %}
9026 
9027 // Xor Long Register with Immediate -1
9028 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9029   match(Set dst (XorL dst imm));
9030   format %{ "NOT    $dst.lo\n\t"
9031             "NOT    $dst.hi" %}
9032   ins_encode %{
9033      __ notl($dst$$Register);
9034      __ notl(HIGH_FROM_LOW($dst$$Register));
9035   %}
9036   ins_pipe( ialu_reg_long );
9037 %}
9038 
9039 // Xor Long Register with Immediate
9040 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9041   match(Set dst (XorL dst src));
9042   effect(KILL cr);
9043   format %{ "XOR    $dst.lo,$src.lo\n\t"
9044             "XOR    $dst.hi,$src.hi" %}
9045   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9046   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9047   ins_pipe( ialu_reg_long );
9048 %}
9049 
9050 // Xor Long Register with Memory
9051 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9052   match(Set dst (XorL dst (LoadL mem)));
9053   effect(KILL cr);
9054   ins_cost(125);
9055   format %{ "XOR    $dst.lo,$mem\n\t"
9056             "XOR    $dst.hi,$mem+4" %}
9057   opcode(0x33,0x33);
9058   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9059   ins_pipe( ialu_reg_long_mem );
9060 %}
9061 
9062 // Shift Left Long by 1
9063 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9064   predicate(UseNewLongLShift);
9065   match(Set dst (LShiftL dst cnt));
9066   effect(KILL cr);
9067   ins_cost(100);
9068   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9069             "ADC    $dst.hi,$dst.hi" %}
9070   ins_encode %{
9071     __ addl($dst$$Register,$dst$$Register);
9072     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9073   %}
9074   ins_pipe( ialu_reg_long );
9075 %}
9076 
9077 // Shift Left Long by 2
9078 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9079   predicate(UseNewLongLShift);
9080   match(Set dst (LShiftL dst cnt));
9081   effect(KILL cr);
9082   ins_cost(100);
9083   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9084             "ADC    $dst.hi,$dst.hi\n\t"
9085             "ADD    $dst.lo,$dst.lo\n\t"
9086             "ADC    $dst.hi,$dst.hi" %}
9087   ins_encode %{
9088     __ addl($dst$$Register,$dst$$Register);
9089     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9090     __ addl($dst$$Register,$dst$$Register);
9091     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9092   %}
9093   ins_pipe( ialu_reg_long );
9094 %}
9095 
9096 // Shift Left Long by 3
9097 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9098   predicate(UseNewLongLShift);
9099   match(Set dst (LShiftL dst cnt));
9100   effect(KILL cr);
9101   ins_cost(100);
9102   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9103             "ADC    $dst.hi,$dst.hi\n\t"
9104             "ADD    $dst.lo,$dst.lo\n\t"
9105             "ADC    $dst.hi,$dst.hi\n\t"
9106             "ADD    $dst.lo,$dst.lo\n\t"
9107             "ADC    $dst.hi,$dst.hi" %}
9108   ins_encode %{
9109     __ addl($dst$$Register,$dst$$Register);
9110     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9111     __ addl($dst$$Register,$dst$$Register);
9112     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9113     __ addl($dst$$Register,$dst$$Register);
9114     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9115   %}
9116   ins_pipe( ialu_reg_long );
9117 %}
9118 
9119 // Shift Left Long by 1-31
9120 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9121   match(Set dst (LShiftL dst cnt));
9122   effect(KILL cr);
9123   ins_cost(200);
9124   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9125             "SHL    $dst.lo,$cnt" %}
9126   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9127   ins_encode( move_long_small_shift(dst,cnt) );
9128   ins_pipe( ialu_reg_long );
9129 %}
9130 
9131 // Shift Left Long by 32-63
9132 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9133   match(Set dst (LShiftL dst cnt));
9134   effect(KILL cr);
9135   ins_cost(300);
9136   format %{ "MOV    $dst.hi,$dst.lo\n"
9137           "\tSHL    $dst.hi,$cnt-32\n"
9138           "\tXOR    $dst.lo,$dst.lo" %}
9139   opcode(0xC1, 0x4);  /* C1 /4 ib */
9140   ins_encode( move_long_big_shift_clr(dst,cnt) );
9141   ins_pipe( ialu_reg_long );
9142 %}
9143 
9144 // Shift Left Long by variable
9145 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9146   match(Set dst (LShiftL dst shift));
9147   effect(KILL cr);
9148   ins_cost(500+200);
9149   size(17);
9150   format %{ "TEST   $shift,32\n\t"
9151             "JEQ,s  small\n\t"
9152             "MOV    $dst.hi,$dst.lo\n\t"
9153             "XOR    $dst.lo,$dst.lo\n"
9154     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9155             "SHL    $dst.lo,$shift" %}
9156   ins_encode( shift_left_long( dst, shift ) );
9157   ins_pipe( pipe_slow );
9158 %}
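// Note: a C sketch (illustrative only) of the variable 64-bit left shift
// above.  For counts below 32, SHLD feeds bits from the low word into the
// high word; for counts of 32-63 the low word is first moved to the high
// word.  x86 shift counts are taken mod 32, hence the explicit TEST of bit 5.
//
//   #include <stdint.h>
//   void shlL(uint32_t* lo, uint32_t* hi, int shift) {   // 0 <= shift <= 63
//     int s = shift & 31;
//     if (shift & 32) {                    // TEST shift,32 / JEQ small
//       *hi = *lo; *lo = 0;                // MOV hi,lo / XOR lo,lo
//     }
//     if (s != 0) {                        // SHLD/SHL with CL == 0 are no-ops
//       *hi = (*hi << s) | (*lo >> (32 - s));   // SHLD hi,lo,shift
//       *lo <<= s;                              // SHL  lo,shift
//     }
//   }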
9159 
9160 // Shift Right Long by 1-31
9161 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9162   match(Set dst (URShiftL dst cnt));
9163   effect(KILL cr);
9164   ins_cost(200);
9165   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9166             "SHR    $dst.hi,$cnt" %}
9167   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9168   ins_encode( move_long_small_shift(dst,cnt) );
9169   ins_pipe( ialu_reg_long );
9170 %}
9171 
9172 // Shift Right Long by 32-63
9173 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9174   match(Set dst (URShiftL dst cnt));
9175   effect(KILL cr);
9176   ins_cost(300);
9177   format %{ "MOV    $dst.lo,$dst.hi\n"
9178           "\tSHR    $dst.lo,$cnt-32\n"
9179           "\tXOR    $dst.hi,$dst.hi" %}
9180   opcode(0xC1, 0x5);  /* C1 /5 ib */
9181   ins_encode( move_long_big_shift_clr(dst,cnt) );
9182   ins_pipe( ialu_reg_long );
9183 %}
9184 
9185 // Shift Right Long by variable
9186 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9187   match(Set dst (URShiftL dst shift));
9188   effect(KILL cr);
9189   ins_cost(600);
9190   size(17);
9191   format %{ "TEST   $shift,32\n\t"
9192             "JEQ,s  small\n\t"
9193             "MOV    $dst.lo,$dst.hi\n\t"
9194             "XOR    $dst.hi,$dst.hi\n"
9195     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9196             "SHR    $dst.hi,$shift" %}
9197   ins_encode( shift_right_long( dst, shift ) );
9198   ins_pipe( pipe_slow );
9199 %}
9200 
9201 // Shift Right Long by 1-31
9202 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9203   match(Set dst (RShiftL dst cnt));
9204   effect(KILL cr);
9205   ins_cost(200);
9206   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9207             "SAR    $dst.hi,$cnt" %}
9208   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9209   ins_encode( move_long_small_shift(dst,cnt) );
9210   ins_pipe( ialu_reg_long );
9211 %}
9212 
9213 // Shift Right Long by 32-63
9214 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9215   match(Set dst (RShiftL dst cnt));
9216   effect(KILL cr);
9217   ins_cost(300);
9218   format %{ "MOV    $dst.lo,$dst.hi\n"
9219           "\tSAR    $dst.lo,$cnt-32\n"
9220           "\tSAR    $dst.hi,31" %}
9221   opcode(0xC1, 0x7);  /* C1 /7 ib */
9222   ins_encode( move_long_big_shift_sign(dst,cnt) );
9223   ins_pipe( ialu_reg_long );
9224 %}
9225 
9226 // Shift Right arithmetic Long by variable
9227 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9228   match(Set dst (RShiftL dst shift));
9229   effect(KILL cr);
9230   ins_cost(600);
9231   size(18);
9232   format %{ "TEST   $shift,32\n\t"
9233             "JEQ,s  small\n\t"
9234             "MOV    $dst.lo,$dst.hi\n\t"
9235             "SAR    $dst.hi,31\n"
9236     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9237             "SAR    $dst.hi,$shift" %}
9238   ins_encode( shift_right_arith_long( dst, shift ) );
9239   ins_pipe( pipe_slow );
9240 %}
9241 
9242 
9243 //----------Double Instructions------------------------------------------------
9244 // Double Math
9245 
9246 // Compare & branch
9247 
9248 // P6 version of float compare, sets condition codes in EFLAGS
9249 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9250   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9251   match(Set cr (CmpD src1 src2));
9252   effect(KILL rax);
9253   ins_cost(150);
9254   format %{ "FLD    $src1\n\t"
9255             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9256             "JNP    exit\n\t"
9257             "MOV    ah,1       // saw a NaN, set CF\n\t"
9258             "SAHF\n"
9259      "exit:\tNOP               // avoid branch to branch" %}
9260   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9261   ins_encode( Push_Reg_DPR(src1),
9262               OpcP, RegOpc(src2),
9263               cmpF_P6_fixup );
9264   ins_pipe( pipe_slow );
9265 %}
9266 
9267 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9268   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9269   match(Set cr (CmpD src1 src2));
9270   ins_cost(150);
9271   format %{ "FLD    $src1\n\t"
9272             "FUCOMIP ST,$src2  // P6 instruction" %}
9273   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9274   ins_encode( Push_Reg_DPR(src1),
9275               OpcP, RegOpc(src2));
9276   ins_pipe( pipe_slow );
9277 %}
9278 
9279 // Compare & branch
9280 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9281   predicate(UseSSE<=1);
9282   match(Set cr (CmpD src1 src2));
9283   effect(KILL rax);
9284   ins_cost(200);
9285   format %{ "FLD    $src1\n\t"
9286             "FCOMp  $src2\n\t"
9287             "FNSTSW AX\n\t"
9288             "TEST   AX,0x400\n\t"
9289             "JZ,s   flags\n\t"
9290             "MOV    AH,1\t# unordered treat as LT\n"
9291     "flags:\tSAHF" %}
9292   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9293   ins_encode( Push_Reg_DPR(src1),
9294               OpcP, RegOpc(src2),
9295               fpu_flags);
9296   ins_pipe( pipe_slow );
9297 %}
9298 
9299 // Compare vs zero into -1,0,1
9300 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9301   predicate(UseSSE<=1);
9302   match(Set dst (CmpD3 src1 zero));
9303   effect(KILL cr, KILL rax);
9304   ins_cost(280);
9305   format %{ "FTSTD  $dst,$src1" %}
9306   opcode(0xE4, 0xD9);
9307   ins_encode( Push_Reg_DPR(src1),
9308               OpcS, OpcP, PopFPU,
9309               CmpF_Result(dst));
9310   ins_pipe( pipe_slow );
9311 %}
9312 
9313 // Compare into -1,0,1
9314 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9315   predicate(UseSSE<=1);
9316   match(Set dst (CmpD3 src1 src2));
9317   effect(KILL cr, KILL rax);
9318   ins_cost(300);
9319   format %{ "FCMPD  $dst,$src1,$src2" %}
9320   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9321   ins_encode( Push_Reg_DPR(src1),
9322               OpcP, RegOpc(src2),
9323               CmpF_Result(dst));
9324   ins_pipe( pipe_slow );
9325 %}
9326 
9327 // Double compare and set condition codes in EFLAGS by XMM regs
9328 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9329   predicate(UseSSE>=2);
9330   match(Set cr (CmpD src1 src2));
9331   ins_cost(145);
9332   format %{ "UCOMISD $src1,$src2\n\t"
9333             "JNP,s   exit\n\t"
9334             "PUSHF\t# saw NaN, set CF\n\t"
9335             "AND     [rsp], #0xffffff2b\n\t"
9336             "POPF\n"
9337     "exit:" %}
9338   ins_encode %{
9339     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9340     emit_cmpfp_fixup(_masm);
9341   %}
9342   ins_pipe( pipe_slow );
9343 %}
9344 
9345 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9346   predicate(UseSSE>=2);
9347   match(Set cr (CmpD src1 src2));
9348   ins_cost(100);
9349   format %{ "UCOMISD $src1,$src2" %}
9350   ins_encode %{
9351     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9352   %}
9353   ins_pipe( pipe_slow );
9354 %}
9355 
9356 // Double compare and set condition codes in EFLAGS by XMM regs
9357 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9358   predicate(UseSSE>=2);
9359   match(Set cr (CmpD src1 (LoadD src2)));
9360   ins_cost(145);
9361   format %{ "UCOMISD $src1,$src2\n\t"
9362             "JNP,s   exit\n\t"
9363             "PUSHF\t# saw NaN, set CF\n\t"
9364             "AND     [rsp], #0xffffff2b\n\t"
9365             "POPF\n"
9366     "exit:" %}
9367   ins_encode %{
9368     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9369     emit_cmpfp_fixup(_masm);
9370   %}
9371   ins_pipe( pipe_slow );
9372 %}
9373 
9374 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9375   predicate(UseSSE>=2);
9376   match(Set cr (CmpD src1 (LoadD src2)));
9377   ins_cost(100);
9378   format %{ "UCOMISD $src1,$src2" %}
9379   ins_encode %{
9380     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9381   %}
9382   ins_pipe( pipe_slow );
9383 %}
9384 
9385 // Compare into -1,0,1 in XMM
9386 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9387   predicate(UseSSE>=2);
9388   match(Set dst (CmpD3 src1 src2));
9389   effect(KILL cr);
9390   ins_cost(255);
9391   format %{ "UCOMISD $src1, $src2\n\t"
9392             "MOV     $dst, #-1\n\t"
9393             "JP,s    done\n\t"
9394             "JB,s    done\n\t"
9395             "SETNE   $dst\n\t"
9396             "MOVZB   $dst, $dst\n"
9397     "done:" %}
9398   ins_encode %{
9399     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9400     emit_cmpfp3(_masm, $dst$$Register);
9401   %}
9402   ins_pipe( pipe_slow );
9403 %}
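// Note: a C sketch (illustrative only) of the three-way compare the CmpD3
// encodings above produce: unordered (NaN) operands and "below" both yield
// -1, equality yields 0, and "above" yields 1.
//
//   #include <math.h>
//   int cmpD3(double a, double b) {
//     if (isnan(a) || isnan(b)) return -1;   // JP,s  done (dst preset to -1)
//     if (a < b)                return -1;   // JB,s  done
//     return (a != b) ? 1 : 0;               // SETNE / MOVZB
//   }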
9404 
9405 // Compare into -1,0,1 in XMM and memory
9406 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9407   predicate(UseSSE>=2);
9408   match(Set dst (CmpD3 src1 (LoadD src2)));
9409   effect(KILL cr);
9410   ins_cost(275);
9411   format %{ "UCOMISD $src1, $src2\n\t"
9412             "MOV     $dst, #-1\n\t"
9413             "JP,s    done\n\t"
9414             "JB,s    done\n\t"
9415             "SETNE   $dst\n\t"
9416             "MOVZB   $dst, $dst\n"
9417     "done:" %}
9418   ins_encode %{
9419     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9420     emit_cmpfp3(_masm, $dst$$Register);
9421   %}
9422   ins_pipe( pipe_slow );
9423 %}
9424 
9425 
9426 instruct subDPR_reg(regDPR dst, regDPR src) %{
9427   predicate (UseSSE <=1);
9428   match(Set dst (SubD dst src));
9429 
9430   format %{ "FLD    $src\n\t"
9431             "DSUBp  $dst,ST" %}
9432   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9433   ins_cost(150);
9434   ins_encode( Push_Reg_DPR(src),
9435               OpcP, RegOpc(dst) );
9436   ins_pipe( fpu_reg_reg );
9437 %}
9438 
9439 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9440   predicate (UseSSE <=1);
9441   match(Set dst (RoundDouble (SubD src1 src2)));
9442   ins_cost(250);
9443 
9444   format %{ "FLD    $src2\n\t"
9445             "DSUB   ST,$src1\n\t"
9446             "FSTP_D $dst\t# D-round" %}
9447   opcode(0xD8, 0x5);
9448   ins_encode( Push_Reg_DPR(src2),
9449               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9450   ins_pipe( fpu_mem_reg_reg );
9451 %}
9452 
9453 
9454 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9455   predicate (UseSSE <=1);
9456   match(Set dst (SubD dst (LoadD src)));
9457   ins_cost(150);
9458 
9459   format %{ "FLD    $src\n\t"
9460             "DSUBp  $dst,ST" %}
9461   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9462   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9463               OpcP, RegOpc(dst) );
9464   ins_pipe( fpu_reg_mem );
9465 %}
9466 
9467 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9468   predicate (UseSSE<=1);
9469   match(Set dst (AbsD src));
9470   ins_cost(100);
9471   format %{ "FABS" %}
9472   opcode(0xE1, 0xD9);
9473   ins_encode( OpcS, OpcP );
9474   ins_pipe( fpu_reg_reg );
9475 %}
9476 
9477 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9478   predicate(UseSSE<=1);
9479   match(Set dst (NegD src));
9480   ins_cost(100);
9481   format %{ "FCHS" %}
9482   opcode(0xE0, 0xD9);
9483   ins_encode( OpcS, OpcP );
9484   ins_pipe( fpu_reg_reg );
9485 %}
9486 
9487 instruct addDPR_reg(regDPR dst, regDPR src) %{
9488   predicate(UseSSE<=1);
9489   match(Set dst (AddD dst src));
9490   format %{ "FLD    $src\n\t"
9491             "DADD   $dst,ST" %}
9492   size(4);
9493   ins_cost(150);
9494   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9495   ins_encode( Push_Reg_DPR(src),
9496               OpcP, RegOpc(dst) );
9497   ins_pipe( fpu_reg_reg );
9498 %}
9499 
9500 
9501 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9502   predicate(UseSSE<=1);
9503   match(Set dst (RoundDouble (AddD src1 src2)));
9504   ins_cost(250);
9505 
9506   format %{ "FLD    $src2\n\t"
9507             "DADD   ST,$src1\n\t"
9508             "FSTP_D $dst\t# D-round" %}
9509   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9510   ins_encode( Push_Reg_DPR(src2),
9511               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9512   ins_pipe( fpu_mem_reg_reg );
9513 %}
9514 
9515 
9516 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9517   predicate(UseSSE<=1);
9518   match(Set dst (AddD dst (LoadD src)));
9519   ins_cost(150);
9520 
9521   format %{ "FLD    $src\n\t"
9522             "DADDp  $dst,ST" %}
9523   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9524   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9525               OpcP, RegOpc(dst) );
9526   ins_pipe( fpu_reg_mem );
9527 %}
9528 
9529 // add-to-memory
9530 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9531   predicate(UseSSE<=1);
9532   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9533   ins_cost(150);
9534 
9535   format %{ "FLD_D  $dst\n\t"
9536             "DADD   ST,$src\n\t"
9537             "FST_D  $dst" %}
9538   opcode(0xDD, 0x0);
9539   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9540               Opcode(0xD8), RegOpc(src),
9541               set_instruction_start,
9542               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9543   ins_pipe( fpu_reg_mem );
9544 %}
9545 
9546 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9547   predicate(UseSSE<=1);
9548   match(Set dst (AddD dst con));
9549   ins_cost(125);
9550   format %{ "FLD1\n\t"
9551             "DADDp  $dst,ST" %}
9552   ins_encode %{
9553     __ fld1();
9554     __ faddp($dst$$reg);
9555   %}
9556   ins_pipe(fpu_reg);
9557 %}
9558 
9559 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9560   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9561   match(Set dst (AddD dst con));
9562   ins_cost(200);
9563   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9564             "DADDp  $dst,ST" %}
9565   ins_encode %{
9566     __ fld_d($constantaddress($con));
9567     __ faddp($dst$$reg);
9568   %}
9569   ins_pipe(fpu_reg_mem);
9570 %}
9571 
9572 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9573   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9574   match(Set dst (RoundDouble (AddD src con)));
9575   ins_cost(200);
9576   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9577             "DADD   ST,$src\n\t"
9578             "FSTP_D $dst\t# D-round" %}
9579   ins_encode %{
9580     __ fld_d($constantaddress($con));
9581     __ fadd($src$$reg);
9582     __ fstp_d(Address(rsp, $dst$$disp));
9583   %}
9584   ins_pipe(fpu_mem_reg_con);
9585 %}
9586 
9587 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9588   predicate(UseSSE<=1);
9589   match(Set dst (MulD dst src));
9590   format %{ "FLD    $src\n\t"
9591             "DMULp  $dst,ST" %}
9592   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9593   ins_cost(150);
9594   ins_encode( Push_Reg_DPR(src),
9595               OpcP, RegOpc(dst) );
9596   ins_pipe( fpu_reg_reg );
9597 %}
9598 
9599 // Strict FP instruction biases argument before multiply then
9600 // biases result to avoid double rounding of subnormals.
9601 //
9602 // scale arg1 by multiplying arg1 by 2^(-15360)
9603 // load arg2
9604 // multiply scaled arg1 by arg2
9605 // rescale product by 2^(15360)
9606 //
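// The bias 2^15360 is the gap between the extended-precision exponent bias
// (16383) and the double bias (1023).  Pre-scaling maps a product that would
// be a double subnormal onto an x87 denormal, so (with precision control at
// 53 bits) it is rounded once, at the bit position double requires; the final
// rescale by an exact power of two restores the magnitude.  Roughly:
//
//   dst = ((dst * 2^-15360) * src) * 2^+15360
//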
9607 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9608   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9609   match(Set dst (MulD dst src));
9610   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9611 
9612   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9613             "DMULp  $dst,ST\n\t"
9614             "FLD    $src\n\t"
9615             "DMULp  $dst,ST\n\t"
9616             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9617             "DMULp  $dst,ST\n\t" %}
9618   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9619   ins_encode( strictfp_bias1(dst),
9620               Push_Reg_DPR(src),
9621               OpcP, RegOpc(dst),
9622               strictfp_bias2(dst) );
9623   ins_pipe( fpu_reg_reg );
9624 %}
9625 
9626 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9627   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9628   match(Set dst (MulD dst con));
9629   ins_cost(200);
9630   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9631             "DMULp  $dst,ST" %}
9632   ins_encode %{
9633     __ fld_d($constantaddress($con));
9634     __ fmulp($dst$$reg);
9635   %}
9636   ins_pipe(fpu_reg_mem);
9637 %}
9638 
9639 
9640 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9641   predicate( UseSSE<=1 );
9642   match(Set dst (MulD dst (LoadD src)));
9643   ins_cost(200);
9644   format %{ "FLD_D  $src\n\t"
9645             "DMULp  $dst,ST" %}
9646   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9647   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9648               OpcP, RegOpc(dst) );
9649   ins_pipe( fpu_reg_mem );
9650 %}
9651 
9652 //
9653 // Cisc-alternate to reg-reg multiply
9654 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9655   predicate( UseSSE<=1 );
9656   match(Set dst (MulD src (LoadD mem)));
9657   ins_cost(250);
9658   format %{ "FLD_D  $mem\n\t"
9659             "DMUL   ST,$src\n\t"
9660             "FSTP_D $dst" %}
9661   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9662   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9663               OpcReg_FPR(src),
9664               Pop_Reg_DPR(dst) );
9665   ins_pipe( fpu_reg_reg_mem );
9666 %}
9667 
9668 
9669 // MACRO3 -- addDPR a mulDPR
9670 // This instruction is a '2-address' instruction in that the result goes
9671 // back to src2.  This eliminates a move from the macro; possibly the
9672 // register allocator will have to add it back (and maybe not).
9673 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9674   predicate( UseSSE<=1 );
9675   match(Set src2 (AddD (MulD src0 src1) src2));
9676   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9677             "DMUL   ST,$src1\n\t"
9678             "DADDp  $src2,ST" %}
9679   ins_cost(250);
9680   opcode(0xDD); /* LoadD DD /0 */
9681   ins_encode( Push_Reg_FPR(src0),
9682               FMul_ST_reg(src1),
9683               FAddP_reg_ST(src2) );
9684   ins_pipe( fpu_reg_reg_reg );
9685 %}
9686 
9687 
9688 // MACRO3 -- subDPR a mulDPR
9689 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9690   predicate( UseSSE<=1 );
9691   match(Set src2 (SubD (MulD src0 src1) src2));
9692   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9693             "DMUL   ST,$src1\n\t"
9694             "DSUBRp $src2,ST" %}
9695   ins_cost(250);
9696   ins_encode( Push_Reg_FPR(src0),
9697               FMul_ST_reg(src1),
9698               Opcode(0xDE), Opc_plus(0xE0,src2));
9699   ins_pipe( fpu_reg_reg_reg );
9700 %}
9701 
9702 
9703 instruct divDPR_reg(regDPR dst, regDPR src) %{
9704   predicate( UseSSE<=1 );
9705   match(Set dst (DivD dst src));
9706 
9707   format %{ "FLD    $src\n\t"
9708             "FDIVp  $dst,ST" %}
9709   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9710   ins_cost(150);
9711   ins_encode( Push_Reg_DPR(src),
9712               OpcP, RegOpc(dst) );
9713   ins_pipe( fpu_reg_reg );
9714 %}
9715 
9716 // Strict FP instruction biases argument before division then
9717 // biases result, to avoid double rounding of subnormals.
9718 //
9719 // scale dividend by multiplying dividend by 2^(-15360)
9720 // load divisor
9721 // divide scaled dividend by divisor
9722 // rescale quotient by 2^(15360)
9723 //
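// Only the dividend needs the bias: (x * 2^-15360) / y == (x / y) * 2^-15360,
// so the quotient comes out scaled by the same exact power of two and the
// final multiply by 2^+15360 undoes it.
//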
9724 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9729 
9730   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9731             "DMULp  $dst,ST\n\t"
9732             "FLD    $src\n\t"
9733             "FDIVp  $dst,ST\n\t"
9734             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9735             "DMULp  $dst,ST\n\t" %}
9736   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9737   ins_encode( strictfp_bias1(dst),
9738               Push_Reg_DPR(src),
9739               OpcP, RegOpc(dst),
9740               strictfp_bias2(dst) );
9741   ins_pipe( fpu_reg_reg );
9742 %}
9743 
9744 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9745   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9746   match(Set dst (RoundDouble (DivD src1 src2)));
9747 
9748   format %{ "FLD    $src1\n\t"
9749             "FDIV   ST,$src2\n\t"
9750             "FSTP_D $dst\t# D-round" %}
9751   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9752   ins_encode( Push_Reg_DPR(src1),
9753               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9754   ins_pipe( fpu_mem_reg_reg );
9755 %}
9756 
9757 
9758 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9759   predicate(UseSSE<=1);
9760   match(Set dst (ModD dst src));
9761   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9762 
9763   format %{ "DMOD   $dst,$src" %}
9764   ins_cost(250);
9765   ins_encode(Push_Reg_Mod_DPR(dst, src),
9766               emitModDPR(),
9767               Push_Result_Mod_DPR(src),
9768               Pop_Reg_DPR(dst));
9769   ins_pipe( pipe_slow );
9770 %}
9771 
9772 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9773   predicate(UseSSE>=2);
9774   match(Set dst (ModD src0 src1));
9775   effect(KILL rax, KILL cr);
9776 
9777   format %{ "SUB    ESP,8\t # DMOD\n"
9778           "\tMOVSD  [ESP+0],$src1\n"
9779           "\tFLD_D  [ESP+0]\n"
9780           "\tMOVSD  [ESP+0],$src0\n"
9781           "\tFLD_D  [ESP+0]\n"
9782      "loop:\tFPREM\n"
9783           "\tFWAIT\n"
9784           "\tFNSTSW AX\n"
9785           "\tSAHF\n"
9786           "\tJP     loop\n"
9787           "\tFSTP_D [ESP+0]\n"
9788           "\tMOVSD  $dst,[ESP+0]\n"
9789           "\tADD    ESP,8\n"
9790           "\tFSTP   ST0\t # Restore FPU Stack"
9791     %}
9792   ins_cost(250);
9793   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9794   ins_pipe( pipe_slow );
9795 %}
9796 
9797 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
9798   predicate (UseSSE<=1);
9799   match(Set dst(TanD src));
9800   format %{ "DTAN   $dst" %}
9801   ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
9802               Opcode(0xDD), Opcode(0xD8));   // fstp st
9803   ins_pipe( pipe_slow );
9804 %}
9805 
9806 instruct tanD_reg(regD dst, eFlagsReg cr) %{
9807   predicate (UseSSE>=2);
9808   match(Set dst(TanD dst));
9809   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9810   format %{ "DTAN   $dst" %}
9811   ins_encode( Push_SrcD(dst),
9812               Opcode(0xD9), Opcode(0xF2),    // fptan
9813               Opcode(0xDD), Opcode(0xD8),   // fstp st
9814               Push_ResultD(dst) );
9815   ins_pipe( pipe_slow );
9816 %}
9817 
9818 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9819   predicate (UseSSE<=1);
9820   match(Set dst(AtanD dst src));
9821   format %{ "DATA   $dst,$src" %}
9822   opcode(0xD9, 0xF3);
9823   ins_encode( Push_Reg_DPR(src),
9824               OpcP, OpcS, RegOpc(dst) );
9825   ins_pipe( pipe_slow );
9826 %}
9827 
9828 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9829   predicate (UseSSE>=2);
9830   match(Set dst(AtanD dst src));
9831   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9832   format %{ "DATA   $dst,$src" %}
9833   opcode(0xD9, 0xF3);
9834   ins_encode( Push_SrcD(src),
9835               OpcP, OpcS, Push_ResultD(dst) );
9836   ins_pipe( pipe_slow );
9837 %}
9838 
9839 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9840   predicate (UseSSE<=1);
9841   match(Set dst (SqrtD src));
9842   format %{ "DSQRT  $dst,$src" %}
9843   opcode(0xFA, 0xD9);
9844   ins_encode( Push_Reg_DPR(src),
9845               OpcS, OpcP, Pop_Reg_DPR(dst) );
9846   ins_pipe( pipe_slow );
9847 %}
9848 
9849 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
9850   predicate (UseSSE<=1);
9851   // The source Double operand on FPU stack
9852   match(Set dst (Log10D src));
9853   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9854   // fxch         ; swap ST(0) with ST(1)
9855   // fyl2x        ; compute log_10(2) * log_2(x)
9856   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9857             "FXCH   \n\t"
            "FYL2X  \t\t\t# Q=Log_10(2)*Log_2(x)"
9859          %}
9860   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9861               Opcode(0xD9), Opcode(0xC9),   // fxch
9862               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9863 
9864   ins_pipe( pipe_slow );
9865 %}
9866 
9867 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
9868   predicate (UseSSE>=2);
9869   effect(KILL cr);
9870   match(Set dst (Log10D src));
9871   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9872   // fyl2x        ; compute log_10(2) * log_2(x)
9873   format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FYL2X  \t\t\t# Q=Log_10(2)*Log_2(x)"
9875          %}
9876   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9877               Push_SrcD(src),
9878               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9879               Push_ResultD(dst));
9880 
9881   ins_pipe( pipe_slow );
9882 %}
9883 
9884 //-------------Float Instructions-------------------------------
9885 // Float Math
9886 
9887 // Code for float compare:
9888 //     fcompp();
9889 //     fwait(); fnstsw_ax();
9890 //     sahf();
9891 //     movl(dst, unordered_result);
9892 //     jcc(Assembler::parity, exit);
9893 //     movl(dst, less_result);
9894 //     jcc(Assembler::below, exit);
9895 //     movl(dst, equal_result);
9896 //     jcc(Assembler::equal, exit);
9897 //     movl(dst, greater_result);
9898 //   exit:
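//
// After fnstsw_ax()/sahf() the x87 condition codes land in EFLAGS as
// C0->CF, C2->PF, C3->ZF; an unordered compare sets all three, which is
// why parity is tested first and mapped to unordered_result above.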
9899 
9900 // P6 version of float compare, sets condition codes in EFLAGS
9901 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9902   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9903   match(Set cr (CmpF src1 src2));
9904   effect(KILL rax);
9905   ins_cost(150);
9906   format %{ "FLD    $src1\n\t"
9907             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9908             "JNP    exit\n\t"
9909             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
9910             "SAHF\n"
9911      "exit:\tNOP               // avoid branch to branch" %}
9912   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9913   ins_encode( Push_Reg_DPR(src1),
9914               OpcP, RegOpc(src2),
9915               cmpF_P6_fixup );
9916   ins_pipe( pipe_slow );
9917 %}
9918 
9919 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
9920   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9921   match(Set cr (CmpF src1 src2));
9922   ins_cost(100);
9923   format %{ "FLD    $src1\n\t"
9924             "FUCOMIP ST,$src2  // P6 instruction" %}
9925   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9926   ins_encode( Push_Reg_DPR(src1),
9927               OpcP, RegOpc(src2));
9928   ins_pipe( pipe_slow );
9929 %}
9930 
9931 
9932 // Compare & branch
9933 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9934   predicate(UseSSE == 0);
9935   match(Set cr (CmpF src1 src2));
9936   effect(KILL rax);
9937   ins_cost(200);
9938   format %{ "FLD    $src1\n\t"
9939             "FCOMp  $src2\n\t"
9940             "FNSTSW AX\n\t"
9941             "TEST   AX,0x400\n\t"
9942             "JZ,s   flags\n\t"
9943             "MOV    AH,1\t# unordered treat as LT\n"
9944     "flags:\tSAHF" %}
9945   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9946   ins_encode( Push_Reg_DPR(src1),
9947               OpcP, RegOpc(src2),
9948               fpu_flags);
9949   ins_pipe( pipe_slow );
9950 %}
9951 
9952 // Compare vs zero into -1,0,1
9953 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9954   predicate(UseSSE == 0);
9955   match(Set dst (CmpF3 src1 zero));
9956   effect(KILL cr, KILL rax);
9957   ins_cost(280);
9958   format %{ "FTSTF  $dst,$src1" %}
9959   opcode(0xE4, 0xD9);
9960   ins_encode( Push_Reg_DPR(src1),
9961               OpcS, OpcP, PopFPU,
9962               CmpF_Result(dst));
9963   ins_pipe( pipe_slow );
9964 %}
9965 
9966 // Compare into -1,0,1
9967 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
9968   predicate(UseSSE == 0);
9969   match(Set dst (CmpF3 src1 src2));
9970   effect(KILL cr, KILL rax);
9971   ins_cost(300);
9972   format %{ "FCMPF  $dst,$src1,$src2" %}
9973   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9974   ins_encode( Push_Reg_DPR(src1),
9975               OpcP, RegOpc(src2),
9976               CmpF_Result(dst));
9977   ins_pipe( pipe_slow );
9978 %}
9979 
9980 // float compare and set condition codes in EFLAGS by XMM regs
9981 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
9982   predicate(UseSSE>=1);
9983   match(Set cr (CmpF src1 src2));
9984   ins_cost(145);
9985   format %{ "UCOMISS $src1,$src2\n\t"
9986             "JNP,s   exit\n\t"
9987             "PUSHF\t# saw NaN, set CF\n\t"
9988             "AND     [rsp], #0xffffff2b\n\t"
9989             "POPF\n"
9990     "exit:" %}
9991   ins_encode %{
9992     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
9993     emit_cmpfp_fixup(_masm);
9994   %}
9995   ins_pipe( pipe_slow );
9996 %}
9997 
9998 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
9999   predicate(UseSSE>=1);
10000   match(Set cr (CmpF src1 src2));
10001   ins_cost(100);
10002   format %{ "UCOMISS $src1,$src2" %}
10003   ins_encode %{
10004     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10005   %}
10006   ins_pipe( pipe_slow );
10007 %}
10008 
10009 // float compare and set condition codes in EFLAGS by XMM regs
10010 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10011   predicate(UseSSE>=1);
10012   match(Set cr (CmpF src1 (LoadF src2)));
10013   ins_cost(165);
10014   format %{ "UCOMISS $src1,$src2\n\t"
10015             "JNP,s   exit\n\t"
10016             "PUSHF\t# saw NaN, set CF\n\t"
10017             "AND     [rsp], #0xffffff2b\n\t"
10018             "POPF\n"
10019     "exit:" %}
10020   ins_encode %{
10021     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10022     emit_cmpfp_fixup(_masm);
10023   %}
10024   ins_pipe( pipe_slow );
10025 %}
10026 
10027 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10028   predicate(UseSSE>=1);
10029   match(Set cr (CmpF src1 (LoadF src2)));
10030   ins_cost(100);
10031   format %{ "UCOMISS $src1,$src2" %}
10032   ins_encode %{
10033     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10034   %}
10035   ins_pipe( pipe_slow );
10036 %}
10037 
10038 // Compare into -1,0,1 in XMM
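// The sequence preloads -1 and keeps it when the compare is unordered (JP)
// or below (JB); otherwise SETNE/MOVZB leave 0 for equal and 1 for greater.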
10039 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10040   predicate(UseSSE>=1);
10041   match(Set dst (CmpF3 src1 src2));
10042   effect(KILL cr);
10043   ins_cost(255);
10044   format %{ "UCOMISS $src1, $src2\n\t"
10045             "MOV     $dst, #-1\n\t"
10046             "JP,s    done\n\t"
10047             "JB,s    done\n\t"
10048             "SETNE   $dst\n\t"
10049             "MOVZB   $dst, $dst\n"
10050     "done:" %}
10051   ins_encode %{
10052     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10053     emit_cmpfp3(_masm, $dst$$Register);
10054   %}
10055   ins_pipe( pipe_slow );
10056 %}
10057 
10058 // Compare into -1,0,1 in XMM and memory
10059 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10060   predicate(UseSSE>=1);
10061   match(Set dst (CmpF3 src1 (LoadF src2)));
10062   effect(KILL cr);
10063   ins_cost(275);
10064   format %{ "UCOMISS $src1, $src2\n\t"
10065             "MOV     $dst, #-1\n\t"
10066             "JP,s    done\n\t"
10067             "JB,s    done\n\t"
10068             "SETNE   $dst\n\t"
10069             "MOVZB   $dst, $dst\n"
10070     "done:" %}
10071   ins_encode %{
10072     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10073     emit_cmpfp3(_masm, $dst$$Register);
10074   %}
10075   ins_pipe( pipe_slow );
10076 %}
10077 
10078 // Spill to obtain 24-bit precision
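// Storing the x87 result through a 32-bit stack slot rounds the significand
// to 24 bits and clamps the exponent to float range, which the 80-bit x87
// registers would otherwise preserve.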
10079 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10080   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10081   match(Set dst (SubF src1 src2));
10082 
10083   format %{ "FSUB   $dst,$src1 - $src2" %}
10084   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10085   ins_encode( Push_Reg_FPR(src1),
10086               OpcReg_FPR(src2),
10087               Pop_Mem_FPR(dst) );
10088   ins_pipe( fpu_mem_reg_reg );
10089 %}
10090 //
10091 // This instruction does not round to 24-bits
10092 instruct subFPR_reg(regFPR dst, regFPR src) %{
10093   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10094   match(Set dst (SubF dst src));
10095 
10096   format %{ "FSUB   $dst,$src" %}
10097   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10098   ins_encode( Push_Reg_FPR(src),
10099               OpcP, RegOpc(dst) );
10100   ins_pipe( fpu_reg_reg );
10101 %}
10102 
10103 // Spill to obtain 24-bit precision
10104 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10105   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10106   match(Set dst (AddF src1 src2));
10107 
10108   format %{ "FADD   $dst,$src1,$src2" %}
10109   opcode(0xD8, 0x0); /* D8 C0+i */
10110   ins_encode( Push_Reg_FPR(src2),
10111               OpcReg_FPR(src1),
10112               Pop_Mem_FPR(dst) );
10113   ins_pipe( fpu_mem_reg_reg );
10114 %}
10115 //
10116 // This instruction does not round to 24-bits
10117 instruct addFPR_reg(regFPR dst, regFPR src) %{
10118   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10119   match(Set dst (AddF dst src));
10120 
10121   format %{ "FLD    $src\n\t"
10122             "FADDp  $dst,ST" %}
10123   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10124   ins_encode( Push_Reg_FPR(src),
10125               OpcP, RegOpc(dst) );
10126   ins_pipe( fpu_reg_reg );
10127 %}
10128 
10129 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10130   predicate(UseSSE==0);
10131   match(Set dst (AbsF src));
10132   ins_cost(100);
10133   format %{ "FABS" %}
10134   opcode(0xE1, 0xD9);
10135   ins_encode( OpcS, OpcP );
10136   ins_pipe( fpu_reg_reg );
10137 %}
10138 
10139 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10140   predicate(UseSSE==0);
10141   match(Set dst (NegF src));
10142   ins_cost(100);
10143   format %{ "FCHS" %}
10144   opcode(0xE0, 0xD9);
10145   ins_encode( OpcS, OpcP );
10146   ins_pipe( fpu_reg_reg );
10147 %}
10148 
10149 // Cisc-alternate to addFPR_reg
10150 // Spill to obtain 24-bit precision
10151 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10152   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10153   match(Set dst (AddF src1 (LoadF src2)));
10154 
10155   format %{ "FLD    $src2\n\t"
10156             "FADD   ST,$src1\n\t"
10157             "FSTP_S $dst" %}
10158   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10159   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10160               OpcReg_FPR(src1),
10161               Pop_Mem_FPR(dst) );
10162   ins_pipe( fpu_mem_reg_mem );
10163 %}
10164 //
10165 // Cisc-alternate to addFPR_reg
10166 // This instruction does not round to 24-bits
10167 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10168   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10169   match(Set dst (AddF dst (LoadF src)));
10170 
10171   format %{ "FADD   $dst,$src" %}
10172   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10173   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10174               OpcP, RegOpc(dst) );
10175   ins_pipe( fpu_reg_mem );
10176 %}
10177 
// Following two instructions for _222_mpegaudio
10179 // Spill to obtain 24-bit precision
10180 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10181   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10182   match(Set dst (AddF src1 src2));
10183 
10184   format %{ "FADD   $dst,$src1,$src2" %}
10185   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10186   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10187               OpcReg_FPR(src2),
10188               Pop_Mem_FPR(dst) );
10189   ins_pipe( fpu_mem_reg_mem );
10190 %}
10191 
10192 // Cisc-spill variant
10193 // Spill to obtain 24-bit precision
10194 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10195   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10196   match(Set dst (AddF src1 (LoadF src2)));
10197 
10198   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10199   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10200   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10201               set_instruction_start,
10202               OpcP, RMopc_Mem(secondary,src1),
10203               Pop_Mem_FPR(dst) );
10204   ins_pipe( fpu_mem_mem_mem );
10205 %}
10206 
10207 // Spill to obtain 24-bit precision
10208 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10209   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10210   match(Set dst (AddF src1 src2));
10211 
10212   format %{ "FADD   $dst,$src1,$src2" %}
10213   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10214   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10215               set_instruction_start,
10216               OpcP, RMopc_Mem(secondary,src1),
10217               Pop_Mem_FPR(dst) );
10218   ins_pipe( fpu_mem_mem_mem );
10219 %}
10220 
10221 
10222 // Spill to obtain 24-bit precision
10223 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10224   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10225   match(Set dst (AddF src con));
10226   format %{ "FLD    $src\n\t"
10227             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10228             "FSTP_S $dst"  %}
10229   ins_encode %{
10230     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10231     __ fadd_s($constantaddress($con));
10232     __ fstp_s(Address(rsp, $dst$$disp));
10233   %}
10234   ins_pipe(fpu_mem_reg_con);
10235 %}
10236 //
10237 // This instruction does not round to 24-bits
10238 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10239   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10240   match(Set dst (AddF src con));
10241   format %{ "FLD    $src\n\t"
10242             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10243             "FSTP   $dst"  %}
10244   ins_encode %{
10245     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10246     __ fadd_s($constantaddress($con));
10247     __ fstp_d($dst$$reg);
10248   %}
10249   ins_pipe(fpu_reg_reg_con);
10250 %}
10251 
10252 // Spill to obtain 24-bit precision
10253 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10254   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10255   match(Set dst (MulF src1 src2));
10256 
10257   format %{ "FLD    $src1\n\t"
10258             "FMUL   $src2\n\t"
10259             "FSTP_S $dst"  %}
10260   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10261   ins_encode( Push_Reg_FPR(src1),
10262               OpcReg_FPR(src2),
10263               Pop_Mem_FPR(dst) );
10264   ins_pipe( fpu_mem_reg_reg );
10265 %}
10266 //
10267 // This instruction does not round to 24-bits
10268 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10269   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10270   match(Set dst (MulF src1 src2));
10271 
10272   format %{ "FLD    $src1\n\t"
10273             "FMUL   $src2\n\t"
10274             "FSTP_S $dst"  %}
10275   opcode(0xD8, 0x1); /* D8 C8+i */
10276   ins_encode( Push_Reg_FPR(src2),
10277               OpcReg_FPR(src1),
10278               Pop_Reg_FPR(dst) );
10279   ins_pipe( fpu_reg_reg_reg );
10280 %}
10281 
10282 
10283 // Spill to obtain 24-bit precision
10284 // Cisc-alternate to reg-reg multiply
10285 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10286   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10287   match(Set dst (MulF src1 (LoadF src2)));
10288 
10289   format %{ "FLD_S  $src2\n\t"
10290             "FMUL   $src1\n\t"
10291             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10293   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10294               OpcReg_FPR(src1),
10295               Pop_Mem_FPR(dst) );
10296   ins_pipe( fpu_mem_reg_mem );
10297 %}
10298 //
10299 // This instruction does not round to 24-bits
10300 // Cisc-alternate to reg-reg multiply
10301 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10302   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10303   match(Set dst (MulF src1 (LoadF src2)));
10304 
10305   format %{ "FMUL   $dst,$src1,$src2" %}
10306   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10307   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10308               OpcReg_FPR(src1),
10309               Pop_Reg_FPR(dst) );
10310   ins_pipe( fpu_reg_reg_mem );
10311 %}
10312 
10313 // Spill to obtain 24-bit precision
10314 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10315   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10316   match(Set dst (MulF src1 src2));
10317 
10318   format %{ "FMUL   $dst,$src1,$src2" %}
10319   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10320   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10321               set_instruction_start,
10322               OpcP, RMopc_Mem(secondary,src1),
10323               Pop_Mem_FPR(dst) );
10324   ins_pipe( fpu_mem_mem_mem );
10325 %}
10326 
10327 // Spill to obtain 24-bit precision
10328 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10329   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10330   match(Set dst (MulF src con));
10331 
10332   format %{ "FLD    $src\n\t"
10333             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10334             "FSTP_S $dst"  %}
10335   ins_encode %{
10336     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10337     __ fmul_s($constantaddress($con));
10338     __ fstp_s(Address(rsp, $dst$$disp));
10339   %}
10340   ins_pipe(fpu_mem_reg_con);
10341 %}
10342 //
10343 // This instruction does not round to 24-bits
10344 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10345   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10346   match(Set dst (MulF src con));
10347 
10348   format %{ "FLD    $src\n\t"
10349             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10350             "FSTP   $dst"  %}
10351   ins_encode %{
10352     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10353     __ fmul_s($constantaddress($con));
10354     __ fstp_d($dst$$reg);
10355   %}
10356   ins_pipe(fpu_reg_reg_con);
10357 %}
10358 
10359 
10360 //
10361 // MACRO1 -- subsume unshared load into mulFPR
10362 // This instruction does not round to 24-bits
10363 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10364   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10365   match(Set dst (MulF (LoadF mem1) src));
10366 
10367   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10368             "FMUL   ST,$src\n\t"
10369             "FSTP   $dst" %}
10370   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10371   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10372               OpcReg_FPR(src),
10373               Pop_Reg_FPR(dst) );
10374   ins_pipe( fpu_reg_reg_mem );
10375 %}
10376 //
10377 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10378 // This instruction does not round to 24-bits
10379 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10380   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10381   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10382   ins_cost(95);
10383 
10384   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10385             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10386             "FADD   ST,$src2\n\t"
10387             "FSTP   $dst" %}
10388   opcode(0xD9); /* LoadF D9 /0 */
10389   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10390               FMul_ST_reg(src1),
10391               FAdd_ST_reg(src2),
10392               Pop_Reg_FPR(dst) );
10393   ins_pipe( fpu_reg_mem_reg_reg );
10394 %}
10395 
10396 // MACRO3 -- addFPR a mulFPR
10397 // This instruction does not round to 24-bits.  It is a '2-address'
10398 // instruction in that the result goes back to src2.  This eliminates
10399 // a move from the macro; possibly the register allocator will have
10400 // to add it back (and maybe not).
10401 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10402   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10403   match(Set src2 (AddF (MulF src0 src1) src2));
10404 
10405   format %{ "FLD    $src0     ===MACRO3===\n\t"
10406             "FMUL   ST,$src1\n\t"
10407             "FADDP  $src2,ST" %}
10408   opcode(0xD9); /* LoadF D9 /0 */
10409   ins_encode( Push_Reg_FPR(src0),
10410               FMul_ST_reg(src1),
10411               FAddP_reg_ST(src2) );
10412   ins_pipe( fpu_reg_reg_reg );
10413 %}
10414 
10415 // MACRO4 -- divFPR subFPR
10416 // This instruction does not round to 24-bits
10417 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10418   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10419   match(Set dst (DivF (SubF src2 src1) src3));
10420 
10421   format %{ "FLD    $src2   ===MACRO4===\n\t"
10422             "FSUB   ST,$src1\n\t"
10423             "FDIV   ST,$src3\n\t"
10424             "FSTP  $dst" %}
10425   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10426   ins_encode( Push_Reg_FPR(src2),
10427               subFPR_divFPR_encode(src1,src3),
10428               Pop_Reg_FPR(dst) );
10429   ins_pipe( fpu_reg_reg_reg_reg );
10430 %}
10431 
10432 // Spill to obtain 24-bit precision
10433 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10434   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10435   match(Set dst (DivF src1 src2));
10436 
10437   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10439   ins_encode( Push_Reg_FPR(src1),
10440               OpcReg_FPR(src2),
10441               Pop_Mem_FPR(dst) );
10442   ins_pipe( fpu_mem_reg_reg );
10443 %}
10444 //
10445 // This instruction does not round to 24-bits
10446 instruct divFPR_reg(regFPR dst, regFPR src) %{
10447   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10448   match(Set dst (DivF dst src));
10449 
10450   format %{ "FDIV   $dst,$src" %}
10451   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10452   ins_encode( Push_Reg_FPR(src),
10453               OpcP, RegOpc(dst) );
10454   ins_pipe( fpu_reg_reg );
10455 %}
10456 
10457 
10458 // Spill to obtain 24-bit precision
10459 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10460   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10461   match(Set dst (ModF src1 src2));
10462   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10463 
10464   format %{ "FMOD   $dst,$src1,$src2" %}
10465   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10466               emitModDPR(),
10467               Push_Result_Mod_DPR(src2),
10468               Pop_Mem_FPR(dst));
10469   ins_pipe( pipe_slow );
10470 %}
10471 //
10472 // This instruction does not round to 24-bits
10473 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10474   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10475   match(Set dst (ModF dst src));
10476   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10477 
10478   format %{ "FMOD   $dst,$src" %}
10479   ins_encode(Push_Reg_Mod_DPR(dst, src),
10480               emitModDPR(),
10481               Push_Result_Mod_DPR(src),
10482               Pop_Reg_FPR(dst));
10483   ins_pipe( pipe_slow );
10484 %}
10485 
10486 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10487   predicate(UseSSE>=1);
10488   match(Set dst (ModF src0 src1));
10489   effect(KILL rax, KILL cr);
10490   format %{ "SUB    ESP,4\t # FMOD\n"
10491           "\tMOVSS  [ESP+0],$src1\n"
10492           "\tFLD_S  [ESP+0]\n"
10493           "\tMOVSS  [ESP+0],$src0\n"
10494           "\tFLD_S  [ESP+0]\n"
10495      "loop:\tFPREM\n"
10496           "\tFWAIT\n"
10497           "\tFNSTSW AX\n"
10498           "\tSAHF\n"
10499           "\tJP     loop\n"
10500           "\tFSTP_S [ESP+0]\n"
10501           "\tMOVSS  $dst,[ESP+0]\n"
10502           "\tADD    ESP,4\n"
10503           "\tFSTP   ST0\t # Restore FPU Stack"
10504     %}
10505   ins_cost(250);
10506   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10507   ins_pipe( pipe_slow );
10508 %}
10509 
10510 
10511 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted.  Please keep it that way!
10513 
10514 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10515   predicate(UseSSE==0);
10516   match(Set dst (RoundFloat src));
10517   ins_cost(125);
10518   format %{ "FST_S  $dst,$src\t# F-round" %}
10519   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10520   ins_pipe( fpu_mem_reg );
10521 %}
10522 
10523 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10524   predicate(UseSSE<=1);
10525   match(Set dst (RoundDouble src));
10526   ins_cost(125);
10527   format %{ "FST_D  $dst,$src\t# D-round" %}
10528   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10529   ins_pipe( fpu_mem_reg );
10530 %}
10531 
// Force rounding to 24-bit precision and 8-bit exponent
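// FST_S narrows both fields at once: a double near 1e300, for example,
// overflows the float exponent range and stores as infinity, while in-range
// values are rounded to a 24-bit significand.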
10533 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10534   predicate(UseSSE==0);
10535   match(Set dst (ConvD2F src));
10536   format %{ "FST_S  $dst,$src\t# F-round" %}
10537   expand %{
10538     roundFloat_mem_reg(dst,src);
10539   %}
10540 %}
10541 
// Force rounding to 24-bit precision and 8-bit exponent
10543 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10544   predicate(UseSSE==1);
10545   match(Set dst (ConvD2F src));
10546   effect( KILL cr );
10547   format %{ "SUB    ESP,4\n\t"
10548             "FST_S  [ESP],$src\t# F-round\n\t"
10549             "MOVSS  $dst,[ESP]\n\t"
10550             "ADD ESP,4" %}
10551   ins_encode %{
10552     __ subptr(rsp, 4);
10553     if ($src$$reg != FPR1L_enc) {
10554       __ fld_s($src$$reg-1);
10555       __ fstp_s(Address(rsp, 0));
10556     } else {
10557       __ fst_s(Address(rsp, 0));
10558     }
10559     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10560     __ addptr(rsp, 4);
10561   %}
10562   ins_pipe( pipe_slow );
10563 %}
10564 
10565 // Force rounding double precision to single precision
10566 instruct convD2F_reg(regF dst, regD src) %{
10567   predicate(UseSSE>=2);
10568   match(Set dst (ConvD2F src));
10569   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10570   ins_encode %{
10571     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10572   %}
10573   ins_pipe( pipe_slow );
10574 %}
10575 
10576 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10577   predicate(UseSSE==0);
10578   match(Set dst (ConvF2D src));
10579   format %{ "FST_S  $dst,$src\t# D-round" %}
10580   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10581   ins_pipe( fpu_reg_reg );
10582 %}
10583 
10584 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10585   predicate(UseSSE==1);
10586   match(Set dst (ConvF2D src));
10587   format %{ "FST_D  $dst,$src\t# D-round" %}
10588   expand %{
10589     roundDouble_mem_reg(dst,src);
10590   %}
10591 %}
10592 
10593 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10594   predicate(UseSSE==1);
10595   match(Set dst (ConvF2D src));
10596   effect( KILL cr );
10597   format %{ "SUB    ESP,4\n\t"
10598             "MOVSS  [ESP] $src\n\t"
10599             "FLD_S  [ESP]\n\t"
10600             "ADD    ESP,4\n\t"
10601             "FSTP   $dst\t# D-round" %}
10602   ins_encode %{
10603     __ subptr(rsp, 4);
10604     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10605     __ fld_s(Address(rsp, 0));
10606     __ addptr(rsp, 4);
10607     __ fstp_d($dst$$reg);
10608   %}
10609   ins_pipe( pipe_slow );
10610 %}
10611 
10612 instruct convF2D_reg(regD dst, regF src) %{
10613   predicate(UseSSE>=2);
10614   match(Set dst (ConvF2D src));
10615   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10616   ins_encode %{
10617     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10618   %}
10619   ins_pipe( pipe_slow );
10620 %}
10621 
10622 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
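// FISTp writes the x87 "integer indefinite" value 0x80000000 when the source
// is a NaN or out of int range, which is why the encoding compares against
// 0x80000000 and calls d2i_wrapper on that slow path.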
10623 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10624   predicate(UseSSE<=1);
10625   match(Set dst (ConvD2I src));
10626   effect( KILL tmp, KILL cr );
10627   format %{ "FLD    $src\t# Convert double to int \n\t"
10628             "FLDCW  trunc mode\n\t"
10629             "SUB    ESP,4\n\t"
10630             "FISTp  [ESP + #0]\n\t"
10631             "FLDCW  std/24-bit mode\n\t"
10632             "POP    EAX\n\t"
10633             "CMP    EAX,0x80000000\n\t"
10634             "JNE,s  fast\n\t"
10635             "FLD_D  $src\n\t"
10636             "CALL   d2i_wrapper\n"
10637       "fast:" %}
10638   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10639   ins_pipe( pipe_slow );
10640 %}
10641 
10642 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10643 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10644   predicate(UseSSE>=2);
10645   match(Set dst (ConvD2I src));
10646   effect( KILL tmp, KILL cr );
10647   format %{ "CVTTSD2SI $dst, $src\n\t"
10648             "CMP    $dst,0x80000000\n\t"
10649             "JNE,s  fast\n\t"
10650             "SUB    ESP, 8\n\t"
10651             "MOVSD  [ESP], $src\n\t"
10652             "FLD_D  [ESP]\n\t"
10653             "ADD    ESP, 8\n\t"
10654             "CALL   d2i_wrapper\n"
10655       "fast:" %}
10656   ins_encode %{
10657     Label fast;
10658     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10659     __ cmpl($dst$$Register, 0x80000000);
10660     __ jccb(Assembler::notEqual, fast);
10661     __ subptr(rsp, 8);
10662     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10663     __ fld_d(Address(rsp, 0));
10664     __ addptr(rsp, 8);
10665     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10666     __ bind(fast);
10667   %}
10668   ins_pipe( pipe_slow );
10669 %}
10670 
10671 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10672   predicate(UseSSE<=1);
10673   match(Set dst (ConvD2L src));
10674   effect( KILL cr );
10675   format %{ "FLD    $src\t# Convert double to long\n\t"
10676             "FLDCW  trunc mode\n\t"
10677             "SUB    ESP,8\n\t"
10678             "FISTp  [ESP + #0]\n\t"
10679             "FLDCW  std/24-bit mode\n\t"
10680             "POP    EAX\n\t"
10681             "POP    EDX\n\t"
10682             "CMP    EDX,0x80000000\n\t"
10683             "JNE,s  fast\n\t"
10684             "TEST   EAX,EAX\n\t"
10685             "JNE,s  fast\n\t"
10686             "FLD    $src\n\t"
10687             "CALL   d2l_wrapper\n"
10688       "fast:" %}
10689   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10690   ins_pipe( pipe_slow );
10691 %}
10692 
10693 // XMM lacks a float/double->long conversion, so use the old FPU stack.
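// (On 32-bit x86, CVTTSD2SI can only produce a 32-bit result, while FISTP
// can write a full 64-bit integer to memory.)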
10694 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10695   predicate (UseSSE>=2);
10696   match(Set dst (ConvD2L src));
10697   effect( KILL cr );
10698   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10699             "MOVSD  [ESP],$src\n\t"
10700             "FLD_D  [ESP]\n\t"
10701             "FLDCW  trunc mode\n\t"
10702             "FISTp  [ESP + #0]\n\t"
10703             "FLDCW  std/24-bit mode\n\t"
10704             "POP    EAX\n\t"
10705             "POP    EDX\n\t"
10706             "CMP    EDX,0x80000000\n\t"
10707             "JNE,s  fast\n\t"
10708             "TEST   EAX,EAX\n\t"
10709             "JNE,s  fast\n\t"
10710             "SUB    ESP,8\n\t"
10711             "MOVSD  [ESP],$src\n\t"
10712             "FLD_D  [ESP]\n\t"
10713             "ADD    ESP,8\n\t"
10714             "CALL   d2l_wrapper\n"
10715       "fast:" %}
10716   ins_encode %{
10717     Label fast;
10718     __ subptr(rsp, 8);
10719     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10720     __ fld_d(Address(rsp, 0));
10721     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10722     __ fistp_d(Address(rsp, 0));
10723     // Restore the rounding mode, mask the exception
10724     if (Compile::current()->in_24_bit_fp_mode()) {
10725       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10726     } else {
10727       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10728     }
10729     // Load the converted long, adjust CPU stack
10730     __ pop(rax);
10731     __ pop(rdx);
10732     __ cmpl(rdx, 0x80000000);
10733     __ jccb(Assembler::notEqual, fast);
10734     __ testl(rax, rax);
10735     __ jccb(Assembler::notEqual, fast);
10736     __ subptr(rsp, 8);
10737     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10738     __ fld_d(Address(rsp, 0));
10739     __ addptr(rsp, 8);
10740     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10741     __ bind(fast);
10742   %}
10743   ins_pipe( pipe_slow );
10744 %}
10745 
// Convert a double to an int.  Java semantics require we do complex
// manipulations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned double down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// take the slow path if needed.
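// Java (JLS 5.1.3) wants NaN -> 0 and out-of-range values saturated to
// Integer/Long MIN_VALUE or MAX_VALUE, but the hardware writes 0x80000000
// for every such case, so the d2i/d2l wrapper stubs redo the conversion in
// software to produce the Java answer.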
10752 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10753   predicate(UseSSE==0);
10754   match(Set dst (ConvF2I src));
10755   effect( KILL tmp, KILL cr );
10756   format %{ "FLD    $src\t# Convert float to int \n\t"
10757             "FLDCW  trunc mode\n\t"
10758             "SUB    ESP,4\n\t"
10759             "FISTp  [ESP + #0]\n\t"
10760             "FLDCW  std/24-bit mode\n\t"
10761             "POP    EAX\n\t"
10762             "CMP    EAX,0x80000000\n\t"
10763             "JNE,s  fast\n\t"
10764             "FLD    $src\n\t"
10765             "CALL   d2i_wrapper\n"
10766       "fast:" %}
10767   // DPR2I_encoding works for FPR2I
10768   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10769   ins_pipe( pipe_slow );
10770 %}
10771 
10772 // Convert a float in xmm to an int reg.
10773 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10774   predicate(UseSSE>=1);
10775   match(Set dst (ConvF2I src));
10776   effect( KILL tmp, KILL cr );
10777   format %{ "CVTTSS2SI $dst, $src\n\t"
10778             "CMP    $dst,0x80000000\n\t"
10779             "JNE,s  fast\n\t"
10780             "SUB    ESP, 4\n\t"
10781             "MOVSS  [ESP], $src\n\t"
10782             "FLD    [ESP]\n\t"
10783             "ADD    ESP, 4\n\t"
10784             "CALL   d2i_wrapper\n"
10785       "fast:" %}
10786   ins_encode %{
10787     Label fast;
10788     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10789     __ cmpl($dst$$Register, 0x80000000);
10790     __ jccb(Assembler::notEqual, fast);
10791     __ subptr(rsp, 4);
10792     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10793     __ fld_s(Address(rsp, 0));
10794     __ addptr(rsp, 4);
10795     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10796     __ bind(fast);
10797   %}
10798   ins_pipe( pipe_slow );
10799 %}
10800 
10801 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10802   predicate(UseSSE==0);
10803   match(Set dst (ConvF2L src));
10804   effect( KILL cr );
10805   format %{ "FLD    $src\t# Convert float to long\n\t"
10806             "FLDCW  trunc mode\n\t"
10807             "SUB    ESP,8\n\t"
10808             "FISTp  [ESP + #0]\n\t"
10809             "FLDCW  std/24-bit mode\n\t"
10810             "POP    EAX\n\t"
10811             "POP    EDX\n\t"
10812             "CMP    EDX,0x80000000\n\t"
10813             "JNE,s  fast\n\t"
10814             "TEST   EAX,EAX\n\t"
10815             "JNE,s  fast\n\t"
10816             "FLD    $src\n\t"
10817             "CALL   d2l_wrapper\n"
10818       "fast:" %}
10819   // DPR2L_encoding works for FPR2L
10820   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10821   ins_pipe( pipe_slow );
10822 %}
10823 
10824 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10825 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10826   predicate (UseSSE>=1);
10827   match(Set dst (ConvF2L src));
10828   effect( KILL cr );
10829   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10830             "MOVSS  [ESP],$src\n\t"
10831             "FLD_S  [ESP]\n\t"
10832             "FLDCW  trunc mode\n\t"
10833             "FISTp  [ESP + #0]\n\t"
10834             "FLDCW  std/24-bit mode\n\t"
10835             "POP    EAX\n\t"
10836             "POP    EDX\n\t"
10837             "CMP    EDX,0x80000000\n\t"
10838             "JNE,s  fast\n\t"
10839             "TEST   EAX,EAX\n\t"
10840             "JNE,s  fast\n\t"
10841             "SUB    ESP,4\t# Convert float to long\n\t"
10842             "MOVSS  [ESP],$src\n\t"
10843             "FLD_S  [ESP]\n\t"
10844             "ADD    ESP,4\n\t"
10845             "CALL   d2l_wrapper\n"
10846       "fast:" %}
10847   ins_encode %{
10848     Label fast;
10849     __ subptr(rsp, 8);
10850     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10851     __ fld_s(Address(rsp, 0));
10852     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10853     __ fistp_d(Address(rsp, 0));
10854     // Restore the rounding mode, mask the exception
10855     if (Compile::current()->in_24_bit_fp_mode()) {
10856       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10857     } else {
10858       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10859     }
10860     // Load the converted long, adjust CPU stack
10861     __ pop(rax);
10862     __ pop(rdx);
10863     __ cmpl(rdx, 0x80000000);
10864     __ jccb(Assembler::notEqual, fast);
10865     __ testl(rax, rax);
10866     __ jccb(Assembler::notEqual, fast);
10867     __ subptr(rsp, 4);
10868     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10869     __ fld_s(Address(rsp, 0));
10870     __ addptr(rsp, 4);
10871     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10872     __ bind(fast);
10873   %}
10874   ins_pipe( pipe_slow );
10875 %}
10876 
10877 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10878   predicate( UseSSE<=1 );
10879   match(Set dst (ConvI2D src));
10880   format %{ "FILD   $src\n\t"
10881             "FSTP   $dst" %}
10882   opcode(0xDB, 0x0);  /* DB /0 */
10883   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10884   ins_pipe( fpu_reg_mem );
10885 %}
10886 
10887 instruct convI2D_reg(regD dst, rRegI src) %{
10888   predicate( UseSSE>=2 && !UseXmmI2D );
10889   match(Set dst (ConvI2D src));
10890   format %{ "CVTSI2SD $dst,$src" %}
10891   ins_encode %{
10892     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10893   %}
10894   ins_pipe( pipe_slow );
10895 %}
10896 
10897 instruct convI2D_mem(regD dst, memory mem) %{
10898   predicate( UseSSE>=2 );
10899   match(Set dst (ConvI2D (LoadI mem)));
10900   format %{ "CVTSI2SD $dst,$mem" %}
10901   ins_encode %{
10902     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10903   %}
10904   ins_pipe( pipe_slow );
10905 %}
10906 
10907 instruct convXI2D_reg(regD dst, rRegI src)
10908 %{
10909   predicate( UseSSE>=2 && UseXmmI2D );
10910   match(Set dst (ConvI2D src));
10911 
10912   format %{ "MOVD  $dst,$src\n\t"
10913             "CVTDQ2PD $dst,$dst\t# i2d" %}
10914   ins_encode %{
10915     __ movdl($dst$$XMMRegister, $src$$Register);
10916     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10917   %}
10918   ins_pipe(pipe_slow); // XXX
10919 %}
10920 
10921 instruct convI2DPR_mem(regDPR dst, memory mem) %{
10922   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
10923   match(Set dst (ConvI2D (LoadI mem)));
10924   format %{ "FILD   $mem\n\t"
10925             "FSTP   $dst" %}
10926   opcode(0xDB);      /* DB /0 */
10927   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10928               Pop_Reg_DPR(dst));
10929   ins_pipe( fpu_reg_mem );
10930 %}
10931 
10932 // Convert a byte to a float; no rounding step needed.
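// (Every value 0..255 -- all that survives the AndI-with-255 matched by the
// predicate below -- is exactly representable in a 24-bit significand, so
// FILD already yields the exact float.)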
10933 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
10934   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
10935   match(Set dst (ConvI2F src));
10936   format %{ "FILD   $src\n\t"
10937             "FSTP   $dst" %}
10938 
10939   opcode(0xDB, 0x0);  /* DB /0 */
10940   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
10941   ins_pipe( fpu_reg_mem );
10942 %}
10943 
10944 // In 24-bit mode, force exponent rounding by storing back out
10945 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
10946   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10947   match(Set dst (ConvI2F src));
10948   ins_cost(200);
10949   format %{ "FILD   $src\n\t"
10950             "FSTP_S $dst" %}
10951   opcode(0xDB, 0x0);  /* DB /0 */
10952   ins_encode( Push_Mem_I(src),
10953               Pop_Mem_FPR(dst));
10954   ins_pipe( fpu_mem_mem );
10955 %}
10956 
10957 // In 24-bit mode, force exponent rounding by storing back out
10958 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
10959   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10960   match(Set dst (ConvI2F (LoadI mem)));
10961   ins_cost(200);
10962   format %{ "FILD   $mem\n\t"
10963             "FSTP_S $dst" %}
10964   opcode(0xDB);  /* DB /0 */
10965   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10966               Pop_Mem_FPR(dst));
10967   ins_pipe( fpu_mem_mem );
10968 %}
10969 
10970 // This instruction does not round to 24-bits
10971 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
10972   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10973   match(Set dst (ConvI2F src));
10974   format %{ "FILD   $src\n\t"
10975             "FSTP   $dst" %}
10976   opcode(0xDB, 0x0);  /* DB /0 */
10977   ins_encode( Push_Mem_I(src),
10978               Pop_Reg_FPR(dst));
10979   ins_pipe( fpu_reg_mem );
10980 %}
10981 
10982 // This instruction does not round to 24-bits
10983 instruct convI2FPR_mem(regFPR dst, memory mem) %{
10984   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10985   match(Set dst (ConvI2F (LoadI mem)));
10986   format %{ "FILD   $mem\n\t"
10987             "FSTP   $dst" %}
10988   opcode(0xDB);      /* DB /0 */
10989   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10990               Pop_Reg_FPR(dst));
10991   ins_pipe( fpu_reg_mem );
10992 %}
10993 
10994 // Convert an int to a float in xmm; no rounding step needed.
10995 instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
10997   match(Set dst (ConvI2F src));
10998   format %{ "CVTSI2SS $dst, $src" %}
10999   ins_encode %{
11000     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11001   %}
11002   ins_pipe( pipe_slow );
11003 %}
11004 
instruct convXI2F_reg(regF dst, rRegI src)
11006 %{
11007   predicate( UseSSE>=2 && UseXmmI2F );
11008   match(Set dst (ConvI2F src));
11009 
11010   format %{ "MOVD  $dst,$src\n\t"
11011             "CVTDQ2PS $dst,$dst\t# i2f" %}
11012   ins_encode %{
11013     __ movdl($dst$$XMMRegister, $src$$Register);
11014     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11015   %}
11016   ins_pipe(pipe_slow); // XXX
11017 %}
11018 
11019 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11020   match(Set dst (ConvI2L src));
11021   effect(KILL cr);
11022   ins_cost(375);
11023   format %{ "MOV    $dst.lo,$src\n\t"
11024             "MOV    $dst.hi,$src\n\t"
11025             "SAR    $dst.hi,31" %}
11026   ins_encode(convert_int_long(dst,src));
11027   ins_pipe( ialu_reg_reg_long );
11028 %}
11029 
11030 // Zero-extend convert int to long
11031 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11032   match(Set dst (AndL (ConvI2L src) mask) );
11033   effect( KILL flags );
11034   ins_cost(250);
11035   format %{ "MOV    $dst.lo,$src\n\t"
11036             "XOR    $dst.hi,$dst.hi" %}
11037   opcode(0x33); // XOR
11038   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11039   ins_pipe( ialu_reg_reg_long );
11040 %}
11041 
11042 // Zero-extend long
11043 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11044   match(Set dst (AndL src mask) );
11045   effect( KILL flags );
11046   ins_cost(250);
11047   format %{ "MOV    $dst.lo,$src.lo\n\t"
11048             "XOR    $dst.hi,$dst.hi\n\t" %}
11049   opcode(0x33); // XOR
11050   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11051   ins_pipe( ialu_reg_reg_long );
11052 %}
11053 
11054 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11055   predicate (UseSSE<=1);
11056   match(Set dst (ConvL2D src));
11057   effect( KILL cr );
11058   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11059             "PUSH   $src.lo\n\t"
11060             "FILD   ST,[ESP + #0]\n\t"
11061             "ADD    ESP,8\n\t"
11062             "FSTP_D $dst\t# D-round" %}
11063   opcode(0xDF, 0x5);  /* DF /5 */
11064   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11065   ins_pipe( pipe_slow );
11066 %}
11067 
11068 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11069   predicate (UseSSE>=2);
11070   match(Set dst (ConvL2D src));
11071   effect( KILL cr );
11072   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11073             "PUSH   $src.lo\n\t"
11074             "FILD_D [ESP]\n\t"
11075             "FSTP_D [ESP]\n\t"
11076             "MOVSD  $dst,[ESP]\n\t"
11077             "ADD    ESP,8" %}
11078   opcode(0xDF, 0x5);  /* DF /5 */
11079   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11080   ins_pipe( pipe_slow );
11081 %}
11082 
11083 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11084   predicate (UseSSE>=1);
11085   match(Set dst (ConvL2F src));
11086   effect( KILL cr );
11087   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11088             "PUSH   $src.lo\n\t"
11089             "FILD_D [ESP]\n\t"
11090             "FSTP_S [ESP]\n\t"
11091             "MOVSS  $dst,[ESP]\n\t"
11092             "ADD    ESP,8" %}
11093   opcode(0xDF, 0x5);  /* DF /5 */
11094   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11095   ins_pipe( pipe_slow );
11096 %}
11097 
11098 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11099   match(Set dst (ConvL2F src));
11100   effect( KILL cr );
11101   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11102             "PUSH   $src.lo\n\t"
11103             "FILD   ST,[ESP + #0]\n\t"
11104             "ADD    ESP,8\n\t"
11105             "FSTP_S $dst\t# F-round" %}
11106   opcode(0xDF, 0x5);  /* DF /5 */
11107   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11108   ins_pipe( pipe_slow );
11109 %}
11110 
11111 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11112   match(Set dst (ConvL2I src));
11113   effect( DEF dst, USE src );
11114   format %{ "MOV    $dst,$src.lo" %}
11115   ins_encode(enc_CopyL_Lo(dst,src));
11116   ins_pipe( ialu_reg_reg );
11117 %}
11118 
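// The MoveF2I/MoveI2F/MoveD2L/MoveL2D instructions below are raw bit-pattern
// moves between the FP/XMM and integer register files or stack slots
// (essentially the Float.floatToRawIntBits / Double.doubleToRawLongBits
// family); no value conversion or rounding is performed.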
11119 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11120   match(Set dst (MoveF2I src));
11121   effect( DEF dst, USE src );
11122   ins_cost(100);
11123   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11124   ins_encode %{
11125     __ movl($dst$$Register, Address(rsp, $src$$disp));
11126   %}
11127   ins_pipe( ialu_reg_mem );
11128 %}
11129 
11130 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11131   predicate(UseSSE==0);
11132   match(Set dst (MoveF2I src));
11133   effect( DEF dst, USE src );
11134 
11135   ins_cost(125);
11136   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11137   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11138   ins_pipe( fpu_mem_reg );
11139 %}
11140 
11141 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11142   predicate(UseSSE>=1);
11143   match(Set dst (MoveF2I src));
11144   effect( DEF dst, USE src );
11145 
11146   ins_cost(95);
11147   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11148   ins_encode %{
11149     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11150   %}
11151   ins_pipe( pipe_slow );
11152 %}
11153 
11154 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11155   predicate(UseSSE>=2);
11156   match(Set dst (MoveF2I src));
11157   effect( DEF dst, USE src );
11158   ins_cost(85);
11159   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11160   ins_encode %{
11161     __ movdl($dst$$Register, $src$$XMMRegister);
11162   %}
11163   ins_pipe( pipe_slow );
11164 %}
11165 
11166 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11167   match(Set dst (MoveI2F src));
11168   effect( DEF dst, USE src );
11169 
11170   ins_cost(100);
11171   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11172   ins_encode %{
11173     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11174   %}
11175   ins_pipe( ialu_mem_reg );
11176 %}
11177 
11178 
11179 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11180   predicate(UseSSE==0);
11181   match(Set dst (MoveI2F src));
11182   effect(DEF dst, USE src);
11183 
11184   ins_cost(125);
11185   format %{ "FLD_S  $src\n\t"
11186             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11187   opcode(0xD9);               /* D9 /0, FLD m32real */
11188   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11189               Pop_Reg_FPR(dst) );
11190   ins_pipe( fpu_reg_mem );
11191 %}
11192 
11193 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11194   predicate(UseSSE>=1);
11195   match(Set dst (MoveI2F src));
11196   effect( DEF dst, USE src );
11197 
11198   ins_cost(95);
11199   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11200   ins_encode %{
11201     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11202   %}
11203   ins_pipe( pipe_slow );
11204 %}
11205 
11206 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11207   predicate(UseSSE>=2);
11208   match(Set dst (MoveI2F src));
11209   effect( DEF dst, USE src );
11210 
11211   ins_cost(85);
11212   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11213   ins_encode %{
11214     __ movdl($dst$$XMMRegister, $src$$Register);
11215   %}
11216   ins_pipe( pipe_slow );
11217 %}
11218 
11219 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11220   match(Set dst (MoveD2L src));
11221   effect(DEF dst, USE src);
11222 
11223   ins_cost(250);
11224   format %{ "MOV    $dst.lo,$src\n\t"
11225             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11226   opcode(0x8B, 0x8B);
11227   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11228   ins_pipe( ialu_mem_long_reg );
11229 %}
11230 
11231 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11232   predicate(UseSSE<=1);
11233   match(Set dst (MoveD2L src));
11234   effect(DEF dst, USE src);
11235 
11236   ins_cost(125);
11237   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11238   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11239   ins_pipe( fpu_mem_reg );
11240 %}
11241 
11242 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11243   predicate(UseSSE>=2);
11244   match(Set dst (MoveD2L src));
11245   effect(DEF dst, USE src);
11246   ins_cost(95);
11247   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11248   ins_encode %{
11249     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11250   %}
11251   ins_pipe( pipe_slow );
11252 %}
11253 
11254 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11255   predicate(UseSSE>=2);
11256   match(Set dst (MoveD2L src));
11257   effect(DEF dst, USE src, TEMP tmp);
11258   ins_cost(85);
11259   format %{ "MOVD   $dst.lo,$src\n\t"
11260             "PSHUFLW $tmp,$src,0x4E\n\t"
11261             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11262   ins_encode %{
11263     __ movdl($dst$$Register, $src$$XMMRegister);
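    // 0x4E swaps the two 32-bit halves of the low quadword, so the next movdl
    // picks up the original high 32 bits of the double.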
11264     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11265     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11266   %}
11267   ins_pipe( pipe_slow );
11268 %}
11269 
11270 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11271   match(Set dst (MoveL2D src));
11272   effect(DEF dst, USE src);
11273 
11274   ins_cost(200);
11275   format %{ "MOV    $dst,$src.lo\n\t"
11276             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11277   opcode(0x89, 0x89);
11278   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11279   ins_pipe( ialu_mem_long_reg );
11280 %}
11281 
11282 
11283 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11284   predicate(UseSSE<=1);
11285   match(Set dst (MoveL2D src));
11286   effect(DEF dst, USE src);
11287   ins_cost(125);
11288 
11289   format %{ "FLD_D  $src\n\t"
11290             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11291   opcode(0xDD);               /* DD /0, FLD m64real */
11292   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11293               Pop_Reg_DPR(dst) );
11294   ins_pipe( fpu_reg_mem );
11295 %}
11296 
11297 
11298 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11299   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11300   match(Set dst (MoveL2D src));
11301   effect(DEF dst, USE src);
11302 
11303   ins_cost(95);
11304   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11305   ins_encode %{
11306     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11307   %}
11308   ins_pipe( pipe_slow );
11309 %}
11310 
11311 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11312   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11313   match(Set dst (MoveL2D src));
11314   effect(DEF dst, USE src);
11315 
11316   ins_cost(95);
11317   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11318   ins_encode %{
11319     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11320   %}
11321   ins_pipe( pipe_slow );
11322 %}
11323 
11324 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11325   predicate(UseSSE>=2);
11326   match(Set dst (MoveL2D src));
11327   effect(TEMP dst, USE src, TEMP tmp);
11328   ins_cost(85);
11329   format %{ "MOVD   $dst,$src.lo\n\t"
11330             "MOVD   $tmp,$src.hi\n\t"
11331             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11332   ins_encode %{
11333     __ movdl($dst$$XMMRegister, $src$$Register);
11334     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
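    // punpckldq interleaves the low dwords: dst[31:0] keeps the low half of the
    // long and dst[63:32] receives the high half from tmp, reassembling the
    // 64-bit value.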
11335     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11336   %}
11337   ins_pipe( pipe_slow );
11338 %}
11339 
11340 
11341 // =======================================================================
11342 // fast clearing of an array
11343 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11344   predicate(!UseFastStosb);
11345   match(Set dummy (ClearArray cnt base));
11346   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11347   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11348             "SHL    ECX,1\t# Convert doublewords to words\n\t"
11349             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11350   ins_encode %{
11351     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11352   %}
11353   ins_pipe( pipe_slow );
11354 %}
11355 
11356 instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11357   predicate(UseFastStosb);
11358   match(Set dummy (ClearArray cnt base));
11359   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11360   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11361             "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11362             "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
11363   ins_encode %{
11364     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11365   %}
11366   ins_pipe( pipe_slow );
11367 %}
11368 
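// The LL/UU/LU/UL suffixes on the string intrinsics below name the encodings
// of the two operands: L = Latin-1 (byte[] elements), U = UTF-16 (char[]
// elements), so e.g. LU compares a Latin-1 string against a UTF-16 string.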
11369 instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11370                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11371   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11372   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11373   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11374 
11375   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11376   ins_encode %{
11377     __ string_compare($str1$$Register, $str2$$Register,
11378                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11379                       $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11380   %}
11381   ins_pipe( pipe_slow );
11382 %}
11383 
11384 instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11385                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11386   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11387   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11388   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11389 
11390   format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11391   ins_encode %{
11392     __ string_compare($str1$$Register, $str2$$Register,
11393                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11394                       $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11395   %}
11396   ins_pipe( pipe_slow );
11397 %}
11398 
11399 instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11400                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11401   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11402   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11403   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11404 
11405   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11406   ins_encode %{
11407     __ string_compare($str1$$Register, $str2$$Register,
11408                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11409                       $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11410   %}
11411   ins_pipe( pipe_slow );
11412 %}
11413 
11414 instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11415                           eAXRegI result, regD tmp1, eFlagsReg cr) %{
11416   predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11417   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11418   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11419 
11420   format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11421   ins_encode %{
11422     __ string_compare($str2$$Register, $str1$$Register,
11423                       $cnt2$$Register, $cnt1$$Register, $result$$Register,
11424                       $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11425   %}
11426   ins_pipe( pipe_slow );
11427 %}
11428 
11429 // fast string equals
11430 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11431                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11432   match(Set result (StrEquals (Binary str1 str2) cnt));
11433   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11434 
11435   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11436   ins_encode %{
11437     __ arrays_equals(false, $str1$$Register, $str2$$Register,
11438                      $cnt$$Register, $result$$Register, $tmp3$$Register,
11439                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11440   %}
11441 
11442   ins_pipe( pipe_slow );
11443 %}
11444 
11445 // fast search of substring with known size.
11446 instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11447                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11448   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11449   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11450   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11451 
11452   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11453   ins_encode %{
11454     int icnt2 = (int)$int_cnt2$$constant;
11455     if (icnt2 >= 16) {
11456       // IndexOf for constant substrings with size >= 16 elements,
11457       // which do not need to be loaded through the stack.
11458       __ string_indexofC8($str1$$Register, $str2$$Register,
11459                           $cnt1$$Register, $cnt2$$Register,
11460                           icnt2, $result$$Register,
11461                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11462     } else {
11463       // Small strings are loaded through the stack if they cross a page boundary.
11464       __ string_indexof($str1$$Register, $str2$$Register,
11465                         $cnt1$$Register, $cnt2$$Register,
11466                         icnt2, $result$$Register,
11467                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11468     }
11469   %}
11470   ins_pipe( pipe_slow );
11471 %}
11472 
11473 // fast search of substring with known size.
11474 instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11475                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11476   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11477   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11478   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11479 
11480   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11481   ins_encode %{
11482     int icnt2 = (int)$int_cnt2$$constant;
11483     if (icnt2 >= 8) {
11484       // IndexOf for constant substrings with size >= 8 elements,
11485       // which do not need to be loaded through the stack.
11486       __ string_indexofC8($str1$$Register, $str2$$Register,
11487                           $cnt1$$Register, $cnt2$$Register,
11488                           icnt2, $result$$Register,
11489                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11490     } else {
11491       // Small strings are loaded through the stack if they cross a page boundary.
11492       __ string_indexof($str1$$Register, $str2$$Register,
11493                         $cnt1$$Register, $cnt2$$Register,
11494                         icnt2, $result$$Register,
11495                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11496     }
11497   %}
11498   ins_pipe( pipe_slow );
11499 %}
11500 
11501 // fast search of substring with known size.
11502 instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11503                              eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11504   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11505   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11506   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11507 
11508   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11509   ins_encode %{
11510     int icnt2 = (int)$int_cnt2$$constant;
11511     if (icnt2 >= 8) {
11512       // IndexOf for constant substrings with size >= 8 elements,
11513       // which do not need to be loaded through the stack.
11514       __ string_indexofC8($str1$$Register, $str2$$Register,
11515                           $cnt1$$Register, $cnt2$$Register,
11516                           icnt2, $result$$Register,
11517                           $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11518     } else {
11519       // Small strings are loaded through the stack if they cross a page boundary.
11520       __ string_indexof($str1$$Register, $str2$$Register,
11521                         $cnt1$$Register, $cnt2$$Register,
11522                         icnt2, $result$$Register,
11523                         $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11524     }
11525   %}
11526   ins_pipe( pipe_slow );
11527 %}
11528 
11529 instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11530                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11531   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11532   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11533   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11534 
11535   format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11536   ins_encode %{
11537     __ string_indexof($str1$$Register, $str2$$Register,
11538                       $cnt1$$Register, $cnt2$$Register,
11539                       (-1), $result$$Register,
11540                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11541   %}
11542   ins_pipe( pipe_slow );
11543 %}
11544 
11545 instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11546                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11547   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11548   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11549   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11550 
11551   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11552   ins_encode %{
11553     __ string_indexof($str1$$Register, $str2$$Register,
11554                       $cnt1$$Register, $cnt2$$Register,
11555                       (-1), $result$$Register,
11556                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11557   %}
11558   ins_pipe( pipe_slow );
11559 %}
11560 
11561 instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11562                          eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11563   predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11564   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11565   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11566 
11567   format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11568   ins_encode %{
11569     __ string_indexof($str1$$Register, $str2$$Register,
11570                       $cnt1$$Register, $cnt2$$Register,
11571                       (-1), $result$$Register,
11572                       $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11573   %}
11574   ins_pipe( pipe_slow );
11575 %}
11576 
11577 instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11578                               eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11579   predicate(UseSSE42Intrinsics);
11580   match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11581   effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11582   format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11583   ins_encode %{
11584     __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11585                            $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11586   %}
11587   ins_pipe( pipe_slow );
11588 %}
11589 
11590 // fast array equals
11591 instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11592                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11593 %{
11594   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11595   match(Set result (AryEq ary1 ary2));
11596   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11597   //ins_cost(300);
11598 
11599   format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11600   ins_encode %{
11601     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11602                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11603                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11604   %}
11605   ins_pipe( pipe_slow );
11606 %}
11607 
11608 instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11609                        regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11610 %{
11611   predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11612   match(Set result (AryEq ary1 ary2));
11613   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11614   //ins_cost(300);
11615 
11616   format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11617   ins_encode %{
11618     __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11619                      $tmp3$$Register, $result$$Register, $tmp4$$Register,
11620                      $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11621   %}
11622   ins_pipe( pipe_slow );
11623 %}
11624 
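// Test whether a byte[] range contains any negative bytes (i.e. any value with
// the high bit set); callers use this to detect non-ASCII data on the
// compact-strings / ISO-8859-1 encoding fast paths.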
11625 instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11626                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11627 %{
11628   match(Set result (HasNegatives ary1 len));
11629   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11630 
11631   format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11632   ins_encode %{
11633     __ has_negatives($ary1$$Register, $len$$Register,
11634                      $result$$Register, $tmp3$$Register,
11635                      $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11636   %}
11637   ins_pipe( pipe_slow );
11638 %}
11639 
11640 // fast char[] to byte[] compression
11641 instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11642                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11643   match(Set result (StrCompressedCopy src (Binary dst len)));
11644   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11645 
11646   format %{ "String Compress $src,$dst -> $result    // KILL EAX, ECX, EDX" %}
11647   ins_encode %{
11648     __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11649                            $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11650                            $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11651   %}
11652   ins_pipe( pipe_slow );
11653 %}
11654 
11655 // fast byte[] to char[] inflation
11656 instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11657                         regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11658   match(Set dummy (StrInflatedCopy src (Binary dst len)));
11659   effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11660 
11661   format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11662   ins_encode %{
11663     __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11664                           $tmp1$$XMMRegister, $tmp2$$Register);
11665   %}
11666   ins_pipe( pipe_slow );
11667 %}
11668 
11669 // encode char[] to byte[] in ISO_8859_1
11670 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11671                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11672                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11673   match(Set result (EncodeISOArray src (Binary dst len)));
11674   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11675 
11676   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11677   ins_encode %{
11678     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11679                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11680                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11681   %}
11682   ins_pipe( pipe_slow );
11683 %}
11684 
11685 
11686 //----------Control Flow Instructions------------------------------------------
11687 // Signed compare Instructions
11688 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11689   match(Set cr (CmpI op1 op2));
11690   effect( DEF cr, USE op1, USE op2 );
11691   format %{ "CMP    $op1,$op2" %}
11692   opcode(0x3B);  /* Opcode 3B /r */
11693   ins_encode( OpcP, RegReg( op1, op2) );
11694   ins_pipe( ialu_cr_reg_reg );
11695 %}
11696 
11697 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11698   match(Set cr (CmpI op1 op2));
11699   effect( DEF cr, USE op1 );
11700   format %{ "CMP    $op1,$op2" %}
11701   opcode(0x81,0x07);  /* Opcode 81 /7 */
11702   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11703   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11704   ins_pipe( ialu_cr_reg_imm );
11705 %}
11706 
11707 // Cisc-spilled version of cmpI_eReg
11708 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11709   match(Set cr (CmpI op1 (LoadI op2)));
11710 
11711   format %{ "CMP    $op1,$op2" %}
11712   ins_cost(500);
11713   opcode(0x3B);  /* Opcode 3B /r */
11714   ins_encode( OpcP, RegMem( op1, op2) );
11715   ins_pipe( ialu_cr_reg_mem );
11716 %}
11717 
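// Compare against zero is done with TEST reg,reg: ANDing a register with
// itself sets ZF/SF exactly as CMP reg,0 would, with a shorter encoding.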
11718 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11719   match(Set cr (CmpI src zero));
11720   effect( DEF cr, USE src );
11721 
11722   format %{ "TEST   $src,$src" %}
11723   opcode(0x85);
11724   ins_encode( OpcP, RegReg( src, src ) );
11725   ins_pipe( ialu_cr_reg_imm );
11726 %}
11727 
11728 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11729   match(Set cr (CmpI (AndI src con) zero));
11730 
11731   format %{ "TEST   $src,$con" %}
11732   opcode(0xF7,0x00);
11733   ins_encode( OpcP, RegOpc(src), Con32(con) );
11734   ins_pipe( ialu_cr_reg_imm );
11735 %}
11736 
11737 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11738   match(Set cr (CmpI (AndI src mem) zero));
11739 
11740   format %{ "TEST   $src,$mem" %}
11741   opcode(0x85);
11742   ins_encode( OpcP, RegMem( src, mem ) );
11743   ins_pipe( ialu_cr_reg_mem );
11744 %}
11745 
11746 // Unsigned compare Instructions; really, same as signed except they
11747 // produce an eFlagsRegU instead of eFlagsReg.
11748 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11749   match(Set cr (CmpU op1 op2));
11750 
11751   format %{ "CMPu   $op1,$op2" %}
11752   opcode(0x3B);  /* Opcode 3B /r */
11753   ins_encode( OpcP, RegReg( op1, op2) );
11754   ins_pipe( ialu_cr_reg_reg );
11755 %}
11756 
11757 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11758   match(Set cr (CmpU op1 op2));
11759 
11760   format %{ "CMPu   $op1,$op2" %}
11761   opcode(0x81,0x07);  /* Opcode 81 /7 */
11762   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11763   ins_pipe( ialu_cr_reg_imm );
11764 %}
11765 
11766 // Cisc-spilled version of cmpU_eReg
11767 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11768   match(Set cr (CmpU op1 (LoadI op2)));
11769 
11770   format %{ "CMPu   $op1,$op2" %}
11771   ins_cost(500);
11772   opcode(0x3B);  /* Opcode 3B /r */
11773   ins_encode( OpcP, RegMem( op1, op2) );
11774   ins_pipe( ialu_cr_reg_mem );
11775 %}
11776 
11777 // // Cisc-spilled version of cmpU_eReg
11778 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11779 //  match(Set cr (CmpU (LoadI op1) op2));
11780 //
11781 //  format %{ "CMPu   $op1,$op2" %}
11782 //  ins_cost(500);
11783 //  opcode(0x39);  /* Opcode 39 /r */
11784 //  ins_encode( OpcP, RegMem( op1, op2) );
11785 //%}
11786 
11787 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11788   match(Set cr (CmpU src zero));
11789 
11790   format %{ "TESTu  $src,$src" %}
11791   opcode(0x85);
11792   ins_encode( OpcP, RegReg( src, src ) );
11793   ins_pipe( ialu_cr_reg_imm );
11794 %}
11795 
11796 // Unsigned pointer compare Instructions
11797 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11798   match(Set cr (CmpP op1 op2));
11799 
11800   format %{ "CMPu   $op1,$op2" %}
11801   opcode(0x3B);  /* Opcode 3B /r */
11802   ins_encode( OpcP, RegReg( op1, op2) );
11803   ins_pipe( ialu_cr_reg_reg );
11804 %}
11805 
11806 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11807   match(Set cr (CmpP op1 op2));
11808 
11809   format %{ "CMPu   $op1,$op2" %}
11810   opcode(0x81,0x07);  /* Opcode 81 /7 */
11811   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11812   ins_pipe( ialu_cr_reg_imm );
11813 %}
11814 
11815 // Cisc-spilled version of cmpP_eReg
11816 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11817   match(Set cr (CmpP op1 (LoadP op2)));
11818 
11819   format %{ "CMPu   $op1,$op2" %}
11820   ins_cost(500);
11821   opcode(0x3B);  /* Opcode 3B /r */
11822   ins_encode( OpcP, RegMem( op1, op2) );
11823   ins_pipe( ialu_cr_reg_mem );
11824 %}
11825 
11826 // // Cisc-spilled version of cmpP_eReg
11827 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11828 //  match(Set cr (CmpP (LoadP op1) op2));
11829 //
11830 //  format %{ "CMPu   $op1,$op2" %}
11831 //  ins_cost(500);
11832 //  opcode(0x39);  /* Opcode 39 /r */
11833 //  ins_encode( OpcP, RegMem( op1, op2) );
11834 //%}
11835 
11836 // Compare raw pointer (used in out-of-heap check).
11837 // Only works because non-oop pointers must be raw pointers
11838 // and raw pointers have no anti-dependencies.
11839 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11840   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11841   match(Set cr (CmpP op1 (LoadP op2)));
11842 
11843   format %{ "CMPu   $op1,$op2" %}
11844   opcode(0x3B);  /* Opcode 3B /r */
11845   ins_encode( OpcP, RegMem( op1, op2) );
11846   ins_pipe( ialu_cr_reg_mem );
11847 %}
11848 
11849 //
11850 // This will generate a signed flags result. This should be ok
11851 // since any compare to a zero should be eq/neq.
11852 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11853   match(Set cr (CmpP src zero));
11854 
11855   format %{ "TEST   $src,$src" %}
11856   opcode(0x85);
11857   ins_encode( OpcP, RegReg( src, src ) );
11858   ins_pipe( ialu_cr_reg_imm );
11859 %}
11860 
11861 // Cisc-spilled version of testP_reg
11862 // This will generate a signed flags result. This should be ok
11863 // since any compare to a zero should be eq/neq.
11864 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11865   match(Set cr (CmpP (LoadP op) zero));
11866 
11867   format %{ "TEST   $op,0xFFFFFFFF" %}
11868   ins_cost(500);
11869   opcode(0xF7);               /* Opcode F7 /0 */
11870   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11871   ins_pipe( ialu_cr_reg_imm );
11872 %}
11873 
11874 // Yanked all unsigned pointer compare operations.
11875 // Pointer compares are done with CmpP which is already unsigned.
11876 
11877 //----------Max and Min--------------------------------------------------------
11878 // Min Instructions
11879 ////
11880 //   *** Min and Max using the conditional move are slower than the
11881 //   *** branch version on a Pentium III.
11882 // // Conditional move for min
11883 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11884 //  effect( USE_DEF op2, USE op1, USE cr );
11885 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11886 //  opcode(0x4C,0x0F);
11887 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11888 //  ins_pipe( pipe_cmov_reg );
11889 //%}
11890 //
11891 //// Min Register with Register (P6 version)
11892 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11893 //  predicate(VM_Version::supports_cmov() );
11894 //  match(Set op2 (MinI op1 op2));
11895 //  ins_cost(200);
11896 //  expand %{
11897 //    eFlagsReg cr;
11898 //    compI_eReg(cr,op1,op2);
11899 //    cmovI_reg_lt(op2,op1,cr);
11900 //  %}
11901 //%}
11902 
11903 // Min Register with Register (generic version)
11904 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11905   match(Set dst (MinI dst src));
11906   effect(KILL flags);
11907   ins_cost(300);
11908 
11909   format %{ "MIN    $dst,$src" %}
11910   opcode(0xCC);
11911   ins_encode( min_enc(dst,src) );
11912   ins_pipe( pipe_slow );
11913 %}
11914 
11915 // Max Register with Register
11916 //   *** Min and Max using the conditional move are slower than the
11917 //   *** branch version on a Pentium III.
11918 // // Conditional move for max
11919 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11920 //  effect( USE_DEF op2, USE op1, USE cr );
11921 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11922 //  opcode(0x4F,0x0F);
11923 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11924 //  ins_pipe( pipe_cmov_reg );
11925 //%}
11926 //
11927 // // Max Register with Register (P6 version)
11928 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11929 //  predicate(VM_Version::supports_cmov() );
11930 //  match(Set op2 (MaxI op1 op2));
11931 //  ins_cost(200);
11932 //  expand %{
11933 //    eFlagsReg cr;
11934 //    compI_eReg(cr,op1,op2);
11935 //    cmovI_reg_gt(op2,op1,cr);
11936 //  %}
11937 //%}
11938 
11939 // Max Register with Register (generic version)
11940 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11941   match(Set dst (MaxI dst src));
11942   effect(KILL flags);
11943   ins_cost(300);
11944 
11945   format %{ "MAX    $dst,$src" %}
11946   opcode(0xCC);
11947   ins_encode( max_enc(dst,src) );
11948   ins_pipe( pipe_slow );
11949 %}
11950 
11951 // ============================================================================
11952 // Counted-loop limit node, which represents the exact final iterator value.
11953 // Note: the resulting value should fit into the integer range, since
11954 // counted loops have a limit check for overflow.
11955 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
11956   match(Set limit (LoopLimit (Binary init limit) stride));
11957   effect(TEMP limit_hi, TEMP tmp, KILL flags);
11958   ins_cost(300);
11959 
11960   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
11961   ins_encode %{
11962     int strd = (int)$stride$$constant;
11963     assert(strd != 1 && strd != -1, "sanity");
11964     int m1 = (strd > 0) ? 1 : -1;
11965     // Convert limit to long (EAX:EDX)
11966     __ cdql();
11967     // Convert init to long (init:tmp)
11968     __ movl($tmp$$Register, $init$$Register);
11969     __ sarl($tmp$$Register, 31);
11970     // $limit - $init
11971     __ subl($limit$$Register, $init$$Register);
11972     __ sbbl($limit_hi$$Register, $tmp$$Register);
11973     // + ($stride - 1)
11974     if (strd > 0) {
11975       __ addl($limit$$Register, (strd - 1));
11976       __ adcl($limit_hi$$Register, 0);
11977       __ movl($tmp$$Register, strd);
11978     } else {
11979       __ addl($limit$$Register, (strd + 1));
11980       __ adcl($limit_hi$$Register, -1);
11981       __ lneg($limit_hi$$Register, $limit$$Register);
11982       __ movl($tmp$$Register, -strd);
11983     }
11984     // signed division: (EAX:EDX) / pos_stride
11985     __ idivl($tmp$$Register);
11986     if (strd < 0) {
11987       // restore sign
11988       __ negl($tmp$$Register);
11989     }
11990     // (EAX) * stride
11991     __ mull($tmp$$Register);
11992     // + init (ignore upper bits)
11993     __ addl($limit$$Register, $init$$Register);
11994   %}
11995   ins_pipe( pipe_slow );
11996 %}
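// Illustrative example of the computation above: for init = 0, limit = 10,
// stride = 3 the exact final value is 0 + 3 * ((10 - 0 + 3 - 1) / 3) = 12,
// i.e. the first iterator value on or past the limit.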
11997 
11998 // ============================================================================
11999 // Branch Instructions
12000 // Jump Table
12001 instruct jumpXtnd(rRegI switch_val) %{
12002   match(Jump switch_val);
12003   ins_cost(350);
12004   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12005   ins_encode %{
12006     // Jump to Address(table_base + switch_reg)
12007     Address index(noreg, $switch_val$$Register, Address::times_1);
12008     __ jump(ArrayAddress($constantaddress, index));
12009   %}
12010   ins_pipe(pipe_jmp);
12011 %}
12012 
12013 // Jump Direct - Label defines a relative address from JMP+1
12014 instruct jmpDir(label labl) %{
12015   match(Goto);
12016   effect(USE labl);
12017 
12018   ins_cost(300);
12019   format %{ "JMP    $labl" %}
12020   size(5);
12021   ins_encode %{
12022     Label* L = $labl$$label;
12023     __ jmp(*L, false); // Always long jump
12024   %}
12025   ins_pipe( pipe_jmp );
12026 %}
12027 
12028 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12029 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12030   match(If cop cr);
12031   effect(USE labl);
12032 
12033   ins_cost(300);
12034   format %{ "J$cop    $labl" %}
12035   size(6);
12036   ins_encode %{
12037     Label* L = $labl$$label;
12038     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12039   %}
12040   ins_pipe( pipe_jcc );
12041 %}
12042 
12043 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12044 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12045   match(CountedLoopEnd cop cr);
12046   effect(USE labl);
12047 
12048   ins_cost(300);
12049   format %{ "J$cop    $labl\t# Loop end" %}
12050   size(6);
12051   ins_encode %{
12052     Label* L = $labl$$label;
12053     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12054   %}
12055   ins_pipe( pipe_jcc );
12056 %}
12057 
12058 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12059 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12060   match(CountedLoopEnd cop cmp);
12061   effect(USE labl);
12062 
12063   ins_cost(300);
12064   format %{ "J$cop,u  $labl\t# Loop end" %}
12065   size(6);
12066   ins_encode %{
12067     Label* L = $labl$$label;
12068     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12069   %}
12070   ins_pipe( pipe_jcc );
12071 %}
12072 
12073 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12074   match(CountedLoopEnd cop cmp);
12075   effect(USE labl);
12076 
12077   ins_cost(200);
12078   format %{ "J$cop,u  $labl\t# Loop end" %}
12079   size(6);
12080   ins_encode %{
12081     Label* L = $labl$$label;
12082     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12083   %}
12084   ins_pipe( pipe_jcc );
12085 %}
12086 
12087 // Jump Direct Conditional - using unsigned comparison
12088 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12089   match(If cop cmp);
12090   effect(USE labl);
12091 
12092   ins_cost(300);
12093   format %{ "J$cop,u  $labl" %}
12094   size(6);
12095   ins_encode %{
12096     Label* L = $labl$$label;
12097     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12098   %}
12099   ins_pipe(pipe_jcc);
12100 %}
12101 
12102 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12103   match(If cop cmp);
12104   effect(USE labl);
12105 
12106   ins_cost(200);
12107   format %{ "J$cop,u  $labl" %}
12108   size(6);
12109   ins_encode %{
12110     Label* L = $labl$$label;
12111     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12112   %}
12113   ins_pipe(pipe_jcc);
12114 %}
12115 
12116 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12117   match(If cop cmp);
12118   effect(USE labl);
12119 
12120   ins_cost(200);
12121   format %{ $$template
12122     if ($cop$$cmpcode == Assembler::notEqual) {
12123       $$emit$$"JP,u   $labl\n\t"
12124       $$emit$$"J$cop,u   $labl"
12125     } else {
12126       $$emit$$"JP,u   done\n\t"
12127       $$emit$$"J$cop,u   $labl\n\t"
12128       $$emit$$"done:"
12129     }
12130   %}
12131   ins_encode %{
12132     Label* l = $labl$$label;
12133     if ($cop$$cmpcode == Assembler::notEqual) {
12134       __ jcc(Assembler::parity, *l, false);
12135       __ jcc(Assembler::notEqual, *l, false);
12136     } else if ($cop$$cmpcode == Assembler::equal) {
12137       Label done;
12138       __ jccb(Assembler::parity, done);
12139       __ jcc(Assembler::equal, *l, false);
12140       __ bind(done);
12141     } else {
12142        ShouldNotReachHere();
12143     }
12144   %}
12145   ins_pipe(pipe_jcc);
12146 %}
12147 
12148 // ============================================================================
12149 // The second, slow half of a subtype check.  Scan the subklass's secondary superklass
12150 // array for an instance of the superklass.  Set a hidden internal cache on a
12151 // hit (the cache is checked with exposed code in gen_subtype_check()).  Return
12152 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
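// Illustrative sketch only (C-like pseudocode, not the emitted code): the stub
// scans the secondary-supers array and updates the one-element cache on a hit:
//   for (each element s of sub->secondary_supers())
//     if (s == super) { sub->secondary_super_cache = super; return 0; }   // hit
//   return non-zero;                                                      // miss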
12153 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12154   match(Set result (PartialSubtypeCheck sub super));
12155   effect( KILL rcx, KILL cr );
12156 
12157   ins_cost(1100);  // slightly larger than the next version
12158   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12159             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12160             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12161             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12162             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12163             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12164             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12165      "miss:\t" %}
12166 
12167   opcode(0x1); // Force a XOR of EDI
12168   ins_encode( enc_PartialSubtypeCheck() );
12169   ins_pipe( pipe_slow );
12170 %}
12171 
12172 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12173   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12174   effect( KILL rcx, KILL result );
12175 
12176   ins_cost(1000);
12177   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12178             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12179             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12180             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12181             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12182             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12183      "miss:\t" %}
12184 
12185   opcode(0x0);  // No need to XOR EDI
12186   ins_encode( enc_PartialSubtypeCheck() );
12187   ins_pipe( pipe_slow );
12188 %}
12189 
12190 // ============================================================================
12191 // Branch Instructions -- short offset versions
12192 //
12193 // These instructions are used to replace jumps of a long offset (the default
12194 // match) with jumps of a shorter offset.  These instructions are all tagged
12195 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12196 // match rules in general matching.  Instead, the ADLC generates a conversion
12197 // method in the MachNode which can be used to do in-place replacement of the
12198 // long variant with the shorter variant.  The compiler determines whether the
12199 // short form can be used via the is_short_branch_offset() predicate in the
12200 // machine-specific code section of the file.
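// For reference, the win on x86: a short JMP/Jcc with a rel8 displacement is
// 2 bytes, while the long forms above use 5 bytes (JMP rel32) and 6 bytes
// (Jcc rel32), matching the size() attributes on these instructions.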
12201 
12202 // Jump Direct - Label defines a relative address from JMP+1
12203 instruct jmpDir_short(label labl) %{
12204   match(Goto);
12205   effect(USE labl);
12206 
12207   ins_cost(300);
12208   format %{ "JMP,s  $labl" %}
12209   size(2);
12210   ins_encode %{
12211     Label* L = $labl$$label;
12212     __ jmpb(*L);
12213   %}
12214   ins_pipe( pipe_jmp );
12215   ins_short_branch(1);
12216 %}
12217 
12218 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12219 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12220   match(If cop cr);
12221   effect(USE labl);
12222 
12223   ins_cost(300);
12224   format %{ "J$cop,s  $labl" %}
12225   size(2);
12226   ins_encode %{
12227     Label* L = $labl$$label;
12228     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12229   %}
12230   ins_pipe( pipe_jcc );
12231   ins_short_branch(1);
12232 %}
12233 
12234 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12235 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12236   match(CountedLoopEnd cop cr);
12237   effect(USE labl);
12238 
12239   ins_cost(300);
12240   format %{ "J$cop,s  $labl\t# Loop end" %}
12241   size(2);
12242   ins_encode %{
12243     Label* L = $labl$$label;
12244     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12245   %}
12246   ins_pipe( pipe_jcc );
12247   ins_short_branch(1);
12248 %}
12249 
12250 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12251 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12252   match(CountedLoopEnd cop cmp);
12253   effect(USE labl);
12254 
12255   ins_cost(300);
12256   format %{ "J$cop,us $labl\t# Loop end" %}
12257   size(2);
12258   ins_encode %{
12259     Label* L = $labl$$label;
12260     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12261   %}
12262   ins_pipe( pipe_jcc );
12263   ins_short_branch(1);
12264 %}
12265 
12266 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12267   match(CountedLoopEnd cop cmp);
12268   effect(USE labl);
12269 
12270   ins_cost(300);
12271   format %{ "J$cop,us $labl\t# Loop end" %}
12272   size(2);
12273   ins_encode %{
12274     Label* L = $labl$$label;
12275     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12276   %}
12277   ins_pipe( pipe_jcc );
12278   ins_short_branch(1);
12279 %}
12280 
12281 // Jump Direct Conditional - using unsigned comparison
12282 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12283   match(If cop cmp);
12284   effect(USE labl);
12285 
12286   ins_cost(300);
12287   format %{ "J$cop,us $labl" %}
12288   size(2);
12289   ins_encode %{
12290     Label* L = $labl$$label;
12291     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12292   %}
12293   ins_pipe( pipe_jcc );
12294   ins_short_branch(1);
12295 %}
12296 
12297 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12298   match(If cop cmp);
12299   effect(USE labl);
12300 
12301   ins_cost(300);
12302   format %{ "J$cop,us $labl" %}
12303   size(2);
12304   ins_encode %{
12305     Label* L = $labl$$label;
12306     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12307   %}
12308   ins_pipe( pipe_jcc );
12309   ins_short_branch(1);
12310 %}
12311 
12312 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12313   match(If cop cmp);
12314   effect(USE labl);
12315 
12316   ins_cost(300);
12317   format %{ $$template
12318     if ($cop$$cmpcode == Assembler::notEqual) {
12319       $$emit$$"JP,u,s   $labl\n\t"
12320       $$emit$$"J$cop,u,s   $labl"
12321     } else {
12322       $$emit$$"JP,u,s   done\n\t"
12323       $$emit$$"J$cop,u,s  $labl\n\t"
12324       $$emit$$"done:"
12325     }
12326   %}
12327   size(4);
12328   ins_encode %{
12329     Label* l = $labl$$label;
12330     if ($cop$$cmpcode == Assembler::notEqual) {
12331       __ jccb(Assembler::parity, *l);
12332       __ jccb(Assembler::notEqual, *l);
12333     } else if ($cop$$cmpcode == Assembler::equal) {
12334       Label done;
12335       __ jccb(Assembler::parity, done);
12336       __ jccb(Assembler::equal, *l);
12337       __ bind(done);
12338     } else {
12339        ShouldNotReachHere();
12340     }
12341   %}
12342   ins_pipe(pipe_jcc);
12343   ins_short_branch(1);
12344 %}
12345 
12346 // ============================================================================
12347 // Long Compare
12348 //
12349 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12350 // is tricky.  The flavor of compare used depends on whether we are testing
12351 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12352 // The GE test is the negated LT test.  The LE test is had by commuting
12353 // the operands (LE(X,Y) is just GE(Y,X)); negating that commuted test
12354 // gives GT.  The EQ test is done by ORcc'ing the high and low halves, and the
12355 // NE test is negated from that.
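// Concretely, the reg-reg LTGE form below compares the low halves with CMP and
// then subtracts the high halves with borrow (SBB) into a scratch register;
// the resulting sign/overflow flags answer LT vs. GE without ever
// materializing the full 64-bit difference.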
12356 
12357 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12358 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12359 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12360 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12361 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12362 // foo match ends up with the wrong leaf.  One fix is to not match both
12363 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12364 // both forms beat the three-way (CmpL3) form of long-compare and both are very
12365 // useful on Intel, which has so few registers.
12366 
12367 // Manifest a CmpL result in an integer register.  Very painful.
12368 // This is the test to avoid.
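// The result follows the usual three-way convention: -1 if src1 < src2,
// 0 if equal, +1 if src1 > src2 (signed 64-bit comparison).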
12369 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12370   match(Set dst (CmpL3 src1 src2));
12371   effect( KILL flags );
12372   ins_cost(1000);
12373   format %{ "XOR    $dst,$dst\n\t"
12374             "CMP    $src1.hi,$src2.hi\n\t"
12375             "JLT,s  m_one\n\t"
12376             "JGT,s  p_one\n\t"
12377             "CMP    $src1.lo,$src2.lo\n\t"
12378             "JB,s   m_one\n\t"
12379             "JEQ,s  done\n"
12380     "p_one:\tINC    $dst\n\t"
12381             "JMP,s  done\n"
12382     "m_one:\tDEC    $dst\n"
12383      "done:" %}
12384   ins_encode %{
12385     Label p_one, m_one, done;
12386     __ xorptr($dst$$Register, $dst$$Register);
12387     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12388     __ jccb(Assembler::less,    m_one);
12389     __ jccb(Assembler::greater, p_one);
12390     __ cmpl($src1$$Register, $src2$$Register);
12391     __ jccb(Assembler::below,   m_one);
12392     __ jccb(Assembler::equal,   done);
12393     __ bind(p_one);
12394     __ incrementl($dst$$Register);
12395     __ jmpb(done);
12396     __ bind(m_one);
12397     __ decrementl($dst$$Register);
12398     __ bind(done);
12399   %}
12400   ins_pipe( pipe_slow );
12401 %}
12402 
12403 //======
12404 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12405 // compares.  Can be used for LE or GT compares by reversing arguments.
12406 // NOT GOOD FOR EQ/NE tests.
12407 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12408   match( Set flags (CmpL src zero ));
12409   ins_cost(100);
12410   format %{ "TEST   $src.hi,$src.hi" %}
12411   opcode(0x85);
12412   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12413   ins_pipe( ialu_cr_reg_reg );
12414 %}
12415 
12416 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12417 // compares.  Can be used for LE or GT compares by reversing arguments.
12418 // NOT GOOD FOR EQ/NE tests.
12419 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12420   match( Set flags (CmpL src1 src2 ));
12421   effect( TEMP tmp );
12422   ins_cost(300);
12423   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12424             "MOV    $tmp,$src1.hi\n\t"
12425             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12426   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12427   ins_pipe( ialu_cr_reg_reg );
12428 %}
12429 
12430 // Long compares reg < zero/reg OR reg >= zero/reg.
12431 // Just a wrapper for a normal branch, plus the predicate test.
12432 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12433   match(If cmp flags);
12434   effect(USE labl);
12435   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12436   expand %{
12437     jmpCon(cmp,flags,labl);    // JLT or JGE...
12438   %}
12439 %}
12440 
12441 // Compare 2 longs and CMOVE longs.
12442 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12443   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12444   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12445   ins_cost(400);
12446   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12447             "CMOV$cmp $dst.hi,$src.hi" %}
12448   opcode(0x0F,0x40);
12449   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12450   ins_pipe( pipe_cmov_reg_long );
12451 %}
12452 
12453 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12454   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12455   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12456   ins_cost(500);
12457   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12458             "CMOV$cmp $dst.hi,$src.hi" %}
12459   opcode(0x0F,0x40);
12460   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12461   ins_pipe( pipe_cmov_reg_long );
12462 %}
12463 
12464 // Compare 2 longs and CMOVE ints.
12465 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12466   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12467   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12468   ins_cost(200);
12469   format %{ "CMOV$cmp $dst,$src" %}
12470   opcode(0x0F,0x40);
12471   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12472   ins_pipe( pipe_cmov_reg );
12473 %}
12474 
12475 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12476   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12477   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12478   ins_cost(250);
12479   format %{ "CMOV$cmp $dst,$src" %}
12480   opcode(0x0F,0x40);
12481   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12482   ins_pipe( pipe_cmov_mem );
12483 %}
12484 
// Compare 2 longs and CMOVE ptrs.
12486 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12487   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12488   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12489   ins_cost(200);
12490   format %{ "CMOV$cmp $dst,$src" %}
12491   opcode(0x0F,0x40);
12492   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12493   ins_pipe( pipe_cmov_reg );
12494 %}
12495 
12496 // Compare 2 longs and CMOVE doubles
12497 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12499   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12500   ins_cost(200);
12501   expand %{
12502     fcmovDPR_regS(cmp,flags,dst,src);
12503   %}
12504 %}
12505 
12506 // Compare 2 longs and CMOVE doubles
12507 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12509   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12510   ins_cost(200);
12511   expand %{
12512     fcmovD_regS(cmp,flags,dst,src);
12513   %}
12514 %}
12515 
12516 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12518   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12519   ins_cost(200);
12520   expand %{
12521     fcmovFPR_regS(cmp,flags,dst,src);
12522   %}
12523 %}
12524 
12525 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
12527   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12528   ins_cost(200);
12529   expand %{
12530     fcmovF_regS(cmp,flags,dst,src);
12531   %}
12532 %}
12533 
12534 //======
12535 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12536 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12537   match( Set flags (CmpL src zero ));
12538   effect(TEMP tmp);
12539   ins_cost(200);
12540   format %{ "MOV    $tmp,$src.lo\n\t"
12541             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12542   ins_encode( long_cmp_flags0( src, tmp ) );
12543   ins_pipe( ialu_reg_reg_long );
12544 %}
12545 
12546 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12547 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12548   match( Set flags (CmpL src1 src2 ));
12549   ins_cost(200+300);
12550   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12551             "JNE,s  skip\n\t"
12552             "CMP    $src1.hi,$src2.hi\n\t"
12553      "skip:\t" %}
12554   ins_encode( long_cmp_flags1( src1, src2 ) );
12555   ins_pipe( ialu_cr_reg_reg );
12556 %}
12557 
12558 // Long compare reg == zero/reg OR reg != zero/reg
12559 // Just a wrapper for a normal branch, plus the predicate test.
12560 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12561   match(If cmp flags);
12562   effect(USE labl);
12563   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12564   expand %{
12565     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12566   %}
12567 %}
12568 
12569 // Compare 2 longs and CMOVE longs.
12570 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12571   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12572   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12573   ins_cost(400);
12574   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12575             "CMOV$cmp $dst.hi,$src.hi" %}
12576   opcode(0x0F,0x40);
12577   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12578   ins_pipe( pipe_cmov_reg_long );
12579 %}
12580 
12581 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12582   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12583   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12584   ins_cost(500);
12585   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12586             "CMOV$cmp $dst.hi,$src.hi" %}
12587   opcode(0x0F,0x40);
12588   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12589   ins_pipe( pipe_cmov_reg_long );
12590 %}
12591 
12592 // Compare 2 longs and CMOVE ints.
12593 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12594   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12595   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12596   ins_cost(200);
12597   format %{ "CMOV$cmp $dst,$src" %}
12598   opcode(0x0F,0x40);
12599   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12600   ins_pipe( pipe_cmov_reg );
12601 %}
12602 
12603 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12604   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12605   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12606   ins_cost(250);
12607   format %{ "CMOV$cmp $dst,$src" %}
12608   opcode(0x0F,0x40);
12609   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12610   ins_pipe( pipe_cmov_mem );
12611 %}
12612 
// Compare 2 longs and CMOVE ptrs.
12614 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12615   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12616   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12617   ins_cost(200);
12618   format %{ "CMOV$cmp $dst,$src" %}
12619   opcode(0x0F,0x40);
12620   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12621   ins_pipe( pipe_cmov_reg );
12622 %}
12623 
12624 // Compare 2 longs and CMOVE doubles
12625 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12627   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12628   ins_cost(200);
12629   expand %{
12630     fcmovDPR_regS(cmp,flags,dst,src);
12631   %}
12632 %}
12633 
12634 // Compare 2 longs and CMOVE doubles
12635 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12637   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12638   ins_cost(200);
12639   expand %{
12640     fcmovD_regS(cmp,flags,dst,src);
12641   %}
12642 %}
12643 
12644 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12646   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12647   ins_cost(200);
12648   expand %{
12649     fcmovFPR_regS(cmp,flags,dst,src);
12650   %}
12651 %}
12652 
12653 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12655   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12656   ins_cost(200);
12657   expand %{
12658     fcmovF_regS(cmp,flags,dst,src);
12659   %}
12660 %}
12661 
12662 //======
12663 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12664 // Same as cmpL_reg_flags_LEGT except must negate src
12665 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12666   match( Set flags (CmpL src zero ));
12667   effect( TEMP tmp );
12668   ins_cost(300);
12669   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12670             "CMP    $tmp,$src.lo\n\t"
12671             "SBB    $tmp,$src.hi\n\t" %}
12672   ins_encode( long_cmp_flags3(src, tmp) );
12673   ins_pipe( ialu_reg_reg_long );
12674 %}
12675 
12676 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12677 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12678 // requires a commuted test to get the same result.
12679 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12680   match( Set flags (CmpL src1 src2 ));
12681   effect( TEMP tmp );
12682   ins_cost(300);
12683   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12684             "MOV    $tmp,$src2.hi\n\t"
12685             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12686   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12687   ins_pipe( ialu_cr_reg_reg );
12688 %}
12689 
// Long compares reg < zero/reg OR reg >= zero/reg.
12691 // Just a wrapper for a normal branch, plus the predicate test
12692 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12693   match(If cmp flags);
12694   effect(USE labl);
12695   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12696   ins_cost(300);
12697   expand %{
12698     jmpCon(cmp,flags,labl);    // JGT or JLE...
12699   %}
12700 %}
12701 
12702 // Compare 2 longs and CMOVE longs.
12703 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12704   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12705   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12706   ins_cost(400);
12707   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12708             "CMOV$cmp $dst.hi,$src.hi" %}
12709   opcode(0x0F,0x40);
12710   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12711   ins_pipe( pipe_cmov_reg_long );
12712 %}
12713 
12714 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12715   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12716   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12717   ins_cost(500);
12718   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12719             "CMOV$cmp $dst.hi,$src.hi+4" %}
12720   opcode(0x0F,0x40);
12721   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12722   ins_pipe( pipe_cmov_reg_long );
12723 %}
12724 
12725 // Compare 2 longs and CMOVE ints.
12726 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12727   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12728   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12729   ins_cost(200);
12730   format %{ "CMOV$cmp $dst,$src" %}
12731   opcode(0x0F,0x40);
12732   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12733   ins_pipe( pipe_cmov_reg );
12734 %}
12735 
12736 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12737   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12738   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12739   ins_cost(250);
12740   format %{ "CMOV$cmp $dst,$src" %}
12741   opcode(0x0F,0x40);
12742   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12743   ins_pipe( pipe_cmov_mem );
12744 %}
12745 
12746 // Compare 2 longs and CMOVE ptrs.
12747 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12748   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12749   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12750   ins_cost(200);
12751   format %{ "CMOV$cmp $dst,$src" %}
12752   opcode(0x0F,0x40);
12753   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12754   ins_pipe( pipe_cmov_reg );
12755 %}
12756 
12757 // Compare 2 longs and CMOVE doubles
12758 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12760   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12761   ins_cost(200);
12762   expand %{
12763     fcmovDPR_regS(cmp,flags,dst,src);
12764   %}
12765 %}
12766 
12767 // Compare 2 longs and CMOVE doubles
12768 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12770   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12771   ins_cost(200);
12772   expand %{
12773     fcmovD_regS(cmp,flags,dst,src);
12774   %}
12775 %}
12776 
12777 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12779   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12780   ins_cost(200);
12781   expand %{
12782     fcmovFPR_regS(cmp,flags,dst,src);
12783   %}
12784 %}
12785 
12786 
12787 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12789   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12790   ins_cost(200);
12791   expand %{
12792     fcmovF_regS(cmp,flags,dst,src);
12793   %}
12794 %}
12795 
12796 
12797 // ============================================================================
12798 // Procedure Call/Return Instructions
12799 // Call Java Static Instruction
12800 // Note: If this code changes, the corresponding ret_addr_offset() and
12801 //       compute_padding() functions will have to be adjusted.
12802 instruct CallStaticJavaDirect(method meth) %{
12803   match(CallStaticJava);
12804   effect(USE meth);
12805 
12806   ins_cost(300);
12807   format %{ "CALL,static " %}
12808   opcode(0xE8); /* E8 cd */
12809   ins_encode( pre_call_resets,
12810               Java_Static_Call( meth ),
12811               call_epilog,
12812               post_call_FPU );
12813   ins_pipe( pipe_slow );
12814   ins_alignment(4);
12815 %}
12816 
12817 // Call Java Dynamic Instruction
12818 // Note: If this code changes, the corresponding ret_addr_offset() and
12819 //       compute_padding() functions will have to be adjusted.
12820 instruct CallDynamicJavaDirect(method meth) %{
12821   match(CallDynamicJava);
12822   effect(USE meth);
12823 
12824   ins_cost(300);
12825   format %{ "MOV    EAX,(oop)-1\n\t"
12826             "CALL,dynamic" %}
12827   opcode(0xE8); /* E8 cd */
12828   ins_encode( pre_call_resets,
12829               Java_Dynamic_Call( meth ),
12830               call_epilog,
12831               post_call_FPU );
12832   ins_pipe( pipe_slow );
12833   ins_alignment(4);
12834 %}
12835 
12836 // Call Runtime Instruction
12837 instruct CallRuntimeDirect(method meth) %{
12838   match(CallRuntime );
12839   effect(USE meth);
12840 
12841   ins_cost(300);
12842   format %{ "CALL,runtime " %}
12843   opcode(0xE8); /* E8 cd */
12844   // Use FFREEs to clear entries in float stack
12845   ins_encode( pre_call_resets,
12846               FFree_Float_Stack_All,
12847               Java_To_Runtime( meth ),
12848               post_call_FPU );
12849   ins_pipe( pipe_slow );
12850 %}
12851 
12852 // Call runtime without safepoint
12853 instruct CallLeafDirect(method meth) %{
12854   match(CallLeaf);
12855   effect(USE meth);
12856 
12857   ins_cost(300);
12858   format %{ "CALL_LEAF,runtime " %}
12859   opcode(0xE8); /* E8 cd */
12860   ins_encode( pre_call_resets,
12861               FFree_Float_Stack_All,
12862               Java_To_Runtime( meth ),
12863               Verify_FPU_For_Leaf, post_call_FPU );
12864   ins_pipe( pipe_slow );
12865 %}
12866 
12867 instruct CallLeafNoFPDirect(method meth) %{
12868   match(CallLeafNoFP);
12869   effect(USE meth);
12870 
12871   ins_cost(300);
12872   format %{ "CALL_LEAF_NOFP,runtime " %}
12873   opcode(0xE8); /* E8 cd */
12874   ins_encode(Java_To_Runtime(meth));
12875   ins_pipe( pipe_slow );
12876 %}
12877 
12878 
12879 // Return Instruction
12880 // Remove the return address & jump to it.
12881 instruct Ret() %{
12882   match(Return);
12883   format %{ "RET" %}
12884   opcode(0xC3);
12885   ins_encode(OpcP);
12886   ins_pipe( pipe_jmp );
12887 %}
12888 
12889 // Tail Call; Jump from runtime stub to Java code.
12890 // Also known as an 'interprocedural jump'.
12891 // Target of jump will eventually return to caller.
12892 // TailJump below removes the return address.
12893 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12894   match(TailCall jump_target method_oop );
12895   ins_cost(300);
12896   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12897   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12898   ins_encode( OpcP, RegOpc(jump_target) );
12899   ins_pipe( pipe_jmp );
12900 %}
12901 
12902 
12903 // Tail Jump; remove the return address; jump to target.
12904 // TailCall above leaves the return address around.
12905 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12906   match( TailJump jump_target ex_oop );
12907   ins_cost(300);
12908   format %{ "POP    EDX\t# pop return address into dummy\n\t"
12909             "JMP    $jump_target " %}
12910   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12911   ins_encode( enc_pop_rdx,
12912               OpcP, RegOpc(jump_target) );
12913   ins_pipe( pipe_jmp );
12914 %}
12915 
12916 // Create exception oop: created by stack-crawling runtime code.
12917 // Created exception is now available to this handler, and is setup
12918 // just prior to jumping to this handler.  No code emitted.
12919 instruct CreateException( eAXRegP ex_oop )
12920 %{
12921   match(Set ex_oop (CreateEx));
12922 
12923   size(0);
12924   // use the following format syntax
12925   format %{ "# exception oop is in EAX; no code emitted" %}
12926   ins_encode();
12927   ins_pipe( empty );
12928 %}
12929 
12930 
12931 // Rethrow exception:
12932 // The exception oop will come in the first argument position.
12933 // Then JUMP (not call) to the rethrow stub code.
12934 instruct RethrowException()
12935 %{
12936   match(Rethrow);
12937 
12938   // use the following format syntax
12939   format %{ "JMP    rethrow_stub" %}
12940   ins_encode(enc_rethrow);
12941   ins_pipe( pipe_jmp );
12942 %}
12943 
12944 // inlined locking and unlocking
12945 
12946 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
12947   predicate(Compile::current()->use_rtm());
12948   match(Set cr (FastLock object box));
12949   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
12950   ins_cost(300);
12951   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
12952   ins_encode %{
12953     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12954                  $scr$$Register, $cx1$$Register, $cx2$$Register,
12955                  _counters, _rtm_counters, _stack_rtm_counters,
12956                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12957                  true, ra_->C->profile_rtm());
12958   %}
12959   ins_pipe(pipe_slow);
12960 %}
12961 
12962 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
12963   predicate(!Compile::current()->use_rtm());
12964   match(Set cr (FastLock object box));
12965   effect(TEMP tmp, TEMP scr, USE_KILL box);
12966   ins_cost(300);
12967   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
12968   ins_encode %{
12969     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12970                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
12971   %}
12972   ins_pipe(pipe_slow);
12973 %}
12974 
12975 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
12976   match(Set cr (FastUnlock object box));
12977   effect(TEMP tmp, USE_KILL box);
12978   ins_cost(300);
12979   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
12980   ins_encode %{
12981     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
12982   %}
12983   ins_pipe(pipe_slow);
12984 %}
12985 
12986 
12987 
12988 // ============================================================================
12989 // Safepoint Instruction
12990 instruct safePoint_poll(eFlagsReg cr) %{
12991   match(SafePoint);
12992   effect(KILL cr);
12993 
12994   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
12995   // On SPARC that might be acceptable as we can generate the address with
12996   // just a sethi, saving an or.  By polling at offset 0 we can end up
12997   // putting additional pressure on the index-0 in the D$.  Because of
12998   // alignment (just like the situation at hand) the lower indices tend
12999   // to see more traffic.  It'd be better to change the polling address
13000   // to offset 0 of the last $line in the polling page.
13001 
13002   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13003   ins_cost(125);
  size(6);
13005   ins_encode( Safepoint_Poll() );
13006   ins_pipe( ialu_reg_mem );
13007 %}
13008 
13009 
13010 // ============================================================================
13011 // This name is KNOWN by the ADLC and cannot be changed.
13012 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13013 // for this guy.
13014 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13015   match(Set dst (ThreadLocal));
13016   effect(DEF dst, KILL cr);
13017 
13018   format %{ "MOV    $dst, Thread::current()" %}
13019   ins_encode %{
13020     Register dstReg = as_Register($dst$$reg);
13021     __ get_thread(dstReg);
13022   %}
13023   ins_pipe( ialu_reg_fat );
13024 %}
13025 
13026 
13027 
13028 //----------PEEPHOLE RULES-----------------------------------------------------
13029 // These must follow all instruction definitions as they use the names
13030 // defined in the instructions definitions.
13031 //
13032 // peepmatch ( root_instr_name [preceding_instruction]* );
13033 //
13034 // peepconstraint %{
13035 // (instruction_number.operand_name relational_op instruction_number.operand_name
13036 //  [, ...] );
13037 // // instruction numbers are zero-based using left to right order in peepmatch
13038 //
13039 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13040 // // provide an instruction_number.operand_name for each operand that appears
13041 // // in the replacement instruction's match rule
13042 //
13043 // ---------VM FLAGS---------------------------------------------------------
13044 //
13045 // All peephole optimizations can be turned off using -XX:-OptoPeephole
13046 //
13047 // Each peephole rule is given an identifying number starting with zero and
13048 // increasing by one in the order seen by the parser.  An individual peephole
13049 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13050 // on the command-line.
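//
// Example usage (hedged: OptoPeephole and OptoPeepholeAt are develop-style
// flags, so a debug or fastdebug build of the VM may be required for the
// launcher to accept them):
//
//   java -XX:-OptoPeephole ...        // disable all peephole rules
//   java -XX:OptoPeepholeAt=3 ...     // keep only peephole rule number 3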
13051 //
13052 // ---------CURRENT LIMITATIONS----------------------------------------------
13053 //
13054 // Only match adjacent instructions in same basic block
13055 // Only equality constraints
13056 // Only constraints between operands, not (0.dest_reg == EAX_enc)
13057 // Only one replacement instruction
13058 //
13059 // ---------EXAMPLE----------------------------------------------------------
13060 //
13061 // // pertinent parts of existing instructions in architecture description
13062 // instruct movI(rRegI dst, rRegI src) %{
13063 //   match(Set dst (CopyI src));
13064 // %}
13065 //
13066 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13067 //   match(Set dst (AddI dst src));
13068 //   effect(KILL cr);
13069 // %}
13070 //
13071 // // Change (inc mov) to lea
13072 // peephole %{
//   // increment preceded by register-register move
13074 //   peepmatch ( incI_eReg movI );
13075 //   // require that the destination register of the increment
13076 //   // match the destination register of the move
13077 //   peepconstraint ( 0.dst == 1.dst );
13078 //   // construct a replacement instruction that sets
13079 //   // the destination to ( move's source register + one )
13080 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13081 // %}
13082 //
13083 // Implementation no longer uses movX instructions since
13084 // machine-independent system no longer uses CopyX nodes.
13085 //
13086 // peephole %{
13087 //   peepmatch ( incI_eReg movI );
13088 //   peepconstraint ( 0.dst == 1.dst );
13089 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13090 // %}
13091 //
13092 // peephole %{
13093 //   peepmatch ( decI_eReg movI );
13094 //   peepconstraint ( 0.dst == 1.dst );
13095 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13096 // %}
13097 //
13098 // peephole %{
13099 //   peepmatch ( addI_eReg_imm movI );
13100 //   peepconstraint ( 0.dst == 1.dst );
13101 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13102 // %}
13103 //
13104 // peephole %{
13105 //   peepmatch ( addP_eReg_imm movP );
13106 //   peepconstraint ( 0.dst == 1.dst );
13107 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13108 // %}
13109 
13110 // // Change load of spilled value to only a spill
13111 // instruct storeI(memory mem, rRegI src) %{
13112 //   match(Set mem (StoreI mem src));
13113 // %}
13114 //
13115 // instruct loadI(rRegI dst, memory mem) %{
13116 //   match(Set dst (LoadI mem));
13117 // %}
13118 //
13119 peephole %{
13120   peepmatch ( loadI storeI );
13121   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13122   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13123 %}
13124 
13125 //----------SMARTSPILL RULES---------------------------------------------------
13126 // These must follow all instruction definitions as they use the names
13127 // defined in the instructions definitions.