1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
  30 // architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
  61 
  62 // General Registers
  63 // EBX, ESI, and EDI were previously set as save-on-entry for Java code.
  64 // SOE was turned off in Java code due to the frequent use of uncommon traps.
  65 // Now that the allocator is better, ESI and EDI are turned back on as SOE registers.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
  77 // Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  81 // Here's the trick: FPR1 is really st(0), except in the midst
  82 // of emitting assembly for a machnode. During emission the FPU stack
  83 // is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
  84 // the stack will not have this element, so FPR1 == st(0) from the
  85 // oopMap viewpoint. This same numbering weirdness forces the
  86 // instruction encodings to play games with the register
  87 // encode to correct for the 0/1 issue. See MachSpillCopyNode::implementation,
  88 // where it does flt->flt moves, for an example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
 108 reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
 133 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
 134 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
 135 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 136 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
 145 // Dynamic register class that selects at runtime between register classes
 146 // any_reg_with_ebp and any_reg_no_ebp (depending on the value of the flag PreserveFramePointer).
 147 // Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI);
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
 222 // Not AX or DX (and neither EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
 227 // Floating point registers.  Notice FPR0 is not a choice.
 228 // FPR0 is never allocated; we use clever encodings to fake
 229 // 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
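     // The Long pairs listed in the register-class comments above differ by 2
     // in their x86 encodings: EAX(0)->EDX(2), ECX(1)->EBX(3), EBP(5)->EDI(7).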
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
 266 // Note: 'double' and 'long long' have 32-bit alignment on x86.
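     // For example (addresses purely illustrative): an adr ending in 0x...18 is
     // masked down to 0x...10, the start of its 16-byte-aligned slot, and the
     // two words are stored there.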
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
 268   // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
 269   // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
 271   // Store the value into the 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
 277 // Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
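     // The pool carries one extra 128-bit entry of slack so that, even if the
     // static array is only 8-byte aligned, rounding each entry down to a
     // 16-byte boundary stays inside the pool and gives every mask its own
     // aligned slot.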
 285 
 286 // Offset hacking within calls.
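     // (The 24-bit-mode restore below is a 6-byte FLDCW with an absolute memory
     // operand, opcode 0xD9 /5 plus a 32-bit address; vzeroupper is the 3-byte
     // VEX opcode C5 F8 77.)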
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (C->max_vector_size() > 16) {
 294     if(UseAVX <= 2) {
 295       size += 3; // vzeroupper
 296     }
 297   }
 298   return size;
 299 }
 300 
 301 // !!!!! Special hack to get all types of calls to specify the byte offset
 302 //       from the start of the call to the point where the return address
 303 //       will point.
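     // (A direct call is the 5-byte CALL rel32 form, opcode 0xE8 plus a 32-bit
     // displacement; a dynamic call is preceded by a 5-byte MOV-immediate that
     // loads the inline-cache constant, hence the 10 below.  Any fldcw/vzeroupper
     // bytes from pre_call_resets_size() come first and are added in as well.)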
 304 int MachCallStaticJavaNode::ret_addr_offset() {
 305   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 306 }
 307 
 308 int MachCallDynamicJavaNode::ret_addr_offset() {
 309   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 310 }
 311 
 312 static int sizeof_FFree_Float_Stack_All = -1;
 313 
 314 int MachCallRuntimeNode::ret_addr_offset() {
 315   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 316   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 317 }
 318 
 319 // Indicate if the safepoint node needs the polling page as an input.
 320 // Since x86 does have absolute addressing, it doesn't.
 321 bool SafePointNode::needs_polling_address_input() {
 322   return false;
 323 }
 324 
 325 //
 326 // Compute padding required for nodes which need alignment
 327 //
 328 
 329 // The address of the call instruction needs to be 4-byte aligned to
 330 // ensure that it does not span a cache line so that it can be patched.
 331 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 332   current_offset += pre_call_resets_size();  // skip fldcw, if any
 333   current_offset += 1;      // skip call opcode byte
 334   return round_to(current_offset, alignment_required()) - current_offset;
 335 }
 336 
 337 // The address of the call instruction needs to be 4-byte aligned to
 338 // ensure that it does not span a cache line so that it can be patched.
 339 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 340   current_offset += pre_call_resets_size();  // skip fldcw, if any
 341   current_offset += 5;      // skip MOV instruction
 342   current_offset += 1;      // skip call opcode byte
 343   return round_to(current_offset, alignment_required()) - current_offset;
 344 }
 345 
 346 // EMIT_RM()
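     // Packs a ModRM byte: f1 is the 2-bit mod field, f2 the 3-bit reg (or
     // opcode-extension) field and f3 the 3-bit r/m field.  The same helper is
     // also used to build SIB bytes, whose scale/index/base fields have the
     // same layout.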
 347 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 348   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 349   cbuf.insts()->emit_int8(c);
 350 }
 351 
 352 // EMIT_CC()
 353 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 354   unsigned char c = (unsigned char)( f1 | f2 );
 355   cbuf.insts()->emit_int8(c);
 356 }
 357 
 358 // EMIT_OPCODE()
 359 void emit_opcode(CodeBuffer &cbuf, int code) {
 360   cbuf.insts()->emit_int8((unsigned char) code);
 361 }
 362 
 363 // EMIT_OPCODE() w/ relocation information
 364 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 365   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 366   emit_opcode(cbuf, code);
 367 }
 368 
 369 // EMIT_D8()
 370 void emit_d8(CodeBuffer &cbuf, int d8) {
 371   cbuf.insts()->emit_int8((unsigned char) d8);
 372 }
 373 
 374 // EMIT_D16()
 375 void emit_d16(CodeBuffer &cbuf, int d16) {
 376   cbuf.insts()->emit_int16(d16);
 377 }
 378 
 379 // EMIT_D32()
 380 void emit_d32(CodeBuffer &cbuf, int d32) {
 381   cbuf.insts()->emit_int32(d32);
 382 }
 383 
 384 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 386         int format) {
 387   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 388   cbuf.insts()->emit_int32(d32);
 389 }
 390 
 391 // emit 32 bit value and construct relocation entry from RelocationHolder
 392 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 393         int format) {
 394 #ifdef ASSERT
 395   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 396     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 397   }
 398 #endif
 399   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 400   cbuf.insts()->emit_int32(d32);
 401 }
 402 
 403 // Access stack slot for load or store
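     // An ESP-based address always needs a SIB byte (ESP cannot be encoded as a
     // plain ModRM base), hence the second emit_rm() call below; an 8-bit
     // displacement is used when it fits in [-128, 127], otherwise a 32-bit one.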
 404 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 405   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 406   if( -128 <= disp && disp <= 127 ) {
 407     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 408     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 409     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 410   } else {
 411     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 412     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 413     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 414   }
 415 }
 416 
 417 // Emit a register-memory operand: the ModRM byte, optional SIB byte and displacement (emit_reg_mem).
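     // Illustration (values are only an example): with reg_encoding == 0 (EAX),
     // base == 3 (EBX), index == 0x4 (none), scale == 0, displace == 8 and no
     // relocation, the 8-bit-displacement path below emits the two bytes
     // 0x43 0x08 -- the ModRM byte and disp8 of an [EBX+8] operand.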
 418 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 419   // If there is no index and no scale, use the form without a SIB byte
 420   if ((index == 0x4) &&
 421       (scale == 0) && (base != ESP_enc)) {
 422     // If no displacement, mode is 0x0; unless base is [EBP]
 423     if ( (displace == 0) && (base != EBP_enc) ) {
 424       emit_rm(cbuf, 0x0, reg_encoding, base);
 425     }
 426     else {                    // If 8-bit displacement, mode 0x1
 427       if ((displace >= -128) && (displace <= 127)
 428           && (disp_reloc == relocInfo::none) ) {
 429         emit_rm(cbuf, 0x1, reg_encoding, base);
 430         emit_d8(cbuf, displace);
 431       }
 432       else {                  // If 32-bit displacement
 433         if (base == -1) { // Special flag for absolute address
 434           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 435           // (manual lies; no SIB needed here)
 436           if ( disp_reloc != relocInfo::none ) {
 437             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 438           } else {
 439             emit_d32      (cbuf, displace);
 440           }
 441         }
 442         else {                // Normal base + offset
 443           emit_rm(cbuf, 0x2, reg_encoding, base);
 444           if ( disp_reloc != relocInfo::none ) {
 445             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 446           } else {
 447             emit_d32      (cbuf, displace);
 448           }
 449         }
 450       }
 451     }
 452   }
 453   else {                      // Else, encode with the SIB byte
 454     // If no displacement, mode is 0x0; unless base is [EBP]
 455     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 456       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 457       emit_rm(cbuf, scale, index, base);
 458     }
 459     else {                    // If 8-bit displacement, mode 0x1
 460       if ((displace >= -128) && (displace <= 127)
 461           && (disp_reloc == relocInfo::none) ) {
 462         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 463         emit_rm(cbuf, scale, index, base);
 464         emit_d8(cbuf, displace);
 465       }
 466       else {                  // If 32-bit displacement
 467         if (base == 0x04 ) {
 468           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 469           emit_rm(cbuf, scale, index, 0x04);
 470         } else {
 471           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 472           emit_rm(cbuf, scale, index, base);
 473         }
 474         if ( disp_reloc != relocInfo::none ) {
 475           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 476         } else {
 477           emit_d32      (cbuf, displace);
 478         }
 479       }
 480     }
 481   }
 482 }
 483 
 484 
 485 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 486   if( dst_encoding == src_encoding ) {
 487     // reg-reg copy, use an empty encoding
 488   } else {
 489     emit_opcode( cbuf, 0x8B );
 490     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 491   }
 492 }
 493 
 494 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 495   Label exit;
 496   __ jccb(Assembler::noParity, exit);
 497   __ pushf();
 498   //
 499   // comiss/ucomiss instructions set ZF,PF,CF flags and
 500   // zero OF,AF,SF for NaN values.
 501   // Fixup flags by zeroing ZF,PF so that compare of NaN
 502   // values returns 'less than' result (CF is set).
 503   // Leave the rest of flags unchanged.
 504   //
 505   //    7 6 5 4 3 2 1 0
 506   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 507   //    0 0 1 0 1 0 1 1   (0x2B)
 508   //
 509   __ andl(Address(rsp, 0), 0xffffff2b);
 510   __ popf();
 511   __ bind(exit);
 512 }
 513 
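     // Produce the canonical three-way FP compare result in 'dst': -1 for
     // "less than" or unordered (the parity/below branches keep the initial -1),
     // 0 for equal, and +1 for greater (SETNE of the zero flag, zero-extended).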
 514 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 515   Label done;
 516   __ movl(dst, -1);
 517   __ jcc(Assembler::parity, done);
 518   __ jcc(Assembler::below, done);
 519   __ setb(Assembler::notEqual, dst);
 520   __ movzbl(dst, dst);
 521   __ bind(done);
 522 }
 523 
 524 
 525 //=============================================================================
 526 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 527 
 528 int Compile::ConstantTable::calculate_table_base_offset() const {
 529   return 0;  // absolute addressing, no offset
 530 }
 531 
 532 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 533 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 534   ShouldNotReachHere();
 535 }
 536 
 537 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 538   // Empty encoding
 539 }
 540 
 541 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 542   return 0;
 543 }
 544 
 545 #ifndef PRODUCT
 546 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 547   st->print("# MachConstantBaseNode (empty encoding)");
 548 }
 549 #endif
 550 
 551 
 552 //=============================================================================
 553 #ifndef PRODUCT
 554 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 555   Compile* C = ra_->C;
 556 
 557   int framesize = C->frame_size_in_bytes();
 558   int bangsize = C->bang_size_in_bytes();
 559   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 560   // Remove wordSize for return addr which is already pushed.
 561   framesize -= wordSize;
 562 
 563   if (C->need_stack_bang(bangsize)) {
 564     framesize -= wordSize;
 565     st->print("# stack bang (%d bytes)", bangsize);
 566     st->print("\n\t");
 567     st->print("PUSH   EBP\t# Save EBP");
 568     if (PreserveFramePointer) {
 569       st->print("\n\t");
 570       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 571     }
 572     if (framesize) {
 573       st->print("\n\t");
 574       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 575     }
 576   } else {
 577     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 578     st->print("\n\t");
 579     framesize -= wordSize;
 580     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 581     if (PreserveFramePointer) {
 582       st->print("\n\t");
 583       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 584       if (framesize > 0) {
 585         st->print("\n\t");
 586         st->print("ADD    EBP, #%d", framesize);
 587       }
 588     }
 589   }
 590 
 591   if (VerifyStackAtCalls) {
 592     st->print("\n\t");
 593     framesize -= wordSize;
 594     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 595   }
 596 
 597   if( C->in_24_bit_fp_mode() ) {
 598     st->print("\n\t");
 599     st->print("FLDCW  \t# load 24 bit fpu control word");
 600   }
 601   if (UseSSE >= 2 && VerifyFPU) {
 602     st->print("\n\t");
 603     st->print("# verify FPU stack (must be clean on entry)");
 604   }
 605 
 606 #ifdef ASSERT
 607   if (VerifyStackAtCalls) {
 608     st->print("\n\t");
 609     st->print("# stack alignment check");
 610   }
 611 #endif
 612   st->cr();
 613 }
 614 #endif
 615 
 616 
 617 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 618   Compile* C = ra_->C;
 619   MacroAssembler _masm(&cbuf);
 620 
 621   int framesize = C->frame_size_in_bytes();
 622   int bangsize = C->bang_size_in_bytes();
 623 
 624   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 625 
 626   C->set_frame_complete(cbuf.insts_size());
 627 
 628   if (C->has_mach_constant_base_node()) {
 629     // NOTE: We set the table base offset here because users might be
 630     // emitted before MachConstantBaseNode.
 631     Compile::ConstantTable& constant_table = C->constant_table();
 632     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 633   }
 634 }
 635 
 636 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 637   return MachNode::size(ra_); // too many variables; just compute it the hard way
 638 }
 639 
 640 int MachPrologNode::reloc() const {
 641   return 0; // a large enough number
 642 }
 643 
 644 //=============================================================================
 645 #ifndef PRODUCT
 646 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 647   Compile *C = ra_->C;
 648   int framesize = C->frame_size_in_bytes();
 649   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 650   // Remove two words for return addr and rbp,
 651   framesize -= 2*wordSize;
 652 
 653   if (C->max_vector_size() > 16) {
 654     st->print("VZEROUPPER");
 655     st->cr(); st->print("\t");
 656   }
 657   if (C->in_24_bit_fp_mode()) {
 658     st->print("FLDCW  standard control word");
 659     st->cr(); st->print("\t");
 660   }
 661   if (framesize) {
 662     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 663     st->cr(); st->print("\t");
 664   }
 665   st->print_cr("POPL   EBP"); st->print("\t");
 666   if (do_polling() && C->is_method_compilation()) {
 667     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 668     st->cr(); st->print("\t");
 669   }
 670 }
 671 #endif
 672 
 673 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 674   Compile *C = ra_->C;
 675 
 676   if (C->max_vector_size() > 16) {
 677     // Clear upper bits of YMM registers when current compiled code uses
 678     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 679     MacroAssembler masm(&cbuf);
 680     masm.vzeroupper();
 681   }
 682   // If method set FPU control word, restore to standard control word
 683   if (C->in_24_bit_fp_mode()) {
 684     MacroAssembler masm(&cbuf);
 685     masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 686   }
 687 
 688   int framesize = C->frame_size_in_bytes();
 689   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 690   // Remove two words for return addr and rbp,
 691   framesize -= 2*wordSize;
 692 
 693   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 694 
 695   if (framesize >= 128) {
 696     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 697     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 698     emit_d32(cbuf, framesize);
 699   } else if (framesize) {
 700     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 701     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 702     emit_d8(cbuf, framesize);
 703   }
 704 
 705   emit_opcode(cbuf, 0x58 | EBP_enc);
 706 
 707   if (do_polling() && C->is_method_compilation()) {
 708     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 709     emit_opcode(cbuf,0x85);
 710     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 711     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 712   }
 713 }
 714 
 715 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 716   Compile *C = ra_->C;
 717   // If method set FPU control word, restore to standard control word
 718   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 719   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 720   if (do_polling() && C->is_method_compilation()) size += 6;
 721 
 722   int framesize = C->frame_size_in_bytes();
 723   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 724   // Remove two words for return addr and rbp,
 725   framesize -= 2*wordSize;
 726 
 727   size++; // popl rbp,
 728 
 729   if (framesize >= 128) {
 730     size += 6;
 731   } else {
 732     size += framesize ? 3 : 0;
 733   }
 734   return size;
 735 }
 736 
 737 int MachEpilogNode::reloc() const {
 738   return 0; // a large enough number
 739 }
 740 
 741 const Pipeline * MachEpilogNode::pipeline() const {
 742   return MachNode::pipeline_class();
 743 }
 744 
 745 int MachEpilogNode::safepoint_offset() const { return 0; }
 746 
 747 //=============================================================================
 748 
 749 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 750 static enum RC rc_class( OptoReg::Name reg ) {
 751 
 752   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 753   if (OptoReg::is_stack(reg)) return rc_stack;
 754 
 755   VMReg r = OptoReg::as_VMReg(reg);
 756   if (r->is_Register()) return rc_int;
 757   if (r->is_FloatRegister()) {
 758     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 759     return rc_float;
 760   }
 761   assert(r->is_XMMRegister(), "must be");
 762   return rc_xmm;
 763 }
 764 
 765 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 766                         int opcode, const char *op_str, int size, outputStream* st ) {
 767   if( cbuf ) {
 768     emit_opcode  (*cbuf, opcode );
 769     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 770 #ifndef PRODUCT
 771   } else if( !do_size ) {
 772     if( size != 0 ) st->print("\n\t");
 773     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 774       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 775       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 776     } else { // FLD, FST, PUSH, POP
 777       st->print("%s [ESP + #%d]",op_str,offset);
 778     }
 779 #endif
 780   }
 781   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 782   return size+3+offset_size;
 783 }
 784 
 785 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 786 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 787                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 788   int in_size_in_bits = Assembler::EVEX_32bit;
 789   int evex_encoding = 0;
 790   if (reg_lo+1 == reg_hi) {
 791     in_size_in_bits = Assembler::EVEX_64bit;
 792     evex_encoding = Assembler::VEX_W;
 793   }
 794   if (cbuf) {
 795     MacroAssembler _masm(cbuf);
 796     if (reg_lo+1 == reg_hi) { // double move?
 797       if (is_load) {
 798         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 799       } else {
 800         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 801       }
 802     } else {
 803       if (is_load) {
 804         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 805       } else {
 806         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 807       }
 808     }
 809 #ifndef PRODUCT
 810   } else if (!do_size) {
 811     if (size != 0) st->print("\n\t");
 812     if (reg_lo+1 == reg_hi) { // double move?
 813       if (is_load) st->print("%s %s,[ESP + #%d]",
 814                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 815                               Matcher::regName[reg_lo], offset);
 816       else         st->print("MOVSD  [ESP + #%d],%s",
 817                               offset, Matcher::regName[reg_lo]);
 818     } else {
 819       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 820                               Matcher::regName[reg_lo], offset);
 821       else         st->print("MOVSS  [ESP + #%d],%s",
 822                               offset, Matcher::regName[reg_lo]);
 823     }
 824 #endif
 825   }
 826   bool is_single_byte = false;
 827   if ((UseAVX > 2) && (offset != 0)) {
 828     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 829   }
 830   int offset_size = 0;
 831   if (UseAVX > 2 ) {
 832     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 833   } else {
 834     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 835   }
 836   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 837   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 838   return size+5+offset_size;
 839 }
 840 
 841 
 842 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 843                             int src_hi, int dst_hi, int size, outputStream* st ) {
 844   if (cbuf) {
 845     MacroAssembler _masm(cbuf);
 846     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 847       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 848                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 849     } else {
 850       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 851                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 852     }
 853 #ifndef PRODUCT
 854   } else if (!do_size) {
 855     if (size != 0) st->print("\n\t");
 856     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 857       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 858         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 859       } else {
 860         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 861       }
 862     } else {
 863       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 864         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 865       } else {
 866         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 867       }
 868     }
 869 #endif
 870   }
 871   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 872   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 873   int sz = (UseAVX > 2) ? 6 : 4;
 874   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 875       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 876   return size + sz;
 877 }
 878 
 879 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 880                             int src_hi, int dst_hi, int size, outputStream* st ) {
 881   // 32-bit
 882   if (cbuf) {
 883     MacroAssembler _masm(cbuf);
 884     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 885              as_Register(Matcher::_regEncode[src_lo]));
 886 #ifndef PRODUCT
 887   } else if (!do_size) {
 888     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 889 #endif
 890   }
 891   return (UseAVX> 2) ? 6 : 4;
 892 }
 893 
 894 
 895 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 896                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 897   // 32-bit
 898   if (cbuf) {
 899     MacroAssembler _masm(cbuf);
 900     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 901              as_XMMRegister(Matcher::_regEncode[src_lo]));
 902 #ifndef PRODUCT
 903   } else if (!do_size) {
 904     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 905 #endif
 906   }
 907   return (UseAVX> 2) ? 6 : 4;
 908 }
 909 
 910 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 911   if( cbuf ) {
 912     emit_opcode(*cbuf, 0x8B );
 913     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 914 #ifndef PRODUCT
 915   } else if( !do_size ) {
 916     if( size != 0 ) st->print("\n\t");
 917     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 918 #endif
 919   }
 920   return size+2;
 921 }
 922 
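     // Note: the EBX_num/EDX_num passed to impl_helper below are not source
     // registers; their x86 encodings (3 and 2) land in the reg field of the
     // ModRM byte and select the /3 (FSTP) and /2 (FST) forms of the x87 store
     // opcodes 0xD9/0xDD.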
 923 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 924                                  int offset, int size, outputStream* st ) {
 925   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 926     if( cbuf ) {
 927       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 928       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 929 #ifndef PRODUCT
 930     } else if( !do_size ) {
 931       if( size != 0 ) st->print("\n\t");
 932       st->print("FLD    %s",Matcher::regName[src_lo]);
 933 #endif
 934     }
 935     size += 2;
 936   }
 937 
 938   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 939   const char *op_str;
 940   int op;
 941   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 942     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 943     op = 0xDD;
 944   } else {                   // 32-bit store
 945     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 946     op = 0xD9;
 947     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 948   }
 949 
 950   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 951 }
 952 
 953 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 954 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 955                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 956 
 957 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 958                             int stack_offset, int reg, uint ireg, outputStream* st);
 959 
 960 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 961                                      int dst_offset, uint ireg, outputStream* st) {
 962   int calc_size = 0;
 963   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 964   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 965   switch (ireg) {
 966   case Op_VecS:
 967     calc_size = 3+src_offset_size + 3+dst_offset_size;
 968     break;
 969   case Op_VecD:
 970     calc_size = 3+src_offset_size + 3+dst_offset_size;
 971     src_offset += 4;
 972     dst_offset += 4;
 973     src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 974     dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 975     calc_size += 3+src_offset_size + 3+dst_offset_size;
 976     break;
 977   case Op_VecX:
 978   case Op_VecY:
 979   case Op_VecZ:
 980     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 981     break;
 982   default:
 983     ShouldNotReachHere();
 984   }
 985   if (cbuf) {
 986     MacroAssembler _masm(cbuf);
 987     int offset = __ offset();
 988     switch (ireg) {
 989     case Op_VecS:
 990       __ pushl(Address(rsp, src_offset));
 991       __ popl (Address(rsp, dst_offset));
 992       break;
 993     case Op_VecD:
 994       __ pushl(Address(rsp, src_offset));
 995       __ popl (Address(rsp, dst_offset));
 996       __ pushl(Address(rsp, src_offset+4));
 997       __ popl (Address(rsp, dst_offset+4));
 998       break;
 999     case Op_VecX:
1000       __ movdqu(Address(rsp, -16), xmm0);
1001       __ movdqu(xmm0, Address(rsp, src_offset));
1002       __ movdqu(Address(rsp, dst_offset), xmm0);
1003       __ movdqu(xmm0, Address(rsp, -16));
1004       break;
1005     case Op_VecY:
1006       __ vmovdqu(Address(rsp, -32), xmm0);
1007       __ vmovdqu(xmm0, Address(rsp, src_offset));
1008       __ vmovdqu(Address(rsp, dst_offset), xmm0);
1009       __ vmovdqu(xmm0, Address(rsp, -32));
           break;
1010     case Op_VecZ:
1011       __ evmovdqul(Address(rsp, -64), xmm0, 2);
1012       __ evmovdqul(xmm0, Address(rsp, src_offset), 2);
1013       __ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
1014       __ evmovdqul(xmm0, Address(rsp, -64), 2);
1015       break;
1016     default:
1017       ShouldNotReachHere();
1018     }
1019     int size = __ offset() - offset;
1020     assert(size == calc_size, "incorrect size calculation");
1021     return size;
1022 #ifndef PRODUCT
1023   } else if (!do_size) {
1024     switch (ireg) {
1025     case Op_VecS:
1026       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1027                 "popl    [rsp + #%d]",
1028                 src_offset, dst_offset);
1029       break;
1030     case Op_VecD:
1031       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1032                 "popl    [rsp + #%d]\n\t"
1033                 "pushl   [rsp + #%d]\n\t"
1034                 "popl    [rsp + #%d]",
1035                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1036       break;
1037      case Op_VecX:
1038       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1039                 "movdqu  xmm0, [rsp + #%d]\n\t"
1040                 "movdqu  [rsp + #%d], xmm0\n\t"
1041                 "movdqu  xmm0, [rsp - #16]",
1042                 src_offset, dst_offset);
1043       break;
1044     case Op_VecY:
1045       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1046                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1047                 "vmovdqu [rsp + #%d], xmm0\n\t"
1048                 "vmovdqu xmm0, [rsp - #32]",
1049                 src_offset, dst_offset);
           break;
1050     case Op_VecZ:
1051       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1052                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1053                 "vmovdqu [rsp + #%d], xmm0\n\t"
1054                 "vmovdqu xmm0, [rsp - #64]",
1055                 src_offset, dst_offset);
1056       break;
1057     default:
1058       ShouldNotReachHere();
1059     }
1060 #endif
1061   }
1062   return calc_size;
1063 }
1064 
1065 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1066   // Get registers to move
1067   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1068   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1069   OptoReg::Name dst_second = ra_->get_reg_second(this );
1070   OptoReg::Name dst_first = ra_->get_reg_first(this );
1071 
1072   enum RC src_second_rc = rc_class(src_second);
1073   enum RC src_first_rc = rc_class(src_first);
1074   enum RC dst_second_rc = rc_class(dst_second);
1075   enum RC dst_first_rc = rc_class(dst_first);
1076 
1077   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1078 
1079   // Generate spill code!
1080   int size = 0;
1081 
1082   if( src_first == dst_first && src_second == dst_second )
1083     return size;            // Self copy, no move
1084 
1085   if (bottom_type()->isa_vect() != NULL) {
1086     uint ireg = ideal_reg();
1087     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1088     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1089     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1090     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1091       // mem -> mem
1092       int src_offset = ra_->reg2offset(src_first);
1093       int dst_offset = ra_->reg2offset(dst_first);
1094       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1095     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1096       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1097     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1098       int stack_offset = ra_->reg2offset(dst_first);
1099       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1100     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1101       int stack_offset = ra_->reg2offset(src_first);
1102       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1103     } else {
1104       ShouldNotReachHere();
1105     }
1106   }
1107 
1108   // --------------------------------------
1109   // Check for mem-mem move.  push/pop to move.
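       // (ESI_num and EAX_num below are likewise just ModRM reg-field values:
       // their encodings 6 and 0 select the /6 form of PUSH r/m32 (0xFF) and
       // the /0 form of POP r/m32 (0x8F).)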
1110   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1111     if( src_second == dst_first ) { // overlapping stack copy ranges
1112       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1113       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1114       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1115       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1116     }
1117     // move low bits
1118     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1119     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1120     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1121       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1122       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1123     }
1124     return size;
1125   }
1126 
1127   // --------------------------------------
1128   // Check for integer reg-reg copy
1129   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1130     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1131 
1132   // Check for integer store
1133   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1134     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1135 
1136   // Check for integer load
1137   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1138     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1139 
1140   // Check for integer reg-xmm reg copy
1141   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1142     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1143             "no 64 bit integer-float reg moves" );
1144     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1145   }
1146   // --------------------------------------
1147   // Check for float reg-reg copy
1148   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1149     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1150             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1151     if( cbuf ) {
1152 
1153       // Note the mucking with the register encode to compensate for the 0/1
1154       // indexing issue mentioned in a comment in the reg_def sections
1155       // for FPR registers many lines above here.
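           // (FLD ST(i) is encoded as D9 C0+i, FSTP ST(i) as DD D8+i and
           // FST ST(i) as DD D0+i; the -1 below compensates for FPR1 being
           // st(0) outside of instruction emission.)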
1156 
1157       if( src_first != FPR1L_num ) {
1158         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1159         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1160         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1161         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1162      } else {
1163         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1164         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1165      }
1166 #ifndef PRODUCT
1167     } else if( !do_size ) {
1168       if( size != 0 ) st->print("\n\t");
1169       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1170       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1171 #endif
1172     }
1173     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1174   }
1175 
1176   // Check for float store
1177   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1178     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1179   }
1180 
1181   // Check for float load
1182   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1183     int offset = ra_->reg2offset(src_first);
1184     const char *op_str;
1185     int op;
1186     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1187       op_str = "FLD_D";
1188       op = 0xDD;
1189     } else {                   // 32-bit load
1190       op_str = "FLD_S";
1191       op = 0xD9;
1192       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1193     }
1194     if( cbuf ) {
1195       emit_opcode  (*cbuf, op );
1196       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1197       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1198       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1199 #ifndef PRODUCT
1200     } else if( !do_size ) {
1201       if( size != 0 ) st->print("\n\t");
1202       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1203 #endif
1204     }
1205     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1206     return size + 3+offset_size+2;
1207   }
1208 
1209   // Check for xmm reg-reg copy
1210   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1211     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1212             (src_first+1 == src_second && dst_first+1 == dst_second),
1213             "no non-adjacent float-moves" );
1214     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1215   }
1216 
1217   // Check for xmm reg-integer reg copy
1218   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1219     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1220             "no 64 bit float-integer reg moves" );
1221     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1222   }
1223 
1224   // Check for xmm store
1225   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1226     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1227   }
1228 
1229   // Check for float xmm load
1230   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1231     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1232   }
1233 
1234   // Copy from float reg to xmm reg
1235   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1236     // copy to the top of stack from floating point reg
1237     // and use LEA to preserve flags
1238     if( cbuf ) {
1239       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1240       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1241       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1242       emit_d8(*cbuf,0xF8);
1243 #ifndef PRODUCT
1244     } else if( !do_size ) {
1245       if( size != 0 ) st->print("\n\t");
1246       st->print("LEA    ESP,[ESP-8]");
1247 #endif
1248     }
1249     size += 4;
1250 
1251     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1252 
1253     // Copy from the temp memory to the xmm reg.
1254     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1255 
1256     if( cbuf ) {
1257       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1258       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1259       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1260       emit_d8(*cbuf,0x08);
1261 #ifndef PRODUCT
1262     } else if( !do_size ) {
1263       if( size != 0 ) st->print("\n\t");
1264       st->print("LEA    ESP,[ESP+8]");
1265 #endif
1266     }
1267     size += 4;
1268     return size;
1269   }
1270 
1271   assert( size > 0, "missed a case" );
1272 
1273   // --------------------------------------------------------------------
1274   // Check for second bits still needing moving.
1275   if( src_second == dst_second )
1276     return size;               // Self copy; no move
1277   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1278 
1279   // Check for second word int-int move
1280   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1281     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1282 
1283   // Check for second word integer store
1284   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1285     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1286 
1287   // Check for second word integer load
1288   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1289     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1290 
1291 
1292   Unimplemented();
1293   return 0; // Mute compiler
1294 }
1295 
1296 #ifndef PRODUCT
1297 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1298   implementation( NULL, ra_, false, st );
1299 }
1300 #endif
1301 
1302 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1303   implementation( &cbuf, ra_, false, NULL );
1304 }
1305 
1306 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1307   return implementation( NULL, ra_, true, NULL );
1308 }
1309 
1310 
1311 //=============================================================================
1312 #ifndef PRODUCT
1313 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1314   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1315   int reg = ra_->get_reg_first(this);
1316   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1317 }
1318 #endif
1319 
1320 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1321   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1322   int reg = ra_->get_encode(this);
1323   if( offset >= 128 ) {
1324     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1325     emit_rm(cbuf, 0x2, reg, 0x04);
1326     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1327     emit_d32(cbuf, offset);
1328   }
1329   else {
1330     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1331     emit_rm(cbuf, 0x1, reg, 0x04);
1332     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1333     emit_d8(cbuf, offset);
1334   }
1335 }
1336 
1337 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1338   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1339   if( offset >= 128 ) {
1340     return 7;
1341   }
1342   else {
1343     return 4;
1344   }
1345 }
1346 
1347 //=============================================================================
1348 #ifndef PRODUCT
1349 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1350   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1351   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1352   st->print_cr("\tNOP");
1353   st->print_cr("\tNOP");
1354   if( !OptoBreakpoint )
1355     st->print_cr("\tNOP");
1356 }
1357 #endif
1358 
1359 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1360   MacroAssembler masm(&cbuf);
1361 #ifdef ASSERT
1362   uint insts_size = cbuf.insts_size();
1363 #endif
1364   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1365   masm.jump_cc(Assembler::notEqual,
1366                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1367   /* WARNING these NOPs are critical so that verified entry point is properly
1368      aligned for patching by NativeJump::patch_verified_entry() */
1369   int nops_cnt = 2;
1370   if( !OptoBreakpoint ) // Leave space for int3
1371      nops_cnt += 1;
1372   masm.nop(nops_cnt);
1373 
1374   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1375 }
1376 
1377 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1378   return OptoBreakpoint ? 11 : 12;
1379 }
1380 
1381 
1382 //=============================================================================
1383 
1384 int Matcher::regnum_to_fpu_offset(int regnum) {
1385   return regnum - 32; // The FP registers are in the second chunk
1386 }
1387 
// This hook is UltraSparc specific; on x86 returning true just means we have fast l2f conversion
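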
1389 const bool Matcher::convL2FSupported(void) {
1390   return true;
1391 }
1392 
1393 // Is this branch offset short enough that a short branch can be used?
1394 //
1395 // NOTE: If the platform does not provide any short branch variants, then
1396 //       this method should return false for offset 0.
1397 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  // The passed offset is relative to the address of the branch.
  // On x86 a branch displacement is calculated relative to the address
  // of the next instruction.
1401   offset -= br_size;
1402 
  // The short version of jmpConUCF2 contains multiple branches,
  // making the reach slightly shorter.
1405   if (rule == jmpConUCF2_rule)
1406     return (-126 <= offset && offset <= 125);
1407   return (-128 <= offset && offset <= 127);
1408 }
1409 
1410 const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1412   return false;
1413 }
1414 
1415 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1416 const bool Matcher::init_array_count_is_in_bytes = false;
1417 
1418 // Threshold size for cleararray.
1419 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1420 
1421 // Needs 2 CMOV's for longs.
1422 const int Matcher::long_cmove_cost() { return 1; }
1423 
1424 // No CMOVF/CMOVD with SSE/SSE2
1425 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1426 
1427 // Does the CPU require late expand (see block.cpp for description of late expand)?
1428 const bool Matcher::require_postalloc_expand = false;
1429 
1430 // Should the Matcher clone shifts on addressing modes, expecting them to
1431 // be subsumed into complex addressing expressions or compute them into
1432 // registers?  True for Intel but false for most RISCs
1433 const bool Matcher::clone_shift_expressions = true;
1434 
1435 // Do we need to mask the count passed to shift instructions or does
// the CPU only looks at the lower 5/6 bits anyway?
1437 const bool Matcher::need_masked_shift_count = false;
1438 
1439 bool Matcher::narrow_oop_use_complex_address() {
1440   ShouldNotCallThis();
1441   return true;
1442 }
1443 
1444 bool Matcher::narrow_klass_use_complex_address() {
1445   ShouldNotCallThis();
1446   return true;
1447 }
1448 
1449 
1450 // Is it better to copy float constants, or load them directly from memory?
1451 // Intel can load a float constant from a direct address, requiring no
1452 // extra registers.  Most RISCs will have to materialize an address into a
1453 // register first, so they would do better to copy the constant from stack.
1454 const bool Matcher::rematerialize_float_constants = true;
1455 
1456 // If CPU can load and store mis-aligned doubles directly then no fixup is
1457 // needed.  Else we split the double into 2 integer pieces and move it
1458 // piece-by-piece.  Only happens when passing doubles into C code as the
1459 // Java calling convention forces doubles to be aligned.
1460 const bool Matcher::misaligned_doubles_ok = true;
1461 
1462 
1463 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1464   // Get the memory operand from the node
1465   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1466   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1467   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1468   uint opcnt     = 1;                 // First operand
1469   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1470   while( idx >= skipped+num_edges ) {
1471     skipped += num_edges;
1472     opcnt++;                          // Bump operand count
1473     assert( opcnt < numopnds, "Accessing non-existent operand" );
1474     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1475   }
1476 
1477   MachOper *memory = node->_opnds[opcnt];
1478   MachOper *new_memory = NULL;
1479   switch (memory->opcode()) {
1480   case DIRECT:
1481   case INDOFFSET32X:
1482     // No transformation necessary.
1483     return;
1484   case INDIRECT:
1485     new_memory = new indirect_win95_safeOper( );
1486     break;
1487   case INDOFFSET8:
1488     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1489     break;
1490   case INDOFFSET32:
1491     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1492     break;
1493   case INDINDEXOFFSET:
1494     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1495     break;
1496   case INDINDEXSCALE:
1497     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1498     break;
1499   case INDINDEXSCALEOFFSET:
1500     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1501     break;
1502   case LOAD_LONG_INDIRECT:
1503   case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as the address register; uses one of {EDX, EBX, EDI, ESI}
1505     return;
1506   default:
1507     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1508     return;
1509   }
1510   node->_opnds[opcnt] = new_memory;
1511 }
1512 
1513 // Advertise here if the CPU requires explicit rounding operations
1514 // to implement the UseStrictFP mode.
1515 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1516 
// Are floats converted to doubles when stored to the stack during deoptimization?
// On x86-32 they are stored with conversion only when the FPU is used for floats.
1519 bool Matcher::float_in_double() { return (UseSSE == 0); }
1520 
1521 // Do ints take an entire long register or just half?
1522 const bool Matcher::int_in_long = false;
1523 
1524 // Return whether or not this register is ever used as an argument.  This
1525 // function is used on startup to build the trampoline stubs in generateOptoStub.
1526 // Registers not mentioned will be killed by the VM call in the trampoline, and
// arguments in those registers will not be available to the callee.
1528 bool Matcher::can_be_java_arg( int reg ) {
1529   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1530   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1531   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1532   return false;
1533 }
1534 
1535 bool Matcher::is_spillable_arg( int reg ) {
1536   return can_be_java_arg(reg);
1537 }
1538 
1539 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  // Use the hardware integer DIV instruction when
  // it is faster than code which uses a multiply.
  // Only when the constant divisor fits into 32 bits
  // (min_jint is excluded because its negation does not
  // yield a correct positive 32-bit value).
1545   return VM_Version::has_fast_idiv() &&
1546          (divisor == (int)divisor && divisor != min_jint);
1547 }
1548 
1549 // Register for DIVI projection of divmodI
1550 RegMask Matcher::divI_proj_mask() {
1551   return EAX_REG_mask();
1552 }
1553 
1554 // Register for MODI projection of divmodI
1555 RegMask Matcher::modI_proj_mask() {
1556   return EDX_REG_mask();
1557 }
1558 
1559 // Register for DIVL projection of divmodL
1560 RegMask Matcher::divL_proj_mask() {
1561   ShouldNotReachHere();
1562   return RegMask();
1563 }
1564 
1565 // Register for MODL projection of divmodL
1566 RegMask Matcher::modL_proj_mask() {
1567   ShouldNotReachHere();
1568   return RegMask();
1569 }
1570 
1571 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1572   return NO_REG_mask();
1573 }
1574 
// Returns true if the high 32 bits of the value are known to be zero.
1576 bool is_operand_hi32_zero(Node* n) {
1577   int opc = n->Opcode();
1578   if (opc == Op_AndL) {
1579     Node* o2 = n->in(2);
1580     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1581       return true;
1582     }
1583   }
1584   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1585     return true;
1586   }
1587   return false;
1588 }
1589 
1590 %}
1591 
1592 //----------ENCODING BLOCK-----------------------------------------------------
1593 // This block specifies the encoding classes used by the compiler to output
1594 // byte streams.  Encoding classes generate functions which are called by
1595 // Machine Instruction Nodes in order to generate the bit encoding of the
1596 // instruction.  Operands specify their base encoding interface with the
// interface keyword.  Four interfaces are currently supported:
1598 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1599 // operand to generate a function which returns its register number when
1600 // queried.   CONST_INTER causes an operand to generate a function which
1601 // returns the value of the constant when queried.  MEMORY_INTER causes an
1602 // operand to generate four functions which return the Base Register, the
1603 // Index Register, the Scale Value, and the Offset Value of the operand when
1604 // queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (i.e., the encoding bits for the instruction)
1606 // associated with each basic boolean condition for a conditional instruction.
1607 // Instructions specify two basic values for encoding.  They use the
1608 // ins_encode keyword to specify their encoding class (which must be one of
1609 // the class names specified in the encoding block), and they use the
1610 // opcode keyword to specify, in order, their primary, secondary, and
1611 // tertiary opcode.  Only the opcode sections which a particular instruction
1612 // needs for encoding need to be specified.
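// For example, an ALU instruct elsewhere in this file pairs the two keywords
// roughly as follows (illustrative sketch, not a new rule):
//
//   opcode(0x03);                        // primary opcode: ADD r32,r/m32
//   ins_encode( OpcP, RegReg(dst,src) ); // emit the opcode byte, then the mod/rm byte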
1613 encode %{
1614   // Build emit functions for each basic byte or larger field in the intel
1615   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1616   // code in the enc_class source block.  Emit functions will live in the
1617   // main source block for now.  In future, we can generalize this by
1618   // adding a syntax that specifies the sizes of fields in an order,
1619   // so that the adlc can build the emit functions automagically
1620 
1621   // Emit primary opcode
1622   enc_class OpcP %{
1623     emit_opcode(cbuf, $primary);
1624   %}
1625 
1626   // Emit secondary opcode
1627   enc_class OpcS %{
1628     emit_opcode(cbuf, $secondary);
1629   %}
1630 
1631   // Emit opcode directly
1632   enc_class Opcode(immI d8) %{
1633     emit_opcode(cbuf, $d8$$constant);
1634   %}
1635 
1636   enc_class SizePrefix %{
1637     emit_opcode(cbuf,0x66);
1638   %}
1639 
1640   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1641     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1642   %}
1643 
1644   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1645     emit_opcode(cbuf,$opcode$$constant);
1646     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1647   %}
1648 
1649   enc_class mov_r32_imm0( rRegI dst ) %{
1650     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1651     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1652   %}
1653 
1654   enc_class cdq_enc %{
1655     // Full implementation of Java idiv and irem; checks for
1656     // special case as described in JVM spec., p.243 & p.271.
1657     //
1658     //         normal case                           special case
1659     //
    // input : eax: dividend                          min_int
    //         reg: divisor                          -1
    //
    // output: eax: quotient  (= eax idiv reg)        min_int
    //         edx: remainder (= eax irem reg)        0
    //
    //  Code sequence:
1667     //
    //  81 F8 00 00 00 80    cmp         eax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         edx,edx
    //  83 F9 FF             cmp         ecx,0FFh      ; imm8 sign-extends to -1
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        ecx
    //                  done:
1677     //
1678     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1679     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp eax,80000000h
1681     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1682     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1683     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,0FFh
1686     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1687     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1688     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1689     // normal_case:
1690     emit_opcode(cbuf,0x99);                                         // cdq
1691     // idiv (note: must be emitted by the user of this rule)
    // done:
1693   %}
1694 
1695   // Dense encoding for older common ops
1696   enc_class Opc_plus(immI opcode, rRegI reg) %{
1697     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1698   %}
1699 
1700 
  // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
1702   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1703     // Check for 8-bit immediate, and set sign extend bit in opcode
1704     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1705       emit_opcode(cbuf, $primary | 0x02);
1706     }
1707     else {                          // If 32-bit immediate
1708       emit_opcode(cbuf, $primary);
1709     }
1710   %}
1711 
1712   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1713     // Emit primary opcode and set sign-extend bit
1714     // Check for 8-bit immediate, and set sign extend bit in opcode
1715     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
1717     else {                          // If 32-bit immediate
1718       emit_opcode(cbuf, $primary);
1719     }
1720     // Emit r/m byte with secondary opcode, after primary opcode.
1721     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1722   %}
1723 
1724   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1725     // Check for 8-bit immediate, and set sign extend bit in opcode
1726     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1727       $$$emit8$imm$$constant;
1728     }
1729     else {                          // If 32-bit immediate
1730       // Output immediate
1731       $$$emit32$imm$$constant;
1732     }
1733   %}
1734 
1735   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1736     // Emit primary opcode and set sign-extend bit
1737     // Check for 8-bit immediate, and set sign extend bit in opcode
1738     int con = (int)$imm$$constant; // Throw away top bits
1739     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1740     // Emit r/m byte with secondary opcode, after primary opcode.
1741     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1742     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1743     else                               emit_d32(cbuf,con);
1744   %}
1745 
1746   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1747     // Emit primary opcode and set sign-extend bit
1748     // Check for 8-bit immediate, and set sign extend bit in opcode
1749     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1750     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1751     // Emit r/m byte with tertiary opcode, after primary opcode.
1752     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1753     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1754     else                               emit_d32(cbuf,con);
1755   %}
1756 
1757   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1758     emit_cc(cbuf, $secondary, $dst$$reg );
1759   %}
1760 
1761   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1762     int destlo = $dst$$reg;
1763     int desthi = HIGH_FROM_LOW(destlo);
1764     // bswap lo
1765     emit_opcode(cbuf, 0x0F);
1766     emit_cc(cbuf, 0xC8, destlo);
1767     // bswap hi
1768     emit_opcode(cbuf, 0x0F);
1769     emit_cc(cbuf, 0xC8, desthi);
1770     // xchg lo and hi
1771     emit_opcode(cbuf, 0x87);
1772     emit_rm(cbuf, 0x3, destlo, desthi);
1773   %}
1774 
1775   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1776     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1777   %}
1778 
1779   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1780     $$$emit8$primary;
1781     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1782   %}
1783 
1784   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1785     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1786     emit_d8(cbuf, op >> 8 );
1787     emit_d8(cbuf, op & 255);
1788   %}
1789 
1790   // emulate a CMOV with a conditional branch around a MOV
1791   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1792     // Invert sense of branch from sense of CMOV
1793     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1794     emit_d8( cbuf, $brOffs$$constant );
1795   %}
1796 
1797   enc_class enc_PartialSubtypeCheck( ) %{
1798     Register Redi = as_Register(EDI_enc); // result register
1799     Register Reax = as_Register(EAX_enc); // super class
1800     Register Recx = as_Register(ECX_enc); // killed
1801     Register Resi = as_Register(ESI_enc); // sub class
1802     Label miss;
1803 
1804     MacroAssembler _masm(&cbuf);
1805     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1806                                      NULL, &miss,
1807                                      /*set_cond_codes:*/ true);
1808     if ($primary) {
1809       __ xorptr(Redi, Redi);
1810     }
1811     __ bind(miss);
1812   %}
1813 
1814   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1815     MacroAssembler masm(&cbuf);
1816     int start = masm.offset();
1817     if (UseSSE >= 2) {
1818       if (VerifyFPU) {
1819         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1820       }
1821     } else {
1822       // External c_calling_convention expects the FPU stack to be 'clean'.
1823       // Compiled code leaves it dirty.  Do cleanup now.
1824       masm.empty_FPU_stack();
1825     }
1826     if (sizeof_FFree_Float_Stack_All == -1) {
1827       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1828     } else {
1829       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1830     }
1831   %}
1832 
1833   enc_class Verify_FPU_For_Leaf %{
1834     if( VerifyFPU ) {
1835       MacroAssembler masm(&cbuf);
1836       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1837     }
1838   %}
1839 
1840   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1841     // This is the instruction starting address for relocation info.
1842     cbuf.set_insts_mark();
1843     $$$emit8$primary;
1844     // CALL directly to the runtime
1845     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1846                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1847 
1848     if (UseSSE >= 2) {
1849       MacroAssembler _masm(&cbuf);
1850       BasicType rt = tf()->return_type();
1851 
1852       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1853         // A C runtime call where the return value is unused.  In SSE2+
1854         // mode the result needs to be removed from the FPU stack.  It's
1855         // likely that this function call could be removed by the
1856         // optimizer if the C function is a pure function.
1857         __ ffree(0);
1858       } else if (rt == T_FLOAT) {
1859         __ lea(rsp, Address(rsp, -4));
1860         __ fstp_s(Address(rsp, 0));
1861         __ movflt(xmm0, Address(rsp, 0));
1862         __ lea(rsp, Address(rsp,  4));
1863       } else if (rt == T_DOUBLE) {
1864         __ lea(rsp, Address(rsp, -8));
1865         __ fstp_d(Address(rsp, 0));
1866         __ movdbl(xmm0, Address(rsp, 0));
1867         __ lea(rsp, Address(rsp,  8));
1868       }
1869     }
1870   %}
1871 
1872 
1873   enc_class pre_call_resets %{
1874     // If method sets FPU control word restore it here
1875     debug_only(int off0 = cbuf.insts_size());
1876     if (ra_->C->in_24_bit_fp_mode()) {
1877       MacroAssembler _masm(&cbuf);
1878       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1879     }
1880     if (ra_->C->max_vector_size() > 16) {
1881       // Clear upper bits of YMM registers when current compiled code uses
1882       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1883       MacroAssembler _masm(&cbuf);
1884       __ vzeroupper();
1885     }
1886     debug_only(int off1 = cbuf.insts_size());
1887     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1888   %}
1889 
1890   enc_class post_call_FPU %{
1891     // If method sets FPU control word do it here also
1892     if (Compile::current()->in_24_bit_fp_mode()) {
1893       MacroAssembler masm(&cbuf);
1894       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1895     }
1896   %}
1897 
1898   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1899     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1900     // who we intended to call.
1901     cbuf.set_insts_mark();
1902     $$$emit8$primary;
1903     if (!_method) {
1904       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1905                      runtime_call_Relocation::spec(), RELOC_IMM32 );
1906     } else if (_optimized_virtual) {
1907       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1908                      opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
1909     } else {
1910       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1911                      static_call_Relocation::spec(), RELOC_IMM32 );
1912     }
1913     if (_method) {  // Emit stub for static call.
1914       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1915       if (stub == NULL) {
1916         ciEnv::current()->record_failure("CodeCache is full");
1917         return;
1918       } 
1919     }
1920   %}
1921 
1922   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1923     MacroAssembler _masm(&cbuf);
1924     __ ic_call((address)$meth$$method);
1925   %}
1926 
1927   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1928     int disp = in_bytes(Method::from_compiled_offset());
1929     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1930 
1931     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1932     cbuf.set_insts_mark();
1933     $$$emit8$primary;
1934     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1935     emit_d8(cbuf, disp);             // Displacement
1936 
1937   %}
1938 
1939 //   Following encoding is no longer used, but may be restored if calling
1940 //   convention changes significantly.
1941 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1942 //
1943 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1944 //     // int ic_reg     = Matcher::inline_cache_reg();
1945 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1946 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1947 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1948 //
1949 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1950 //     // // so we load it immediately before the call
1951 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1952 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1953 //
1954 //     // xor rbp,ebp
1955 //     emit_opcode(cbuf, 0x33);
1956 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1957 //
1958 //     // CALL to interpreter.
1959 //     cbuf.set_insts_mark();
1960 //     $$$emit8$primary;
1961 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1962 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1963 //   %}
1964 
1965   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1966     $$$emit8$primary;
1967     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1968     $$$emit8$shift$$constant;
1969   %}
1970 
1971   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1972     // Load immediate does not have a zero or sign extended version
1973     // for 8-bit immediates
1974     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1975     $$$emit32$src$$constant;
1976   %}
1977 
1978   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1979     // Load immediate does not have a zero or sign extended version
1980     // for 8-bit immediates
1981     emit_opcode(cbuf, $primary + $dst$$reg);
1982     $$$emit32$src$$constant;
1983   %}
1984 
1985   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1986     // Load immediate does not have a zero or sign extended version
1987     // for 8-bit immediates
1988     int dst_enc = $dst$$reg;
1989     int src_con = $src$$constant & 0x0FFFFFFFFL;
1990     if (src_con == 0) {
1991       // xor dst, dst
1992       emit_opcode(cbuf, 0x33);
1993       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1994     } else {
1995       emit_opcode(cbuf, $primary + dst_enc);
1996       emit_d32(cbuf, src_con);
1997     }
1998   %}
1999 
2000   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2001     // Load immediate does not have a zero or sign extended version
2002     // for 8-bit immediates
2003     int dst_enc = $dst$$reg + 2;
2004     int src_con = ((julong)($src$$constant)) >> 32;
2005     if (src_con == 0) {
2006       // xor dst, dst
2007       emit_opcode(cbuf, 0x33);
2008       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2009     } else {
2010       emit_opcode(cbuf, $primary + dst_enc);
2011       emit_d32(cbuf, src_con);
2012     }
2013   %}
2014 
2015 
2016   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2017   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2018     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2019   %}
2020 
2021   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2022     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2023   %}
2024 
2025   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2026     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2027   %}
2028 
2029   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2030     $$$emit8$primary;
2031     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2032   %}
2033 
2034   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2035     $$$emit8$secondary;
2036     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2037   %}
2038 
2039   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2040     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2041   %}
2042 
2043   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2044     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2045   %}
2046 
2047   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2048     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2049   %}
2050 
2051   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2052     // Output immediate
2053     $$$emit32$src$$constant;
2054   %}
2055 
2056   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2057     // Output Float immediate bits
2058     jfloat jf = $src$$constant;
2059     int    jf_as_bits = jint_cast( jf );
2060     emit_d32(cbuf, jf_as_bits);
2061   %}
2062 
2063   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2064     // Output Float immediate bits
2065     jfloat jf = $src$$constant;
2066     int    jf_as_bits = jint_cast( jf );
2067     emit_d32(cbuf, jf_as_bits);
2068   %}
2069 
2070   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2071     // Output immediate
2072     $$$emit16$src$$constant;
2073   %}
2074 
2075   enc_class Con_d32(immI src) %{
2076     emit_d32(cbuf,$src$$constant);
2077   %}
2078 
2079   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2080     // Output immediate memory reference
2081     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2082     emit_d32(cbuf, 0x00);
2083   %}
2084 
2085   enc_class lock_prefix( ) %{
2086     if( os::is_MP() )
2087       emit_opcode(cbuf,0xF0);         // [Lock]
2088   %}
2089 
2090   // Cmp-xchg long value.
  // Note: we need to swap ebx and ecx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       ecx as the high order word of the new value to store but
  //       our register encoding uses ebx.
2095   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2096 
    // XCHG  ebx,ecx
2098     emit_opcode(cbuf,0x87);
2099     emit_opcode(cbuf,0xD9);
2100     // [Lock]
2101     if( os::is_MP() )
2102       emit_opcode(cbuf,0xF0);
2103     // CMPXCHG8 [Eptr]
2104     emit_opcode(cbuf,0x0F);
2105     emit_opcode(cbuf,0xC7);
2106     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  ebx,ecx
2108     emit_opcode(cbuf,0x87);
2109     emit_opcode(cbuf,0xD9);
2110   %}
2111 
2112   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2113     // [Lock]
2114     if( os::is_MP() )
2115       emit_opcode(cbuf,0xF0);
2116 
2117     // CMPXCHG [Eptr]
2118     emit_opcode(cbuf,0x0F);
2119     emit_opcode(cbuf,0xB1);
2120     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2121   %}
2122 
2123   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2124     int res_encoding = $res$$reg;
2125 
2126     // MOV  res,0
2127     emit_opcode( cbuf, 0xB8 + res_encoding);
2128     emit_d32( cbuf, 0 );
2129     // JNE,s  fail
2130     emit_opcode(cbuf,0x75);
2131     emit_d8(cbuf, 5 );
2132     // MOV  res,1
2133     emit_opcode( cbuf, 0xB8 + res_encoding);
2134     emit_d32( cbuf, 1 );
2135     // fail:
2136   %}
2137 
2138   enc_class set_instruction_start( ) %{
2139     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2140   %}
2141 
2142   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2143     int reg_encoding = $ereg$$reg;
2144     int base  = $mem$$base;
2145     int index = $mem$$index;
2146     int scale = $mem$$scale;
2147     int displace = $mem$$disp;
2148     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2149     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2150   %}
2151 
2152   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2153     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2154     int base  = $mem$$base;
2155     int index = $mem$$index;
2156     int scale = $mem$$scale;
2157     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2158     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2159     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2160   %}
2161 
2162   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2163     int r1, r2;
2164     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2165     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2166     emit_opcode(cbuf,0x0F);
2167     emit_opcode(cbuf,$tertiary);
2168     emit_rm(cbuf, 0x3, r1, r2);
2169     emit_d8(cbuf,$cnt$$constant);
2170     emit_d8(cbuf,$primary);
2171     emit_rm(cbuf, 0x3, $secondary, r1);
2172     emit_d8(cbuf,$cnt$$constant);
2173   %}
2174 
2175   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2176     emit_opcode( cbuf, 0x8B ); // Move
2177     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2178     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2179       emit_d8(cbuf,$primary);
2180       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2181       emit_d8(cbuf,$cnt$$constant-32);
2182     }
2183     emit_d8(cbuf,$primary);
2184     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2185     emit_d8(cbuf,31);
2186   %}
2187 
2188   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2189     int r1, r2;
2190     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2191     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2192 
2193     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2194     emit_rm(cbuf, 0x3, r1, r2);
2195     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2196       emit_opcode(cbuf,$primary);
2197       emit_rm(cbuf, 0x3, $secondary, r1);
2198       emit_d8(cbuf,$cnt$$constant-32);
2199     }
2200     emit_opcode(cbuf,0x33);  // XOR r2,r2
2201     emit_rm(cbuf, 0x3, r2, r2);
2202   %}
2203 
2204   // Clone of RegMem but accepts an extra parameter to access each
2205   // half of a double in memory; it never needs relocation info.
2206   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2207     emit_opcode(cbuf,$opcode$$constant);
2208     int reg_encoding = $rm_reg$$reg;
2209     int base     = $mem$$base;
2210     int index    = $mem$$index;
2211     int scale    = $mem$$scale;
2212     int displace = $mem$$disp + $disp_for_half$$constant;
2213     relocInfo::relocType disp_reloc = relocInfo::none;
2214     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2215   %}
2216 
2217   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2218   //
2219   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2220   // and it never needs relocation information.
2221   // Frequently used to move data between FPU's Stack Top and memory.
2222   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2223     int rm_byte_opcode = $rm_opcode$$constant;
2224     int base     = $mem$$base;
2225     int index    = $mem$$index;
2226     int scale    = $mem$$scale;
2227     int displace = $mem$$disp;
2228     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2229     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2230   %}
2231 
2232   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2233     int rm_byte_opcode = $rm_opcode$$constant;
2234     int base     = $mem$$base;
2235     int index    = $mem$$index;
2236     int scale    = $mem$$scale;
2237     int displace = $mem$$disp;
2238     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2239     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2240   %}
2241 
2242   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2243     int reg_encoding = $dst$$reg;
2244     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2245     int index        = 0x04;            // 0x04 indicates no index
2246     int scale        = 0x00;            // 0x00 indicates no scale
2247     int displace     = $src1$$constant; // 0x00 indicates no displacement
2248     relocInfo::relocType disp_reloc = relocInfo::none;
2249     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2250   %}
2251 
2252   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2253     // Compare dst,src
2254     emit_opcode(cbuf,0x3B);
2255     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2256     // jmp dst < src around move
2257     emit_opcode(cbuf,0x7C);
2258     emit_d8(cbuf,2);
2259     // move dst,src
2260     emit_opcode(cbuf,0x8B);
2261     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2262   %}
2263 
2264   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2265     // Compare dst,src
2266     emit_opcode(cbuf,0x3B);
2267     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2268     // jmp dst > src around move
2269     emit_opcode(cbuf,0x7F);
2270     emit_d8(cbuf,2);
2271     // move dst,src
2272     emit_opcode(cbuf,0x8B);
2273     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2274   %}
2275 
2276   enc_class enc_FPR_store(memory mem, regDPR src) %{
2277     // If src is FPR1, we can just FST to store it.
2278     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2279     int reg_encoding = 0x2; // Just store
2280     int base  = $mem$$base;
2281     int index = $mem$$index;
2282     int scale = $mem$$scale;
2283     int displace = $mem$$disp;
2284     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2285     if( $src$$reg != FPR1L_enc ) {
2286       reg_encoding = 0x3;  // Store & pop
2287       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2288       emit_d8( cbuf, 0xC0-1+$src$$reg );
2289     }
2290     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2291     emit_opcode(cbuf,$primary);
2292     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2293   %}
2294 
2295   enc_class neg_reg(rRegI dst) %{
2296     // NEG $dst
2297     emit_opcode(cbuf,0xF7);
2298     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2299   %}
2300 
2301   enc_class setLT_reg(eCXRegI dst) %{
2302     // SETLT $dst
2303     emit_opcode(cbuf,0x0F);
2304     emit_opcode(cbuf,0x9C);
2305     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2306   %}
2307 
2308   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2309     int tmpReg = $tmp$$reg;
2310 
2311     // SUB $p,$q
2312     emit_opcode(cbuf,0x2B);
2313     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2314     // SBB $tmp,$tmp
2315     emit_opcode(cbuf,0x1B);
2316     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2317     // AND $tmp,$y
2318     emit_opcode(cbuf,0x23);
2319     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2320     // ADD $p,$tmp
2321     emit_opcode(cbuf,0x03);
2322     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2323   %}
2324 
2325   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2326     // TEST shift,32
2327     emit_opcode(cbuf,0xF7);
2328     emit_rm(cbuf, 0x3, 0, ECX_enc);
2329     emit_d32(cbuf,0x20);
2330     // JEQ,s small
2331     emit_opcode(cbuf, 0x74);
2332     emit_d8(cbuf, 0x04);
2333     // MOV    $dst.hi,$dst.lo
2334     emit_opcode( cbuf, 0x8B );
2335     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2336     // CLR    $dst.lo
2337     emit_opcode(cbuf, 0x33);
2338     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2339 // small:
2340     // SHLD   $dst.hi,$dst.lo,$shift
2341     emit_opcode(cbuf,0x0F);
2342     emit_opcode(cbuf,0xA5);
2343     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL    $dst.lo,$shift
2345     emit_opcode(cbuf,0xD3);
2346     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2347   %}
2348 
2349   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2350     // TEST shift,32
2351     emit_opcode(cbuf,0xF7);
2352     emit_rm(cbuf, 0x3, 0, ECX_enc);
2353     emit_d32(cbuf,0x20);
2354     // JEQ,s small
2355     emit_opcode(cbuf, 0x74);
2356     emit_d8(cbuf, 0x04);
2357     // MOV    $dst.lo,$dst.hi
2358     emit_opcode( cbuf, 0x8B );
2359     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2360     // CLR    $dst.hi
2361     emit_opcode(cbuf, 0x33);
2362     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2363 // small:
2364     // SHRD   $dst.lo,$dst.hi,$shift
2365     emit_opcode(cbuf,0x0F);
2366     emit_opcode(cbuf,0xAD);
2367     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift
2369     emit_opcode(cbuf,0xD3);
2370     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2371   %}
2372 
2373   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2374     // TEST shift,32
2375     emit_opcode(cbuf,0xF7);
2376     emit_rm(cbuf, 0x3, 0, ECX_enc);
2377     emit_d32(cbuf,0x20);
2378     // JEQ,s small
2379     emit_opcode(cbuf, 0x74);
2380     emit_d8(cbuf, 0x05);
2381     // MOV    $dst.lo,$dst.hi
2382     emit_opcode( cbuf, 0x8B );
2383     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2384     // SAR    $dst.hi,31
2385     emit_opcode(cbuf, 0xC1);
2386     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2387     emit_d8(cbuf, 0x1F );
2388 // small:
2389     // SHRD   $dst.lo,$dst.hi,$shift
2390     emit_opcode(cbuf,0x0F);
2391     emit_opcode(cbuf,0xAD);
2392     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift
2394     emit_opcode(cbuf,0xD3);
2395     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2396   %}
2397 
2398 
2399   // ----------------- Encodings for floating point unit -----------------
2400   // May leave result in FPU-TOS or FPU reg depending on opcodes
2401   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2402     $$$emit8$primary;
2403     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2404   %}
2405 
2406   // Pop argument in FPR0 with FSTP ST(0)
2407   enc_class PopFPU() %{
2408     emit_opcode( cbuf, 0xDD );
2409     emit_d8( cbuf, 0xD8 );
2410   %}
2411 
2412   // !!!!! equivalent to Pop_Reg_F
2413   enc_class Pop_Reg_DPR( regDPR dst ) %{
2414     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2415     emit_d8( cbuf, 0xD8+$dst$$reg );
2416   %}
2417 
2418   enc_class Push_Reg_DPR( regDPR dst ) %{
2419     emit_opcode( cbuf, 0xD9 );
2420     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2421   %}
2422 
2423   enc_class strictfp_bias1( regDPR dst ) %{
2424     emit_opcode( cbuf, 0xDB );           // FLD m80real
2425     emit_opcode( cbuf, 0x2D );
2426     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2427     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2428     emit_opcode( cbuf, 0xC8+$dst$$reg );
2429   %}
2430 
2431   enc_class strictfp_bias2( regDPR dst ) %{
2432     emit_opcode( cbuf, 0xDB );           // FLD m80real
2433     emit_opcode( cbuf, 0x2D );
2434     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2435     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2436     emit_opcode( cbuf, 0xC8+$dst$$reg );
2437   %}
2438 
2439   // Special case for moving an integer register to a stack slot.
2440   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2441     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2442   %}
2443 
2444   // Special case for moving a register to a stack slot.
2445   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2446     // Opcode already emitted
2447     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2448     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2449     emit_d32(cbuf, $dst$$disp);   // Displacement
2450   %}
2451 
2452   // Push the integer in stackSlot 'src' onto FP-stack
2453   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2454     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2455   %}
2456 
2457   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2458   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2459     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2460   %}
2461 
2462   // Same as Pop_Mem_F except for opcode
2463   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2464   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2465     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2466   %}
2467 
2468   enc_class Pop_Reg_FPR( regFPR dst ) %{
2469     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2470     emit_d8( cbuf, 0xD8+$dst$$reg );
2471   %}
2472 
2473   enc_class Push_Reg_FPR( regFPR dst ) %{
2474     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2475     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2476   %}
2477 
2478   // Push FPU's float to a stack-slot, and pop FPU-stack
2479   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2480     int pop = 0x02;
2481     if ($src$$reg != FPR1L_enc) {
2482       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2483       emit_d8( cbuf, 0xC0-1+$src$$reg );
2484       pop = 0x03;
2485     }
2486     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2487   %}
2488 
2489   // Push FPU's double to a stack-slot, and pop FPU-stack
2490   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2491     int pop = 0x02;
2492     if ($src$$reg != FPR1L_enc) {
2493       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2494       emit_d8( cbuf, 0xC0-1+$src$$reg );
2495       pop = 0x03;
2496     }
2497     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2498   %}
2499 
2500   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2501   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2502     int pop = 0xD0 - 1; // -1 since we skip FLD
2503     if ($src$$reg != FPR1L_enc) {
2504       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2505       emit_d8( cbuf, 0xC0-1+$src$$reg );
2506       pop = 0xD8;
2507     }
2508     emit_opcode( cbuf, 0xDD );
2509     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2510   %}
2511 
2512 
2513   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2514     // load dst in FPR0
2515     emit_opcode( cbuf, 0xD9 );
2516     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2517     if ($src$$reg != FPR1L_enc) {
2518       // fincstp
2519       emit_opcode (cbuf, 0xD9);
2520       emit_opcode (cbuf, 0xF7);
2521       // swap src with FPR1:
2522       // FXCH FPR1 with src
2523       emit_opcode(cbuf, 0xD9);
2524       emit_d8(cbuf, 0xC8-1+$src$$reg );
2525       // fdecstp
2526       emit_opcode (cbuf, 0xD9);
2527       emit_opcode (cbuf, 0xF6);
2528     }
2529   %}
2530 
2531   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2532     MacroAssembler _masm(&cbuf);
2533     __ subptr(rsp, 8);
2534     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2535     __ fld_d(Address(rsp, 0));
2536     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2537     __ fld_d(Address(rsp, 0));
2538   %}
2539 
2540   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2541     MacroAssembler _masm(&cbuf);
2542     __ subptr(rsp, 4);
2543     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2544     __ fld_s(Address(rsp, 0));
2545     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2546     __ fld_s(Address(rsp, 0));
2547   %}
2548 
2549   enc_class Push_ResultD(regD dst) %{
2550     MacroAssembler _masm(&cbuf);
2551     __ fstp_d(Address(rsp, 0));
2552     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2553     __ addptr(rsp, 8);
2554   %}
2555 
2556   enc_class Push_ResultF(regF dst, immI d8) %{
2557     MacroAssembler _masm(&cbuf);
2558     __ fstp_s(Address(rsp, 0));
2559     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2560     __ addptr(rsp, $d8$$constant);
2561   %}
2562 
2563   enc_class Push_SrcD(regD src) %{
2564     MacroAssembler _masm(&cbuf);
2565     __ subptr(rsp, 8);
2566     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2567     __ fld_d(Address(rsp, 0));
2568   %}
2569 
2570   enc_class push_stack_temp_qword() %{
2571     MacroAssembler _masm(&cbuf);
2572     __ subptr(rsp, 8);
2573   %}
2574 
2575   enc_class pop_stack_temp_qword() %{
2576     MacroAssembler _masm(&cbuf);
2577     __ addptr(rsp, 8);
2578   %}
2579 
2580   enc_class push_xmm_to_fpr1(regD src) %{
2581     MacroAssembler _masm(&cbuf);
2582     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2583     __ fld_d(Address(rsp, 0));
2584   %}
2585 
2586   enc_class Push_Result_Mod_DPR( regDPR src) %{
2587     if ($src$$reg != FPR1L_enc) {
2588       // fincstp
2589       emit_opcode (cbuf, 0xD9);
2590       emit_opcode (cbuf, 0xF7);
2591       // FXCH FPR1 with src
2592       emit_opcode(cbuf, 0xD9);
2593       emit_d8(cbuf, 0xC8-1+$src$$reg );
2594       // fdecstp
2595       emit_opcode (cbuf, 0xD9);
2596       emit_opcode (cbuf, 0xF6);
2597     }
2598     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2599     // // FSTP   FPR$dst$$reg
2600     // emit_opcode( cbuf, 0xDD );
2601     // emit_d8( cbuf, 0xD8+$dst$$reg );
2602   %}
2603 
2604   enc_class fnstsw_sahf_skip_parity() %{
2605     // fnstsw ax
2606     emit_opcode( cbuf, 0xDF );
2607     emit_opcode( cbuf, 0xE0 );
2608     // sahf
2609     emit_opcode( cbuf, 0x9E );
2610     // jnp  ::skip
2611     emit_opcode( cbuf, 0x7B );
2612     emit_opcode( cbuf, 0x05 );
2613   %}
2614 
2615   enc_class emitModDPR() %{
2616     // fprem must be iterative
2617     // :: loop
2618     // fprem
2619     emit_opcode( cbuf, 0xD9 );
2620     emit_opcode( cbuf, 0xF8 );
2621     // wait
2622     emit_opcode( cbuf, 0x9b );
2623     // fnstsw ax
2624     emit_opcode( cbuf, 0xDF );
2625     emit_opcode( cbuf, 0xE0 );
2626     // sahf
2627     emit_opcode( cbuf, 0x9E );
2628     // jp  ::loop
2629     emit_opcode( cbuf, 0x0F );
2630     emit_opcode( cbuf, 0x8A );
2631     emit_opcode( cbuf, 0xF4 );
2632     emit_opcode( cbuf, 0xFF );
2633     emit_opcode( cbuf, 0xFF );
2634     emit_opcode( cbuf, 0xFF );
2635   %}
2636 
2637   enc_class fpu_flags() %{
2638     // fnstsw_ax
2639     emit_opcode( cbuf, 0xDF);
2640     emit_opcode( cbuf, 0xE0);
2641     // test ax,0x0400
2642     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2643     emit_opcode( cbuf, 0xA9 );
2644     emit_d16   ( cbuf, 0x0400 );
2645     // // // This sequence works, but stalls for 12-16 cycles on PPro
2646     // // test rax,0x0400
2647     // emit_opcode( cbuf, 0xA9 );
2648     // emit_d32   ( cbuf, 0x00000400 );
2649     //
2650     // jz exit (no unordered comparison)
2651     emit_opcode( cbuf, 0x74 );
2652     emit_d8    ( cbuf, 0x02 );
2653     // mov ah,1 - treat as LT case (set carry flag)
2654     emit_opcode( cbuf, 0xB4 );
2655     emit_d8    ( cbuf, 0x01 );
2656     // sahf
2657     emit_opcode( cbuf, 0x9E);
2658   %}
2659 
2660   enc_class cmpF_P6_fixup() %{
2661     // Fixup the integer flags in case comparison involved a NaN
2662     //
2663     // JNP exit (no unordered comparison, P-flag is set by NaN)
2664     emit_opcode( cbuf, 0x7B );
2665     emit_d8    ( cbuf, 0x03 );
2666     // MOV AH,1 - treat as LT case (set carry flag)
2667     emit_opcode( cbuf, 0xB4 );
2668     emit_d8    ( cbuf, 0x01 );
2669     // SAHF
2670     emit_opcode( cbuf, 0x9E);
2671     // NOP     // target for branch to avoid branch to branch
2672     emit_opcode( cbuf, 0x90);
2673   %}
2674 
2675 //     fnstsw_ax();
2676 //     sahf();
2677 //     movl(dst, nan_result);
2678 //     jcc(Assembler::parity, exit);
2679 //     movl(dst, less_result);
2680 //     jcc(Assembler::below, exit);
2681 //     movl(dst, equal_result);
2682 //     jcc(Assembler::equal, exit);
2683 //     movl(dst, greater_result);
2684 
2685 // less_result     =  1;
2686 // greater_result  = -1;
2687 // equal_result    = 0;
2688 // nan_result      = -1;
2689 
2690   enc_class CmpF_Result(rRegI dst) %{
2691     // fnstsw_ax();
2692     emit_opcode( cbuf, 0xDF);
2693     emit_opcode( cbuf, 0xE0);
2694     // sahf
2695     emit_opcode( cbuf, 0x9E);
2696     // movl(dst, nan_result);
2697     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2698     emit_d32( cbuf, -1 );
2699     // jcc(Assembler::parity, exit);
2700     emit_opcode( cbuf, 0x7A );
2701     emit_d8    ( cbuf, 0x13 );
2702     // movl(dst, less_result);
2703     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2704     emit_d32( cbuf, -1 );
2705     // jcc(Assembler::below, exit);
2706     emit_opcode( cbuf, 0x72 );
2707     emit_d8    ( cbuf, 0x0C );
2708     // movl(dst, equal_result);
2709     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2710     emit_d32( cbuf, 0 );
2711     // jcc(Assembler::equal, exit);
2712     emit_opcode( cbuf, 0x74 );
2713     emit_d8    ( cbuf, 0x05 );
2714     // movl(dst, greater_result);
2715     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2716     emit_d32( cbuf, 1 );
2717   %}
2718 
2719 
2720   // Compare the longs and set flags
2721   // BROKEN!  Do Not use as-is
2722   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2723     // CMP    $src1.hi,$src2.hi
2724     emit_opcode( cbuf, 0x3B );
2725     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2726     // JNE,s  done
2727     emit_opcode(cbuf,0x75);
2728     emit_d8(cbuf, 2 );
2729     // CMP    $src1.lo,$src2.lo
2730     emit_opcode( cbuf, 0x3B );
2731     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2732 // done:
2733   %}
2734 
2735   enc_class convert_int_long( regL dst, rRegI src ) %{
2736     // mov $dst.lo,$src
2737     int dst_encoding = $dst$$reg;
2738     int src_encoding = $src$$reg;
2739     encode_Copy( cbuf, dst_encoding  , src_encoding );
2740     // mov $dst.hi,$src
2741     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2742     // sar $dst.hi,31
2743     emit_opcode( cbuf, 0xC1 );
2744     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2745     emit_d8(cbuf, 0x1F );
2746   %}
2747 
2748   enc_class convert_long_double( eRegL src ) %{
2749     // push $src.hi
2750     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2751     // push $src.lo
2752     emit_opcode(cbuf, 0x50+$src$$reg  );
2753     // fild 64-bits at [SP]
2754     emit_opcode(cbuf,0xdf);
2755     emit_d8(cbuf, 0x6C);
2756     emit_d8(cbuf, 0x24);
2757     emit_d8(cbuf, 0x00);
2758     // pop stack
2759     emit_opcode(cbuf, 0x83); // add  SP, #8
2760     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2761     emit_d8(cbuf, 0x8);
2762   %}
2763 
2764   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2765     // IMUL   EDX:EAX,$src1
2766     emit_opcode( cbuf, 0xF7 );
2767     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2768     // SAR    EDX,$cnt-32
2769     int shift_count = ((int)$cnt$$constant) - 32;
2770     if (shift_count > 0) {
2771       emit_opcode(cbuf, 0xC1);
2772       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2773       emit_d8(cbuf, shift_count);
2774     }
2775   %}
2776 
2777   // this version doesn't have add sp, 8
2778   enc_class convert_long_double2( eRegL src ) %{
2779     // push $src.hi
2780     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2781     // push $src.lo
2782     emit_opcode(cbuf, 0x50+$src$$reg  );
2783     // fild 64-bits at [SP]
2784     emit_opcode(cbuf,0xdf);
2785     emit_d8(cbuf, 0x6C);
2786     emit_d8(cbuf, 0x24);
2787     emit_d8(cbuf, 0x00);
2788   %}
2789 
2790   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2791     // Basic idea: long = (long)int * (long)int
2792     // IMUL EDX:EAX, src
2793     emit_opcode( cbuf, 0xF7 );
2794     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2795   %}
2796 
2797   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2798     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2799     // MUL EDX:EAX, src
2800     emit_opcode( cbuf, 0xF7 );
2801     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2802   %}
2803 
2804   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2805     // Basic idea: lo(result) = lo(x_lo * y_lo)
2806     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
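    // (Why this works, as a sketch: writing x = x_hi*2^32 + x_lo and
    //  y = y_hi*2^32 + y_lo, the full product is
    //    x*y = x_lo*y_lo + 2^32*(x_hi*y_lo + x_lo*y_hi) + 2^64*(x_hi*y_hi).
    //  Truncating to 64 bits drops the 2^64 term, and only the low 32 bits of
    //  the two middle terms can reach the result's high word, which is exactly
    //  what the instruction sequence below accumulates.)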
2807     // MOV    $tmp,$src.lo
2808     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2809     // IMUL   $tmp,EDX
2810     emit_opcode( cbuf, 0x0F );
2811     emit_opcode( cbuf, 0xAF );
2812     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2813     // MOV    EDX,$src.hi
2814     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2815     // IMUL   EDX,EAX
2816     emit_opcode( cbuf, 0x0F );
2817     emit_opcode( cbuf, 0xAF );
2818     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2819     // ADD    $tmp,EDX
2820     emit_opcode( cbuf, 0x03 );
2821     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2822     // MUL   EDX:EAX,$src.lo
2823     emit_opcode( cbuf, 0xF7 );
2824     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp
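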
2826     emit_opcode( cbuf, 0x03 );
2827     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2828   %}
2829 
2830   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2831     // Basic idea: lo(result) = lo(src * y_lo)
2832     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2833     // IMUL   $tmp,EDX,$src
2834     emit_opcode( cbuf, 0x6B );
2835     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2836     emit_d8( cbuf, (int)$src$$constant );
2837     // MOV    EDX,$src
2838     emit_opcode(cbuf, 0xB8 + EDX_enc);
2839     emit_d32( cbuf, (int)$src$$constant );
2840     // MUL   EDX:EAX,EDX
2841     emit_opcode( cbuf, 0xF7 );
2842     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp
2844     emit_opcode( cbuf, 0x03 );
2845     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2846   %}
2847 
2848   enc_class long_div( eRegL src1, eRegL src2 ) %{
2849     // PUSH src1.hi
2850     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2851     // PUSH src1.lo
2852     emit_opcode(cbuf,               0x50+$src1$$reg  );
2853     // PUSH src2.hi
2854     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2855     // PUSH src2.lo
2856     emit_opcode(cbuf,               0x50+$src2$$reg  );
2857     // CALL directly to the runtime
2858     cbuf.set_insts_mark();
2859     emit_opcode(cbuf,0xE8);       // Call into runtime
2860     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2861     // Restore stack
2862     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2863     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2864     emit_d8(cbuf, 4*4);
2865   %}
2866 
2867   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2868     // PUSH src1.hi
2869     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2870     // PUSH src1.lo
2871     emit_opcode(cbuf,               0x50+$src1$$reg  );
2872     // PUSH src2.hi
2873     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2874     // PUSH src2.lo
2875     emit_opcode(cbuf,               0x50+$src2$$reg  );
2876     // CALL directly to the runtime
2877     cbuf.set_insts_mark();
2878     emit_opcode(cbuf,0xE8);       // Call into runtime
2879     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2880     // Restore stack
2881     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2882     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2883     emit_d8(cbuf, 4*4);
2884   %}
2885 
2886   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2887     // MOV   $tmp,$src.lo
2888     emit_opcode(cbuf, 0x8B);
2889     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2890     // OR    $tmp,$src.hi
2891     emit_opcode(cbuf, 0x0B);
2892     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2893   %}
2894 
2895   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2896     // CMP    $src1.lo,$src2.lo
2897     emit_opcode( cbuf, 0x3B );
2898     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2899     // JNE,s  skip
2900     emit_cc(cbuf, 0x70, 0x5);
2901     emit_d8(cbuf,2);
2902     // CMP    $src1.hi,$src2.hi
2903     emit_opcode( cbuf, 0x3B );
2904     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2905   %}
2906 
2907   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2908     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2909     emit_opcode( cbuf, 0x3B );
2910     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2911     // MOV    $tmp,$src1.hi
2912     emit_opcode( cbuf, 0x8B );
2913     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2914     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2915     emit_opcode( cbuf, 0x1B );
2916     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2917   %}
2918 
2919   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2920     // XOR    $tmp,$tmp
2921     emit_opcode(cbuf,0x33);  // XOR
2922     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2923     // CMP    $tmp,$src.lo
2924     emit_opcode( cbuf, 0x3B );
2925     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2926     // SBB    $tmp,$src.hi
2927     emit_opcode( cbuf, 0x1B );
2928     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2929   %}
2930 
2931  // Sniff, sniff... smells like Gnu Superoptimizer
2932   enc_class neg_long( eRegL dst ) %{
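    // Two's-complement negation of a 64-bit register pair: NEG sets CF when
    // its operand was non-zero, so after negating the low word the SBB hi,0
    // borrows the extra 1 out of the (already negated) high word exactly when
    // the low word was non-zero -- the standard 64-bit "-x" idiom.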
2933     emit_opcode(cbuf,0xF7);    // NEG hi
2934     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2935     emit_opcode(cbuf,0xF7);    // NEG lo
2936     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2937     emit_opcode(cbuf,0x83);    // SBB hi,0
2938     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2939     emit_d8    (cbuf,0 );
2940   %}
2941 
2942   enc_class enc_pop_rdx() %{
2943     emit_opcode(cbuf,0x5A);
2944   %}
2945 
2946   enc_class enc_rethrow() %{
2947     cbuf.set_insts_mark();
2948     emit_opcode(cbuf, 0xE9);        // jmp    entry
2949     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2950                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2951   %}
2952 
2953 
2954   // Convert a double to an int.  Java semantics require we do complex
2955   // manglelations in the corner cases.  So we set the rounding mode to
2956   // 'zero', store the darned double down as an int, and reset the
2957   // rounding mode to 'nearest'.  The hardware throws an exception which
2958   // patches up the correct value directly to the stack.
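  // For reference, a sketch of the Java-level semantics (JLS d2i narrowing)
  // that the slow path must restore: NaN converts to 0, values at or above
  // Integer.MAX_VALUE saturate to MAX_VALUE, values at or below
  // Integer.MIN_VALUE saturate to MIN_VALUE, and everything else truncates
  // toward zero.  With the invalid-op exception masked, FISTP stores the
  // "integer indefinite" value 0x80000000 for every such corner case, which
  // is why the code below compares the popped result against 0x80000000 and
  // falls into the d2i_wrapper slow path only when it sees that sentinel.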
2959   enc_class DPR2I_encoding( regDPR src ) %{
2960     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2961     // exceptions here, so that a NAN or other corner-case value will
    // throw an exception (but normal values get converted at full speed).
2963     // However, I2C adapters and other float-stack manglers leave pending
2964     // invalid-op exceptions hanging.  We would have to clear them before
2965     // enabling them and that is more expensive than just testing for the
2966     // invalid value Intel stores down in the corner cases.
2967     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2968     emit_opcode(cbuf,0x2D);
2969     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2970     // Allocate a word
2971     emit_opcode(cbuf,0x83);            // SUB ESP,4
2972     emit_opcode(cbuf,0xEC);
2973     emit_d8(cbuf,0x04);
2974     // Encoding assumes a double has been pushed into FPR0.
2975     // Store down the double as an int, popping the FPU stack
2976     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2977     emit_opcode(cbuf,0x1C);
2978     emit_d8(cbuf,0x24);
2979     // Restore the rounding mode; mask the exception
2980     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2981     emit_opcode(cbuf,0x2D);
2982     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2983         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2984         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2985 
2986     // Load the converted int; adjust CPU stack
2987     emit_opcode(cbuf,0x58);       // POP EAX
2988     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2989     emit_d32   (cbuf,0x80000000); //         0x80000000
2990     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2991     emit_d8    (cbuf,0x07);       // Size of slow_call
2992     // Push src onto stack slow-path
2993     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
2994     emit_d8    (cbuf,0xC0-1+$src$$reg );
2995     // CALL directly to the runtime
2996     cbuf.set_insts_mark();
2997     emit_opcode(cbuf,0xE8);       // Call into runtime
2998     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2999     // Carry on here...
3000   %}
3001 
3002   enc_class DPR2L_encoding( regDPR src ) %{
3003     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3004     emit_opcode(cbuf,0x2D);
3005     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
    // Allocate two words (8 bytes)
3007     emit_opcode(cbuf,0x83);            // SUB ESP,8
3008     emit_opcode(cbuf,0xEC);
3009     emit_d8(cbuf,0x08);
3010     // Encoding assumes a double has been pushed into FPR0.
3011     // Store down the double as a long, popping the FPU stack
3012     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3013     emit_opcode(cbuf,0x3C);
3014     emit_d8(cbuf,0x24);
3015     // Restore the rounding mode; mask the exception
3016     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3017     emit_opcode(cbuf,0x2D);
3018     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3019         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3020         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3021 
    // Load the converted long; adjust CPU stack
3023     emit_opcode(cbuf,0x58);       // POP EAX
3024     emit_opcode(cbuf,0x5A);       // POP EDX
3025     emit_opcode(cbuf,0x81);       // CMP EDX,imm
    emit_d8    (cbuf,0xFA);       // EDX
3027     emit_d32   (cbuf,0x80000000); //         0x80000000
3028     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3029     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3030     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
    emit_opcode(cbuf,0xC0);       // ModRM: EAX,EAX
3032     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3033     emit_d8    (cbuf,0x07);       // Size of slow_call
3034     // Push src onto stack slow-path
3035     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3036     emit_d8    (cbuf,0xC0-1+$src$$reg );
3037     // CALL directly to the runtime
3038     cbuf.set_insts_mark();
3039     emit_opcode(cbuf,0xE8);       // Call into runtime
3040     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3041     // Carry on here...
3042   %}
3043 
3044   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3045     // Operand was loaded from memory into fp ST (stack top)
3046     // FMUL   ST,$src  /* D8 C8+i */
3047     emit_opcode(cbuf, 0xD8);
3048     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3049   %}
3050 
3051   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
    // FADD   ST,src2  /* D8 C0+i */
3053     emit_opcode(cbuf, 0xD8);
3054     emit_opcode(cbuf, 0xC0 + $src2$$reg);
    // could use FADDP  src2,fpST  /* DE C0+i */
3056   %}
3057 
3058   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3059     // FADDP  src2,ST  /* DE C0+i */
3060     emit_opcode(cbuf, 0xDE);
3061     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3062   %}
3063 
3064   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3065     // Operand has been loaded into fp ST (stack top)
3066       // FSUB   ST,$src1
3067       emit_opcode(cbuf, 0xD8);
3068       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3069 
3070       // FDIV
3071       emit_opcode(cbuf, 0xD8);
3072       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3073   %}
3074 
3075   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3076     // Operand was loaded from memory into fp ST (stack top)
3077     // FADD   ST,$src  /* D8 C0+i */
3078     emit_opcode(cbuf, 0xD8);
3079     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3080 
    // FMUL  ST,src2  /* D8 C8+i */
3082     emit_opcode(cbuf, 0xD8);
3083     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3084   %}
3085 
3086 
3087   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3088     // Operand was loaded from memory into fp ST (stack top)
3089     // FADD   ST,$src  /* D8 C0+i */
3090     emit_opcode(cbuf, 0xD8);
3091     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3092 
3093     // FMULP  src2,ST  /* DE C8+i */
3094     emit_opcode(cbuf, 0xDE);
3095     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3096   %}
3097 
3098   // Atomically load the volatile long
3099   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3100     emit_opcode(cbuf,0xDF);
3101     int rm_byte_opcode = 0x05;
3102     int base     = $mem$$base;
3103     int index    = $mem$$index;
3104     int scale    = $mem$$scale;
3105     int displace = $mem$$disp;
3106     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3107     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3108     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3109   %}
3110 
3111   // Volatile Store Long.  Must be atomic, so move it into
3112   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3113   // target address before the store (for null-ptr checks)
3114   // so the memory operand is used twice in the encoding.
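  // (A note on why FIST is used at all: on IA-32, an aligned 64-bit x87
  // load/store executes as a single memory access, whereas splitting the
  // store into two 32-bit MOVs could let another thread observe a
  // half-written value, which the Java memory model forbids for volatile
  // longs.)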
3115   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3116     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3117     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3118     emit_opcode(cbuf,0xDF);
3119     int rm_byte_opcode = 0x07;
3120     int base     = $mem$$base;
3121     int index    = $mem$$index;
3122     int scale    = $mem$$scale;
3123     int displace = $mem$$disp;
3124     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3125     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3126   %}
3127 
3128   // Safepoint Poll.  This polls the safepoint page, and causes an
3129   // exception if it is not readable. Unfortunately, it kills the condition code
  // in the process.
  // We currently use TESTL [spp],EDI
3132   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
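  //
  // The encoding below produces the two-byte sequence 85 3D followed by the
  // 32-bit polling-page address, i.e. TEST [polling_page],EDI with a
  // mod=00 / rm=101 (disp32, no base) ModRM byte.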
3133 
3134   enc_class Safepoint_Poll() %{
3135     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3136     emit_opcode(cbuf,0x85);
3137     emit_rm (cbuf, 0x0, 0x7, 0x5);
3138     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3139   %}
3140 %}
3141 
3142 
3143 //----------FRAME--------------------------------------------------------------
3144 // Definition of frame structure and management information.
3145 //
3146 //  S T A C K   L A Y O U T    Allocators stack-slot number
3147 //                             |   (to get allocators register number
3148 //  G  Owned by    |        |  v    add OptoReg::stack0())
3149 //  r   CALLER     |        |
3150 //  o     |        +--------+      pad to even-align allocators stack-slot
3151 //  w     V        |  pad0  |        numbers; owned by CALLER
3152 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3153 //  h     ^        |   in   |  5
3154 //        |        |  args  |  4   Holes in incoming args owned by SELF
3155 //  |     |        |        |  3
3156 //  |     |        +--------+
3157 //  V     |        | old out|      Empty on Intel, window on Sparc
3158 //        |    old |preserve|      Must be even aligned.
3159 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3160 //        |        |   in   |  3   area for Intel ret address
3161 //     Owned by    |preserve|      Empty on Sparc.
3162 //       SELF      +--------+
3163 //        |        |  pad2  |  2   pad to align old SP
3164 //        |        +--------+  1
3165 //        |        | locks  |  0
3166 //        |        +--------+----> OptoReg::stack0(), even aligned
3167 //        |        |  pad1  | 11   pad to align new SP
3168 //        |        +--------+
3169 //        |        |        | 10
3170 //        |        | spills |  9   spills
3171 //        V        |        |  8   (pad0 slot for callee)
3172 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3173 //        ^        |  out   |  7
3174 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3175 //     Owned by    +--------+
3176 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3177 //        |    new |preserve|      Must be even-aligned.
3178 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3179 //        |        |        |
3180 //
3181 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3182 //         known from SELF's arguments and the Java calling convention.
3183 //         Region 6-7 is determined per call site.
3184 // Note 2: If the calling convention leaves holes in the incoming argument
3185 //         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
3187 //         incoming area, as the Java calling convention is completely under
3188 //         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
3190 //         varargs C calling conventions.
3191 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3192 //         even aligned with pad0 as needed.
3193 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3194 //         region 6-11 is even aligned; it may be padded out more so that
3195 //         the region from SP to FP meets the minimum stack alignment.
3196 
3197 frame %{
3198   // What direction does stack grow in (assumed to be same for C & Java)
3199   stack_direction(TOWARDS_LOW);
3200 
3201   // These three registers define part of the calling convention
3202   // between compiled code and the interpreter.
3203   inline_cache_reg(EAX);                // Inline Cache Register
3204   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3205 
3206   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3207   cisc_spilling_operand_name(indOffset32);
3208 
3209   // Number of stack slots consumed by locking an object
3210   sync_stack_slots(1);
3211 
3212   // Compiled code's Frame Pointer
3213   frame_pointer(ESP);
3214   // Interpreter stores its frame pointer in a register which is
3215   // stored to the stack by I2CAdaptors.
3216   // I2CAdaptors convert from interpreted java to compiled java.
3217   interpreter_frame_pointer(EBP);
3218 
3219   // Stack alignment requirement
3220   // Alignment size in bytes (128-bit -> 16 bytes)
3221   stack_alignment(StackAlignmentInBytes);
3222 
3223   // Number of stack slots between incoming argument block and the start of
3224   // a new frame.  The PROLOG must add this many slots to the stack.  The
3225   // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for EBP (must save EBP)
3227   in_preserve_stack_slots(2+VerifyStackAtCalls);
3228 
3229   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3230   // for calls to C.  Supports the var-args backing area for register parms.
3231   varargs_C_out_slots_killed(0);
3232 
3233   // The after-PROLOG location of the return address.  Location of
3234   // return address specifies a type (REG or STACK) and a number
3235   // representing the register number (i.e. - use a register name) or
3236   // stack slot.
3237   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3238   // Otherwise, it is above the locks and verification slot and alignment word
3239   return_addr(STACK - 1 +
3240               round_to((Compile::current()->in_preserve_stack_slots() +
3241                         Compile::current()->fixed_slots()),
3242                        stack_alignment_in_slots()));
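
  // For illustration only (assuming 16-byte stack alignment, i.e. 4 slots,
  // VerifyStackAtCalls off and no fixed slots): the expression above works
  // out to STACK - 1 + round_to(2, 4) = STACK + 3.  The example is only meant
  // to show how the preserve-area size, fixed slots and alignment combine.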
3243 
3244   // Body of function which returns an integer array locating
3245   // arguments either in registers or in stack slots.  Passed an array
3246   // of ideal registers called "sig" and a "length" count.  Stack-slot
3247   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3248   // arguments for a CALLEE.  Incoming stack arguments are
3249   // automatically biased by the preserve_stack_slots field above.
3250   calling_convention %{
    // No difference between incoming/outgoing, so just pass false
3252     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3253   %}
3254 
3255 
3256   // Body of function which returns an integer array locating
3257   // arguments either in registers or in stack slots.  Passed an array
3258   // of ideal registers called "sig" and a "length" count.  Stack-slot
3259   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3260   // arguments for a CALLEE.  Incoming stack arguments are
3261   // automatically biased by the preserve_stack_slots field above.
3262   c_calling_convention %{
3263     // This is obviously always outgoing
3264     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3265   %}
3266 
3267   // Location of C & interpreter return values
3268   c_return_value %{
3269     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3270     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3271     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3272 
3273     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3274     // that C functions return float and double results in XMM0.
3275     if( ideal_reg == Op_RegD && UseSSE>=2 )
3276       return OptoRegPair(XMM0b_num,XMM0_num);
3277     if( ideal_reg == Op_RegF && UseSSE>=2 )
3278       return OptoRegPair(OptoReg::Bad,XMM0_num);
3279 
3280     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3281   %}
3282 
3283   // Location of return values
3284   return_value %{
3285     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3286     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3287     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3288     if( ideal_reg == Op_RegD && UseSSE>=2 )
3289       return OptoRegPair(XMM0b_num,XMM0_num);
3290     if( ideal_reg == Op_RegF && UseSSE>=1 )
3291       return OptoRegPair(OptoReg::Bad,XMM0_num);
3292     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3293   %}
3294 
3295 %}
3296 
3297 //----------ATTRIBUTES---------------------------------------------------------
3298 //----------Operand Attributes-------------------------------------------------
3299 op_attrib op_cost(0);        // Required cost attribute
3300 
3301 //----------Instruction Attributes---------------------------------------------
3302 ins_attrib ins_cost(100);       // Required cost attribute
3303 ins_attrib ins_size(8);         // Required size attribute (in bits)
3304 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3305                                 // non-matching short branch variant of some
                                // long branch?
3307 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3308                                 // specifies the alignment that some part of the instruction (not
3309                                 // necessarily the start) requires.  If > 1, a compute_padding()
3310                                 // function must be provided for the instruction
3311 
3312 //----------OPERANDS-----------------------------------------------------------
3313 // Operand definitions must precede instruction definitions for correct parsing
3314 // in the ADLC because operands constitute user defined types which are used in
3315 // instruction definitions.
3316 
3317 //----------Simple Operands----------------------------------------------------
3318 // Immediate Operands
3319 // Integer Immediate
3320 operand immI() %{
3321   match(ConI);
3322 
3323   op_cost(10);
3324   format %{ %}
3325   interface(CONST_INTER);
3326 %}
3327 
3328 // Constant for test vs zero
3329 operand immI0() %{
3330   predicate(n->get_int() == 0);
3331   match(ConI);
3332 
3333   op_cost(0);
3334   format %{ %}
3335   interface(CONST_INTER);
3336 %}
3337 
3338 // Constant for increment
3339 operand immI1() %{
3340   predicate(n->get_int() == 1);
3341   match(ConI);
3342 
3343   op_cost(0);
3344   format %{ %}
3345   interface(CONST_INTER);
3346 %}
3347 
3348 // Constant for decrement
3349 operand immI_M1() %{
3350   predicate(n->get_int() == -1);
3351   match(ConI);
3352 
3353   op_cost(0);
3354   format %{ %}
3355   interface(CONST_INTER);
3356 %}
3357 
3358 // Valid scale values for addressing modes
3359 operand immI2() %{
3360   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3361   match(ConI);
3362 
3363   format %{ %}
3364   interface(CONST_INTER);
3365 %}
3366 
3367 operand immI8() %{
3368   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3369   match(ConI);
3370 
3371   op_cost(5);
3372   format %{ %}
3373   interface(CONST_INTER);
3374 %}
3375 
3376 operand immI16() %{
3377   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3378   match(ConI);
3379 
3380   op_cost(10);
3381   format %{ %}
3382   interface(CONST_INTER);
3383 %}
3384 
3385 // Int Immediate non-negative
3386 operand immU31()
3387 %{
3388   predicate(n->get_int() >= 0);
3389   match(ConI);
3390 
3391   op_cost(0);
3392   format %{ %}
3393   interface(CONST_INTER);
3394 %}
3395 
3396 // Constant for long shifts
3397 operand immI_32() %{
3398   predicate( n->get_int() == 32 );
3399   match(ConI);
3400 
3401   op_cost(0);
3402   format %{ %}
3403   interface(CONST_INTER);
3404 %}
3405 
3406 operand immI_1_31() %{
3407   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3408   match(ConI);
3409 
3410   op_cost(0);
3411   format %{ %}
3412   interface(CONST_INTER);
3413 %}
3414 
3415 operand immI_32_63() %{
3416   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3417   match(ConI);
3418   op_cost(0);
3419 
3420   format %{ %}
3421   interface(CONST_INTER);
3422 %}
3423 
3424 operand immI_1() %{
3425   predicate( n->get_int() == 1 );
3426   match(ConI);
3427 
3428   op_cost(0);
3429   format %{ %}
3430   interface(CONST_INTER);
3431 %}
3432 
3433 operand immI_2() %{
3434   predicate( n->get_int() == 2 );
3435   match(ConI);
3436 
3437   op_cost(0);
3438   format %{ %}
3439   interface(CONST_INTER);
3440 %}
3441 
3442 operand immI_3() %{
3443   predicate( n->get_int() == 3 );
3444   match(ConI);
3445 
3446   op_cost(0);
3447   format %{ %}
3448   interface(CONST_INTER);
3449 %}
3450 
3451 // Pointer Immediate
3452 operand immP() %{
3453   match(ConP);
3454 
3455   op_cost(10);
3456   format %{ %}
3457   interface(CONST_INTER);
3458 %}
3459 
3460 // NULL Pointer Immediate
3461 operand immP0() %{
3462   predicate( n->get_ptr() == 0 );
3463   match(ConP);
3464   op_cost(0);
3465 
3466   format %{ %}
3467   interface(CONST_INTER);
3468 %}
3469 
3470 // Long Immediate
3471 operand immL() %{
3472   match(ConL);
3473 
3474   op_cost(20);
3475   format %{ %}
3476   interface(CONST_INTER);
3477 %}
3478 
3479 // Long Immediate zero
3480 operand immL0() %{
3481   predicate( n->get_long() == 0L );
3482   match(ConL);
3483   op_cost(0);
3484 
3485   format %{ %}
3486   interface(CONST_INTER);
3487 %}
3488 
// Long Immediate -1
3490 operand immL_M1() %{
3491   predicate( n->get_long() == -1L );
3492   match(ConL);
3493   op_cost(0);
3494 
3495   format %{ %}
3496   interface(CONST_INTER);
3497 %}
3498 
3499 // Long immediate from 0 to 127.
3500 // Used for a shorter form of long mul by 10.
3501 operand immL_127() %{
3502   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3503   match(ConL);
3504   op_cost(0);
3505 
3506   format %{ %}
3507   interface(CONST_INTER);
3508 %}
3509 
3510 // Long Immediate: low 32-bit mask
3511 operand immL_32bits() %{
3512   predicate(n->get_long() == 0xFFFFFFFFL);
3513   match(ConL);
3514   op_cost(0);
3515 
3516   format %{ %}
3517   interface(CONST_INTER);
3518 %}
3519 
// Long Immediate: value fits in a signed 32-bit immediate
3521 operand immL32() %{
3522   predicate(n->get_long() == (int)(n->get_long()));
3523   match(ConL);
3524   op_cost(20);
3525 
3526   format %{ %}
3527   interface(CONST_INTER);
3528 %}
3529 
// Double Immediate zero
3531 operand immDPR0() %{
3532   // Do additional (and counter-intuitive) test against NaN to work around VC++
3533   // bug that generates code such that NaNs compare equal to 0.0
3534   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3535   match(ConD);
3536 
3537   op_cost(5);
3538   format %{ %}
3539   interface(CONST_INTER);
3540 %}
3541 
3542 // Double Immediate one
3543 operand immDPR1() %{
3544   predicate( UseSSE<=1 && n->getd() == 1.0 );
3545   match(ConD);
3546 
3547   op_cost(5);
3548   format %{ %}
3549   interface(CONST_INTER);
3550 %}
3551 
3552 // Double Immediate
3553 operand immDPR() %{
3554   predicate(UseSSE<=1);
3555   match(ConD);
3556 
3557   op_cost(5);
3558   format %{ %}
3559   interface(CONST_INTER);
3560 %}
3561 
3562 operand immD() %{
3563   predicate(UseSSE>=2);
3564   match(ConD);
3565 
3566   op_cost(5);
3567   format %{ %}
3568   interface(CONST_INTER);
3569 %}
3570 
3571 // Double Immediate zero
3572 operand immD0() %{
3573   // Do additional (and counter-intuitive) test against NaN to work around VC++
3574   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3575   // compare equal to -0.0.
3576   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3577   match(ConD);
3578 
3579   format %{ %}
3580   interface(CONST_INTER);
3581 %}
3582 
3583 // Float Immediate zero
3584 operand immFPR0() %{
3585   predicate(UseSSE == 0 && n->getf() == 0.0F);
3586   match(ConF);
3587 
3588   op_cost(5);
3589   format %{ %}
3590   interface(CONST_INTER);
3591 %}
3592 
3593 // Float Immediate one
3594 operand immFPR1() %{
3595   predicate(UseSSE == 0 && n->getf() == 1.0F);
3596   match(ConF);
3597 
3598   op_cost(5);
3599   format %{ %}
3600   interface(CONST_INTER);
3601 %}
3602 
3603 // Float Immediate
3604 operand immFPR() %{
3605   predicate( UseSSE == 0 );
3606   match(ConF);
3607 
3608   op_cost(5);
3609   format %{ %}
3610   interface(CONST_INTER);
3611 %}
3612 
3613 // Float Immediate
3614 operand immF() %{
3615   predicate(UseSSE >= 1);
3616   match(ConF);
3617 
3618   op_cost(5);
3619   format %{ %}
3620   interface(CONST_INTER);
3621 %}
3622 
3623 // Float Immediate zero.  Zero and not -0.0
3624 operand immF0() %{
3625   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3626   match(ConF);
3627 
3628   op_cost(5);
3629   format %{ %}
3630   interface(CONST_INTER);
3631 %}
3632 
3633 // Immediates for special shifts (sign extend)
3634 
// Constants for sign-extension shift amounts
3636 operand immI_16() %{
3637   predicate( n->get_int() == 16 );
3638   match(ConI);
3639 
3640   format %{ %}
3641   interface(CONST_INTER);
3642 %}
3643 
3644 operand immI_24() %{
3645   predicate( n->get_int() == 24 );
3646   match(ConI);
3647 
3648   format %{ %}
3649   interface(CONST_INTER);
3650 %}
3651 
3652 // Constant for byte-wide masking
3653 operand immI_255() %{
3654   predicate( n->get_int() == 255 );
3655   match(ConI);
3656 
3657   format %{ %}
3658   interface(CONST_INTER);
3659 %}
3660 
3661 // Constant for short-wide masking
3662 operand immI_65535() %{
3663   predicate(n->get_int() == 65535);
3664   match(ConI);
3665 
3666   format %{ %}
3667   interface(CONST_INTER);
3668 %}
3669 
3670 // Register Operands
3671 // Integer Register
3672 operand rRegI() %{
3673   constraint(ALLOC_IN_RC(int_reg));
3674   match(RegI);
3675   match(xRegI);
3676   match(eAXRegI);
3677   match(eBXRegI);
3678   match(eCXRegI);
3679   match(eDXRegI);
3680   match(eDIRegI);
3681   match(eSIRegI);
3682 
3683   format %{ %}
3684   interface(REG_INTER);
3685 %}
3686 
3687 // Subset of Integer Register
3688 operand xRegI(rRegI reg) %{
3689   constraint(ALLOC_IN_RC(int_x_reg));
3690   match(reg);
3691   match(eAXRegI);
3692   match(eBXRegI);
3693   match(eCXRegI);
3694   match(eDXRegI);
3695 
3696   format %{ %}
3697   interface(REG_INTER);
3698 %}
3699 
3700 // Special Registers
3701 operand eAXRegI(xRegI reg) %{
3702   constraint(ALLOC_IN_RC(eax_reg));
3703   match(reg);
3704   match(rRegI);
3705 
3706   format %{ "EAX" %}
3707   interface(REG_INTER);
3708 %}
3709 
3710 // Special Registers
3711 operand eBXRegI(xRegI reg) %{
3712   constraint(ALLOC_IN_RC(ebx_reg));
3713   match(reg);
3714   match(rRegI);
3715 
3716   format %{ "EBX" %}
3717   interface(REG_INTER);
3718 %}
3719 
3720 operand eCXRegI(xRegI reg) %{
3721   constraint(ALLOC_IN_RC(ecx_reg));
3722   match(reg);
3723   match(rRegI);
3724 
3725   format %{ "ECX" %}
3726   interface(REG_INTER);
3727 %}
3728 
3729 operand eDXRegI(xRegI reg) %{
3730   constraint(ALLOC_IN_RC(edx_reg));
3731   match(reg);
3732   match(rRegI);
3733 
3734   format %{ "EDX" %}
3735   interface(REG_INTER);
3736 %}
3737 
3738 operand eDIRegI(xRegI reg) %{
3739   constraint(ALLOC_IN_RC(edi_reg));
3740   match(reg);
3741   match(rRegI);
3742 
3743   format %{ "EDI" %}
3744   interface(REG_INTER);
3745 %}
3746 
3747 operand naxRegI() %{
3748   constraint(ALLOC_IN_RC(nax_reg));
3749   match(RegI);
3750   match(eCXRegI);
3751   match(eDXRegI);
3752   match(eSIRegI);
3753   match(eDIRegI);
3754 
3755   format %{ %}
3756   interface(REG_INTER);
3757 %}
3758 
3759 operand nadxRegI() %{
3760   constraint(ALLOC_IN_RC(nadx_reg));
3761   match(RegI);
3762   match(eBXRegI);
3763   match(eCXRegI);
3764   match(eSIRegI);
3765   match(eDIRegI);
3766 
3767   format %{ %}
3768   interface(REG_INTER);
3769 %}
3770 
3771 operand ncxRegI() %{
3772   constraint(ALLOC_IN_RC(ncx_reg));
3773   match(RegI);
3774   match(eAXRegI);
3775   match(eDXRegI);
3776   match(eSIRegI);
3777   match(eDIRegI);
3778 
3779   format %{ %}
3780   interface(REG_INTER);
3781 %}
3782 
// (This operand was used by cmpFastUnlock, but conflicted with 'object' reg.)
3785 operand eSIRegI(xRegI reg) %{
3786    constraint(ALLOC_IN_RC(esi_reg));
3787    match(reg);
3788    match(rRegI);
3789 
3790    format %{ "ESI" %}
3791    interface(REG_INTER);
3792 %}
3793 
3794 // Pointer Register
3795 operand anyRegP() %{
3796   constraint(ALLOC_IN_RC(any_reg));
3797   match(RegP);
3798   match(eAXRegP);
3799   match(eBXRegP);
3800   match(eCXRegP);
3801   match(eDIRegP);
3802   match(eRegP);
3803 
3804   format %{ %}
3805   interface(REG_INTER);
3806 %}
3807 
3808 operand eRegP() %{
3809   constraint(ALLOC_IN_RC(int_reg));
3810   match(RegP);
3811   match(eAXRegP);
3812   match(eBXRegP);
3813   match(eCXRegP);
3814   match(eDIRegP);
3815 
3816   format %{ %}
3817   interface(REG_INTER);
3818 %}
3819 
// On Windows 95, EBP is not safe to use for implicit null tests.
3821 operand eRegP_no_EBP() %{
3822   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3823   match(RegP);
3824   match(eAXRegP);
3825   match(eBXRegP);
3826   match(eCXRegP);
3827   match(eDIRegP);
3828 
3829   op_cost(100);
3830   format %{ %}
3831   interface(REG_INTER);
3832 %}
3833 
3834 operand naxRegP() %{
3835   constraint(ALLOC_IN_RC(nax_reg));
3836   match(RegP);
3837   match(eBXRegP);
3838   match(eDXRegP);
3839   match(eCXRegP);
3840   match(eSIRegP);
3841   match(eDIRegP);
3842 
3843   format %{ %}
3844   interface(REG_INTER);
3845 %}
3846 
3847 operand nabxRegP() %{
3848   constraint(ALLOC_IN_RC(nabx_reg));
3849   match(RegP);
3850   match(eCXRegP);
3851   match(eDXRegP);
3852   match(eSIRegP);
3853   match(eDIRegP);
3854 
3855   format %{ %}
3856   interface(REG_INTER);
3857 %}
3858 
3859 operand pRegP() %{
3860   constraint(ALLOC_IN_RC(p_reg));
3861   match(RegP);
3862   match(eBXRegP);
3863   match(eDXRegP);
3864   match(eSIRegP);
3865   match(eDIRegP);
3866 
3867   format %{ %}
3868   interface(REG_INTER);
3869 %}
3870 
3871 // Special Registers
3872 // Return a pointer value
3873 operand eAXRegP(eRegP reg) %{
3874   constraint(ALLOC_IN_RC(eax_reg));
3875   match(reg);
3876   format %{ "EAX" %}
3877   interface(REG_INTER);
3878 %}
3879 
3880 // Used in AtomicAdd
3881 operand eBXRegP(eRegP reg) %{
3882   constraint(ALLOC_IN_RC(ebx_reg));
3883   match(reg);
3884   format %{ "EBX" %}
3885   interface(REG_INTER);
3886 %}
3887 
3888 // Tail-call (interprocedural jump) to interpreter
3889 operand eCXRegP(eRegP reg) %{
3890   constraint(ALLOC_IN_RC(ecx_reg));
3891   match(reg);
3892   format %{ "ECX" %}
3893   interface(REG_INTER);
3894 %}
3895 
3896 operand eSIRegP(eRegP reg) %{
3897   constraint(ALLOC_IN_RC(esi_reg));
3898   match(reg);
3899   format %{ "ESI" %}
3900   interface(REG_INTER);
3901 %}
3902 
3903 // Used in rep stosw
3904 operand eDIRegP(eRegP reg) %{
3905   constraint(ALLOC_IN_RC(edi_reg));
3906   match(reg);
3907   format %{ "EDI" %}
3908   interface(REG_INTER);
3909 %}
3910 
3911 operand eRegL() %{
3912   constraint(ALLOC_IN_RC(long_reg));
3913   match(RegL);
3914   match(eADXRegL);
3915 
3916   format %{ %}
3917   interface(REG_INTER);
3918 %}
3919 
3920 operand eADXRegL( eRegL reg ) %{
3921   constraint(ALLOC_IN_RC(eadx_reg));
3922   match(reg);
3923 
3924   format %{ "EDX:EAX" %}
3925   interface(REG_INTER);
3926 %}
3927 
3928 operand eBCXRegL( eRegL reg ) %{
3929   constraint(ALLOC_IN_RC(ebcx_reg));
3930   match(reg);
3931 
3932   format %{ "EBX:ECX" %}
3933   interface(REG_INTER);
3934 %}
3935 
3936 // Special case for integer high multiply
3937 operand eADXRegL_low_only() %{
3938   constraint(ALLOC_IN_RC(eadx_reg));
3939   match(RegL);
3940 
3941   format %{ "EAX" %}
3942   interface(REG_INTER);
3943 %}
3944 
3945 // Flags register, used as output of compare instructions
3946 operand eFlagsReg() %{
3947   constraint(ALLOC_IN_RC(int_flags));
3948   match(RegFlags);
3949 
3950   format %{ "EFLAGS" %}
3951   interface(REG_INTER);
3952 %}
3953 
3954 // Flags register, used as output of FLOATING POINT compare instructions
3955 operand eFlagsRegU() %{
3956   constraint(ALLOC_IN_RC(int_flags));
3957   match(RegFlags);
3958 
3959   format %{ "EFLAGS_U" %}
3960   interface(REG_INTER);
3961 %}
3962 
3963 operand eFlagsRegUCF() %{
3964   constraint(ALLOC_IN_RC(int_flags));
3965   match(RegFlags);
3966   predicate(false);
3967 
3968   format %{ "EFLAGS_U_CF" %}
3969   interface(REG_INTER);
3970 %}
3971 
3972 // Condition Code Register used by long compare
3973 operand flagsReg_long_LTGE() %{
3974   constraint(ALLOC_IN_RC(int_flags));
3975   match(RegFlags);
3976   format %{ "FLAGS_LTGE" %}
3977   interface(REG_INTER);
3978 %}
3979 operand flagsReg_long_EQNE() %{
3980   constraint(ALLOC_IN_RC(int_flags));
3981   match(RegFlags);
3982   format %{ "FLAGS_EQNE" %}
3983   interface(REG_INTER);
3984 %}
3985 operand flagsReg_long_LEGT() %{
3986   constraint(ALLOC_IN_RC(int_flags));
3987   match(RegFlags);
3988   format %{ "FLAGS_LEGT" %}
3989   interface(REG_INTER);
3990 %}
3991 
// FPU Double register operands
3993 operand regDPR() %{
3994   predicate( UseSSE < 2 );
3995   constraint(ALLOC_IN_RC(fp_dbl_reg));
3996   match(RegD);
3997   match(regDPR1);
3998   match(regDPR2);
3999   format %{ %}
4000   interface(REG_INTER);
4001 %}
4002 
4003 operand regDPR1(regDPR reg) %{
4004   predicate( UseSSE < 2 );
4005   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4006   match(reg);
4007   format %{ "FPR1" %}
4008   interface(REG_INTER);
4009 %}
4010 
4011 operand regDPR2(regDPR reg) %{
4012   predicate( UseSSE < 2 );
4013   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4014   match(reg);
4015   format %{ "FPR2" %}
4016   interface(REG_INTER);
4017 %}
4018 
4019 operand regnotDPR1(regDPR reg) %{
4020   predicate( UseSSE < 2 );
4021   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4022   match(reg);
4023   format %{ %}
4024   interface(REG_INTER);
4025 %}
4026 
4027 // Float register operands
4028 operand regFPR() %{
4029   predicate( UseSSE < 2 );
4030   constraint(ALLOC_IN_RC(fp_flt_reg));
4031   match(RegF);
4032   match(regFPR1);
4033   format %{ %}
4034   interface(REG_INTER);
4035 %}
4036 
4037 // Float register operands
4038 operand regFPR1(regFPR reg) %{
4039   predicate( UseSSE < 2 );
4040   constraint(ALLOC_IN_RC(fp_flt_reg0));
4041   match(reg);
4042   format %{ "FPR1" %}
4043   interface(REG_INTER);
4044 %}
4045 
4046 // XMM Float register operands
4047 operand regF() %{
4048   predicate( UseSSE>=1 );
4049   constraint(ALLOC_IN_RC(float_reg_legacy));
4050   match(RegF);
4051   format %{ %}
4052   interface(REG_INTER);
4053 %}
4054 
4055 // XMM Double register operands
4056 operand regD() %{
4057   predicate( UseSSE>=2 );
4058   constraint(ALLOC_IN_RC(double_reg_legacy));
4059   match(RegD);
4060   format %{ %}
4061   interface(REG_INTER);
4062 %}
4063 
4064 // Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
4065 // runtime code generation via reg_class_dynamic.
4066 operand vecS() %{
4067   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4068   match(VecS);
4069 
4070   format %{ %}
4071   interface(REG_INTER);
4072 %}
4073 
4074 operand vecD() %{
4075   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4076   match(VecD);
4077 
4078   format %{ %}
4079   interface(REG_INTER);
4080 %}
4081 
4082 operand vecX() %{
4083   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4084   match(VecX);
4085 
4086   format %{ %}
4087   interface(REG_INTER);
4088 %}
4089 
4090 operand vecY() %{
4091   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4092   match(VecY);
4093 
4094   format %{ %}
4095   interface(REG_INTER);
4096 %}
4097 
4098 //----------Memory Operands----------------------------------------------------
4099 // Direct Memory Operand
4100 operand direct(immP addr) %{
4101   match(addr);
4102 
4103   format %{ "[$addr]" %}
4104   interface(MEMORY_INTER) %{
4105     base(0xFFFFFFFF);
4106     index(0x4);
4107     scale(0x0);
4108     disp($addr);
4109   %}
4110 %}
4111 
4112 // Indirect Memory Operand
4113 operand indirect(eRegP reg) %{
4114   constraint(ALLOC_IN_RC(int_reg));
4115   match(reg);
4116 
4117   format %{ "[$reg]" %}
4118   interface(MEMORY_INTER) %{
4119     base($reg);
4120     index(0x4);
4121     scale(0x0);
4122     disp(0x0);
4123   %}
4124 %}
4125 
4126 // Indirect Memory Plus Short Offset Operand
4127 operand indOffset8(eRegP reg, immI8 off) %{
4128   match(AddP reg off);
4129 
4130   format %{ "[$reg + $off]" %}
4131   interface(MEMORY_INTER) %{
4132     base($reg);
4133     index(0x4);
4134     scale(0x0);
4135     disp($off);
4136   %}
4137 %}
4138 
4139 // Indirect Memory Plus Long Offset Operand
4140 operand indOffset32(eRegP reg, immI off) %{
4141   match(AddP reg off);
4142 
4143   format %{ "[$reg + $off]" %}
4144   interface(MEMORY_INTER) %{
4145     base($reg);
4146     index(0x4);
4147     scale(0x0);
4148     disp($off);
4149   %}
4150 %}
4151 
4152 // Indirect Memory Plus Long Offset Operand
4153 operand indOffset32X(rRegI reg, immP off) %{
4154   match(AddP off reg);
4155 
4156   format %{ "[$reg + $off]" %}
4157   interface(MEMORY_INTER) %{
4158     base($reg);
4159     index(0x4);
4160     scale(0x0);
4161     disp($off);
4162   %}
4163 %}
4164 
4165 // Indirect Memory Plus Index Register Plus Offset Operand
4166 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4167   match(AddP (AddP reg ireg) off);
4168 
4169   op_cost(10);
4170   format %{"[$reg + $off + $ireg]" %}
4171   interface(MEMORY_INTER) %{
4172     base($reg);
4173     index($ireg);
4174     scale(0x0);
4175     disp($off);
4176   %}
4177 %}
4178 
4179 // Indirect Memory Plus Index Register Plus Offset Operand
4180 operand indIndex(eRegP reg, rRegI ireg) %{
4181   match(AddP reg ireg);
4182 
4183   op_cost(10);
4184   format %{"[$reg + $ireg]" %}
4185   interface(MEMORY_INTER) %{
4186     base($reg);
4187     index($ireg);
4188     scale(0x0);
4189     disp(0x0);
4190   %}
4191 %}
4192 
4193 // // -------------------------------------------------------------------------
// // 486 architecture doesn't support "scale * index + offset" without a base
4195 // // -------------------------------------------------------------------------
4196 // // Scaled Memory Operands
4197 // // Indirect Memory Times Scale Plus Offset Operand
4198 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4199 //   match(AddP off (LShiftI ireg scale));
4200 //
4201 //   op_cost(10);
4202 //   format %{"[$off + $ireg << $scale]" %}
4203 //   interface(MEMORY_INTER) %{
4204 //     base(0x4);
4205 //     index($ireg);
4206 //     scale($scale);
4207 //     disp($off);
4208 //   %}
4209 // %}
4210 
4211 // Indirect Memory Times Scale Plus Index Register
4212 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4213   match(AddP reg (LShiftI ireg scale));
4214 
4215   op_cost(10);
4216   format %{"[$reg + $ireg << $scale]" %}
4217   interface(MEMORY_INTER) %{
4218     base($reg);
4219     index($ireg);
4220     scale($scale);
4221     disp(0x0);
4222   %}
4223 %}
4224 
4225 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4226 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4227   match(AddP (AddP reg (LShiftI ireg scale)) off);
4228 
4229   op_cost(10);
4230   format %{"[$reg + $off + $ireg << $scale]" %}
4231   interface(MEMORY_INTER) %{
4232     base($reg);
4233     index($ireg);
4234     scale($scale);
4235     disp($off);
4236   %}
4237 %}
4238 
4239 //----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
4241 // the first word of the long.  If the load-long destination overlaps with
4242 // registers used in the addressing expression, the 2nd half will be loaded
4243 // from a clobbered address.  Fix this by requiring that load-long use
4244 // address registers that do not overlap with the load-long target.
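//
// For example, loading a long at [EAX+8] into EDX:EAX would otherwise emit
// something like MOV EAX,[EAX+8] followed by MOV EDX,[EAX+12]; the first MOV
// clobbers the base register, so the second half would be read from a bogus
// address -- hence the dedicated address operands below.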
4245 
4246 // load-long support
4247 operand load_long_RegP() %{
4248   constraint(ALLOC_IN_RC(esi_reg));
4249   match(RegP);
4250   match(eSIRegP);
4251   op_cost(100);
4252   format %{  %}
4253   interface(REG_INTER);
4254 %}
4255 
4256 // Indirect Memory Operand Long
4257 operand load_long_indirect(load_long_RegP reg) %{
4258   constraint(ALLOC_IN_RC(esi_reg));
4259   match(reg);
4260 
4261   format %{ "[$reg]" %}
4262   interface(MEMORY_INTER) %{
4263     base($reg);
4264     index(0x4);
4265     scale(0x0);
4266     disp(0x0);
4267   %}
4268 %}
4269 
4270 // Indirect Memory Plus Long Offset Operand
4271 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4272   match(AddP reg off);
4273 
4274   format %{ "[$reg + $off]" %}
4275   interface(MEMORY_INTER) %{
4276     base($reg);
4277     index(0x4);
4278     scale(0x0);
4279     disp($off);
4280   %}
4281 %}
4282 
4283 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4284 
4285 
4286 //----------Special Memory Operands--------------------------------------------
4287 // Stack Slot Operand - This operand is used for loading and storing temporary
4288 //                      values on the stack where a match requires a value to
4289 //                      flow through memory.
4290 operand stackSlotP(sRegP reg) %{
4291   constraint(ALLOC_IN_RC(stack_slots));
4292   // No match rule because this operand is only generated in matching
4293   format %{ "[$reg]" %}
4294   interface(MEMORY_INTER) %{
4295     base(0x4);   // ESP
4296     index(0x4);  // No Index
4297     scale(0x0);  // No Scale
4298     disp($reg);  // Stack Offset
4299   %}
4300 %}
4301 
4302 operand stackSlotI(sRegI reg) %{
4303   constraint(ALLOC_IN_RC(stack_slots));
4304   // No match rule because this operand is only generated in matching
4305   format %{ "[$reg]" %}
4306   interface(MEMORY_INTER) %{
4307     base(0x4);   // ESP
4308     index(0x4);  // No Index
4309     scale(0x0);  // No Scale
4310     disp($reg);  // Stack Offset
4311   %}
4312 %}
4313 
4314 operand stackSlotF(sRegF reg) %{
4315   constraint(ALLOC_IN_RC(stack_slots));
4316   // No match rule because this operand is only generated in matching
4317   format %{ "[$reg]" %}
4318   interface(MEMORY_INTER) %{
4319     base(0x4);   // ESP
4320     index(0x4);  // No Index
4321     scale(0x0);  // No Scale
4322     disp($reg);  // Stack Offset
4323   %}
4324 %}
4325 
4326 operand stackSlotD(sRegD reg) %{
4327   constraint(ALLOC_IN_RC(stack_slots));
4328   // No match rule because this operand is only generated in matching
4329   format %{ "[$reg]" %}
4330   interface(MEMORY_INTER) %{
4331     base(0x4);   // ESP
4332     index(0x4);  // No Index
4333     scale(0x0);  // No Scale
4334     disp($reg);  // Stack Offset
4335   %}
4336 %}
4337 
4338 operand stackSlotL(sRegL reg) %{
4339   constraint(ALLOC_IN_RC(stack_slots));
4340   // No match rule because this operand is only generated in matching
4341   format %{ "[$reg]" %}
4342   interface(MEMORY_INTER) %{
4343     base(0x4);   // ESP
4344     index(0x4);  // No Index
4345     scale(0x0);  // No Scale
4346     disp($reg);  // Stack Offset
4347   %}
4348 %}
4349 
4350 //----------Memory Operands - Win95 Implicit Null Variants----------------
4351 // Indirect Memory Operand
4352 operand indirect_win95_safe(eRegP_no_EBP reg)
4353 %{
4354   constraint(ALLOC_IN_RC(int_reg));
4355   match(reg);
4356 
4357   op_cost(100);
4358   format %{ "[$reg]" %}
4359   interface(MEMORY_INTER) %{
4360     base($reg);
4361     index(0x4);
4362     scale(0x0);
4363     disp(0x0);
4364   %}
4365 %}
4366 
4367 // Indirect Memory Plus Short Offset Operand
4368 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4369 %{
4370   match(AddP reg off);
4371 
4372   op_cost(100);
4373   format %{ "[$reg + $off]" %}
4374   interface(MEMORY_INTER) %{
4375     base($reg);
4376     index(0x4);
4377     scale(0x0);
4378     disp($off);
4379   %}
4380 %}
4381 
4382 // Indirect Memory Plus Long Offset Operand
4383 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4384 %{
4385   match(AddP reg off);
4386 
4387   op_cost(100);
4388   format %{ "[$reg + $off]" %}
4389   interface(MEMORY_INTER) %{
4390     base($reg);
4391     index(0x4);
4392     scale(0x0);
4393     disp($off);
4394   %}
4395 %}
4396 
4397 // Indirect Memory Plus Index Register Plus Offset Operand
4398 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4399 %{
4400   match(AddP (AddP reg ireg) off);
4401 
4402   op_cost(100);
4403   format %{"[$reg + $off + $ireg]" %}
4404   interface(MEMORY_INTER) %{
4405     base($reg);
4406     index($ireg);
4407     scale(0x0);
4408     disp($off);
4409   %}
4410 %}
4411 
4412 // Indirect Memory Times Scale Plus Index Register
4413 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4414 %{
4415   match(AddP reg (LShiftI ireg scale));
4416 
4417   op_cost(100);
4418   format %{"[$reg + $ireg << $scale]" %}
4419   interface(MEMORY_INTER) %{
4420     base($reg);
4421     index($ireg);
4422     scale($scale);
4423     disp(0x0);
4424   %}
4425 %}
4426 
4427 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4428 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4429 %{
4430   match(AddP (AddP reg (LShiftI ireg scale)) off);
4431 
4432   op_cost(100);
4433   format %{"[$reg + $off + $ireg << $scale]" %}
4434   interface(MEMORY_INTER) %{
4435     base($reg);
4436     index($ireg);
4437     scale($scale);
4438     disp($off);
4439   %}
4440 %}
4441 
4442 //----------Conditional Branch Operands----------------------------------------
4443 // Comparison Op  - This is the operation of the comparison, and is limited to
4444 //                  the following set of codes:
4445 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4446 //
4447 // Other attributes of the comparison, such as unsignedness, are specified
4448 // by the comparison instruction that sets a condition code flags register.
4449 // That result is represented by a flags operand whose subtype is appropriate
4450 // to the unsignedness (etc.) of the comparison.
4451 //
4452 // Later, the instruction which matches both the Comparison Op (a Bool) and
4453 // the flags (produced by the Cmp) specifies the coding of the comparison op
4454 // by matching a specific subtype of Bool operand below, such as cmpOpU.
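//
// For example, a signed integer compare produces an eFlagsReg result and the
// branch matches cmpOp, so "less" encodes as 0xC (JL); an unsigned or
// floating-point compare produces eFlagsRegU and matches cmpOpU, where the
// same "less" test encodes as 0x2 (JB).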
4455 
// Comparison Code
4457 operand cmpOp() %{
4458   match(Bool);
4459 
4460   format %{ "" %}
4461   interface(COND_INTER) %{
4462     equal(0x4, "e");
4463     not_equal(0x5, "ne");
4464     less(0xC, "l");
4465     greater_equal(0xD, "ge");
4466     less_equal(0xE, "le");
4467     greater(0xF, "g");
4468     overflow(0x0, "o");
4469     no_overflow(0x1, "no");
4470   %}
4471 %}
4472 
4473 // Comparison Code, unsigned compare.  Used by FP also, with
4474 // C2 (unordered) turned into GT or LT already.  The other bits
4475 // C0 and C3 are turned into Carry & Zero flags.
4476 operand cmpOpU() %{
4477   match(Bool);
4478 
4479   format %{ "" %}
4480   interface(COND_INTER) %{
4481     equal(0x4, "e");
4482     not_equal(0x5, "ne");
4483     less(0x2, "b");
4484     greater_equal(0x3, "nb");
4485     less_equal(0x6, "be");
4486     greater(0x7, "nbe");
4487     overflow(0x0, "o");
4488     no_overflow(0x1, "no");
4489   %}
4490 %}
4491 
4492 // Floating comparisons that don't require any fixup for the unordered case
4493 operand cmpOpUCF() %{
4494   match(Bool);
4495   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4496             n->as_Bool()->_test._test == BoolTest::ge ||
4497             n->as_Bool()->_test._test == BoolTest::le ||
4498             n->as_Bool()->_test._test == BoolTest::gt);
4499   format %{ "" %}
4500   interface(COND_INTER) %{
4501     equal(0x4, "e");
4502     not_equal(0x5, "ne");
4503     less(0x2, "b");
4504     greater_equal(0x3, "nb");
4505     less_equal(0x6, "be");
4506     greater(0x7, "nbe");
4507     overflow(0x0, "o");
4508     no_overflow(0x1, "no");
4509   %}
4510 %}
4511 
4512 
4513 // Floating comparisons that can be fixed up with extra conditional jumps
4514 operand cmpOpUCF2() %{
4515   match(Bool);
4516   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4517             n->as_Bool()->_test._test == BoolTest::eq);
4518   format %{ "" %}
4519   interface(COND_INTER) %{
4520     equal(0x4, "e");
4521     not_equal(0x5, "ne");
4522     less(0x2, "b");
4523     greater_equal(0x3, "nb");
4524     less_equal(0x6, "be");
4525     greater(0x7, "nbe");
4526     overflow(0x0, "o");
4527     no_overflow(0x1, "no");
4528   %}
4529 %}
4530 
4531 // Comparison Code for FP conditional move
4532 operand cmpOp_fcmov() %{
4533   match(Bool);
4534 
4535   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4536             n->as_Bool()->_test._test != BoolTest::no_overflow);
4537   format %{ "" %}
4538   interface(COND_INTER) %{
4539     equal        (0x0C8);
4540     not_equal    (0x1C8);
4541     less         (0x0C0);
4542     greater_equal(0x1C0);
4543     less_equal   (0x0D0);
4544     greater      (0x1D0);
4545     overflow(0x0, "o"); // not really supported by the instruction
4546     no_overflow(0x1, "no"); // not really supported by the instruction
4547   %}
4548 %}
4549 
4550 // Comparison Code used in long compares
4551 operand cmpOp_commute() %{
4552   match(Bool);
4553 
4554   format %{ "" %}
4555   interface(COND_INTER) %{
4556     equal(0x4, "e");
4557     not_equal(0x5, "ne");
4558     less(0xF, "g");
4559     greater_equal(0xE, "le");
4560     less_equal(0xD, "ge");
4561     greater(0xC, "l");
4562     overflow(0x0, "o");
4563     no_overflow(0x1, "no");
4564   %}
4565 %}
4566 
4567 //----------OPERAND CLASSES----------------------------------------------------
4568 // Operand Classes are groups of operands that are used to simplify
4569 // instruction definitions by not requiring the AD writer to specify separate
4570 // instructions for every form of operand when the instruction accepts
4571 // multiple operand types with the same basic encoding and format.  The classic
4572 // case of this is memory operands.
4573 
4574 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4575                indIndex, indIndexScale, indIndexScaleOffset);
4576 
4577 // Long memory operations are encoded in 2 instructions and a +4 offset.
4578 // This means some kind of offset is always required and you cannot use
4579 // an oop as the offset (as is done when addressing static globals).
4580 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4581                     indIndex, indIndexScale, indIndexScaleOffset);
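
// For example (illustrative only, not an exhaustive list): an instruction
// declared with a 'memory' operand, such as loadI(rRegI dst, memory mem)
// further below, matches all of these addressing forms with one definition:
//   [EAX]                    indirect
//   [EAX + 12]               indOffset8 / indOffset32
//   [EAX + EBX << 2]         indIndexScale
//   [EAX + EBX << 2 + 12]    indIndexScaleOffset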
4582 
4583 
4584 //----------PIPELINE-----------------------------------------------------------
4585 // Rules which define the behavior of the target architecture's pipeline.
4586 pipeline %{
4587 
4588 //----------ATTRIBUTES---------------------------------------------------------
4589 attributes %{
4590   variable_size_instructions;        // Variable-sized instructions
4591   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4592   instruction_unit_size = 1;         // Instruction sizes are multiples of 1 byte
4593   instruction_fetch_unit_size = 16;  // The processor fetches one line
4594   instruction_fetch_units = 1;       // of 16 bytes
4595 
4596   // List of nop instructions
4597   nops( MachNop );
4598 %}
4599 
4600 //----------RESOURCES----------------------------------------------------------
4601 // Resources are the functional units available to the machine
4602 
4603 // Generic P2/P3 pipeline
4604 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4605 // 3 instructions decoded per cycle.
4606 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4607 // 2 ALU op, only ALU0 handles mul/div instructions.
4608 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4609            MS0, MS1, MEM = MS0 | MS1,
4610            BR, FPU,
4611            ALU0, ALU1, ALU = ALU0 | ALU1 );
4612 
4613 //----------PIPELINE DESCRIPTION-----------------------------------------------
4614 // Pipeline Description specifies the stages in the machine's pipeline
4615 
4616 // Generic P2/P3 pipeline
4617 pipe_desc(S0, S1, S2, S3, S4, S5);
4618 
4619 //----------PIPELINE CLASSES---------------------------------------------------
4620 // Pipeline Classes describe the stages in which input and output are
4621 // referenced by the hardware pipeline.
4622 
4623 // Naming convention: ialu or fpu
4624 // Then: _reg
4625 // Then: _reg if there is a 2nd register
4626 // Then: _long if it's a pair of instructions implementing a long
4627 // Then: _fat if it requires the big decoder
4628 //   Or: _mem if it requires the big decoder and a memory unit.
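//
// For example, reading the class names below with this convention:
// ialu_reg_reg is an integer ALU op on two registers, ialu_reg_reg_fat is
// the same but needs the big decoder, and ialu_reg_long_mem is a long
// (two-instruction) ALU op with a memory source.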
4629 
4630 // Integer ALU reg operation
4631 pipe_class ialu_reg(rRegI dst) %{
4632     single_instruction;
4633     dst    : S4(write);
4634     dst    : S3(read);
4635     DECODE : S0;        // any decoder
4636     ALU    : S3;        // any alu
4637 %}
4638 
4639 // Long ALU reg operation
4640 pipe_class ialu_reg_long(eRegL dst) %{
4641     instruction_count(2);
4642     dst    : S4(write);
4643     dst    : S3(read);
4644     DECODE : S0(2);     // any 2 decoders
4645     ALU    : S3(2);     // both alus
4646 %}
4647 
4648 // Integer ALU reg operation using big decoder
4649 pipe_class ialu_reg_fat(rRegI dst) %{
4650     single_instruction;
4651     dst    : S4(write);
4652     dst    : S3(read);
4653     D0     : S0;        // big decoder only
4654     ALU    : S3;        // any alu
4655 %}
4656 
4657 // Long ALU reg operation using big decoder
4658 pipe_class ialu_reg_long_fat(eRegL dst) %{
4659     instruction_count(2);
4660     dst    : S4(write);
4661     dst    : S3(read);
4662     D0     : S0(2);     // big decoder only; twice
4663     ALU    : S3(2);     // any 2 alus
4664 %}
4665 
4666 // Integer ALU reg-reg operation
4667 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4668     single_instruction;
4669     dst    : S4(write);
4670     src    : S3(read);
4671     DECODE : S0;        // any decoder
4672     ALU    : S3;        // any alu
4673 %}
4674 
4675 // Long ALU reg-reg operation
4676 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4677     instruction_count(2);
4678     dst    : S4(write);
4679     src    : S3(read);
4680     DECODE : S0(2);     // any 2 decoders
4681     ALU    : S3(2);     // both alus
4682 %}
4683 
4684 // Integer ALU reg-reg operation using big decoder
4685 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4686     single_instruction;
4687     dst    : S4(write);
4688     src    : S3(read);
4689     D0     : S0;        // big decoder only
4690     ALU    : S3;        // any alu
4691 %}
4692 
4693 // Long ALU reg-reg operation using big decoder
4694 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4695     instruction_count(2);
4696     dst    : S4(write);
4697     src    : S3(read);
4698     D0     : S0(2);     // big decoder only; twice
4699     ALU    : S3(2);     // both alus
4700 %}
4701 
4702 // Integer ALU reg-mem operation
4703 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4704     single_instruction;
4705     dst    : S5(write);
4706     mem    : S3(read);
4707     D0     : S0;        // big decoder only
4708     ALU    : S4;        // any alu
4709     MEM    : S3;        // any mem
4710 %}
4711 
4712 // Long ALU reg-mem operation
4713 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4714     instruction_count(2);
4715     dst    : S5(write);
4716     mem    : S3(read);
4717     D0     : S0(2);     // big decoder only; twice
4718     ALU    : S4(2);     // any 2 alus
4719     MEM    : S3(2);     // both mems
4720 %}
4721 
4722 // Integer mem operation (prefetch)
4723 pipe_class ialu_mem(memory mem)
4724 %{
4725     single_instruction;
4726     mem    : S3(read);
4727     D0     : S0;        // big decoder only
4728     MEM    : S3;        // any mem
4729 %}
4730 
4731 // Integer Store to Memory
4732 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4733     single_instruction;
4734     mem    : S3(read);
4735     src    : S5(read);
4736     D0     : S0;        // big decoder only
4737     ALU    : S4;        // any alu
4738     MEM    : S3;
4739 %}
4740 
4741 // Long Store to Memory
4742 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4743     instruction_count(2);
4744     mem    : S3(read);
4745     src    : S5(read);
4746     D0     : S0(2);     // big decoder only; twice
4747     ALU    : S4(2);     // any 2 alus
4748     MEM    : S3(2);     // Both mems
4749 %}
4750 
4751 // Integer Store to Memory
4752 pipe_class ialu_mem_imm(memory mem) %{
4753     single_instruction;
4754     mem    : S3(read);
4755     D0     : S0;        // big decoder only
4756     ALU    : S4;        // any alu
4757     MEM    : S3;
4758 %}
4759 
4760 // Integer ALU0 reg-reg operation
4761 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4762     single_instruction;
4763     dst    : S4(write);
4764     src    : S3(read);
4765     D0     : S0;        // Big decoder only
4766     ALU0   : S3;        // only alu0
4767 %}
4768 
4769 // Integer ALU0 reg-mem operation
4770 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4771     single_instruction;
4772     dst    : S5(write);
4773     mem    : S3(read);
4774     D0     : S0;        // big decoder only
4775     ALU0   : S4;        // ALU0 only
4776     MEM    : S3;        // any mem
4777 %}
4778 
4779 // Integer ALU reg-reg operation
4780 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4781     single_instruction;
4782     cr     : S4(write);
4783     src1   : S3(read);
4784     src2   : S3(read);
4785     DECODE : S0;        // any decoder
4786     ALU    : S3;        // any alu
4787 %}
4788 
4789 // Integer ALU reg-imm operation
4790 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4791     single_instruction;
4792     cr     : S4(write);
4793     src1   : S3(read);
4794     DECODE : S0;        // any decoder
4795     ALU    : S3;        // any alu
4796 %}
4797 
4798 // Integer ALU reg-mem operation
4799 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4800     single_instruction;
4801     cr     : S4(write);
4802     src1   : S3(read);
4803     src2   : S3(read);
4804     D0     : S0;        // big decoder only
4805     ALU    : S4;        // any alu
4806     MEM    : S3;
4807 %}
4808 
4809 // Conditional move reg-reg
4810 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4811     instruction_count(4);
4812     y      : S4(read);
4813     q      : S3(read);
4814     p      : S3(read);
4815     DECODE : S0(4);     // any decoder
4816 %}
4817 
4818 // Conditional move reg-reg
4819 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4820     single_instruction;
4821     dst    : S4(write);
4822     src    : S3(read);
4823     cr     : S3(read);
4824     DECODE : S0;        // any decoder
4825 %}
4826 
4827 // Conditional move reg-mem
4828 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4829     single_instruction;
4830     dst    : S4(write);
4831     src    : S3(read);
4832     cr     : S3(read);
4833     DECODE : S0;        // any decoder
4834     MEM    : S3;
4835 %}
4836 
4837 // Conditional move reg-reg long
4838 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4839     single_instruction;
4840     dst    : S4(write);
4841     src    : S3(read);
4842     cr     : S3(read);
4843     DECODE : S0(2);     // any 2 decoders
4844 %}
4845 
4846 // Conditional move double reg-reg
4847 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4848     single_instruction;
4849     dst    : S4(write);
4850     src    : S3(read);
4851     cr     : S3(read);
4852     DECODE : S0;        // any decoder
4853 %}
4854 
4855 // Float reg-reg operation
4856 pipe_class fpu_reg(regDPR dst) %{
4857     instruction_count(2);
4858     dst    : S3(read);
4859     DECODE : S0(2);     // any 2 decoders
4860     FPU    : S3;
4861 %}
4862 
4863 // Float reg-reg operation
4864 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4865     instruction_count(2);
4866     dst    : S4(write);
4867     src    : S3(read);
4868     DECODE : S0(2);     // any 2 decoders
4869     FPU    : S3;
4870 %}
4871 
4872 // Float reg-reg operation
4873 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4874     instruction_count(3);
4875     dst    : S4(write);
4876     src1   : S3(read);
4877     src2   : S3(read);
4878     DECODE : S0(3);     // any 3 decoders
4879     FPU    : S3(2);
4880 %}
4881 
4882 // Float reg-reg operation
4883 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4884     instruction_count(4);
4885     dst    : S4(write);
4886     src1   : S3(read);
4887     src2   : S3(read);
4888     src3   : S3(read);
4889     DECODE : S0(4);     // any 4 decoders
4890     FPU    : S3(2);
4891 %}
4892 
4893 // Float reg-reg operation
4894 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4895     instruction_count(4);
4896     dst    : S4(write);
4897     src1   : S3(read);
4898     src2   : S3(read);
4899     src3   : S3(read);
4900     DECODE : S1(3);     // any 3 decoders
4901     D0     : S0;        // Big decoder only
4902     FPU    : S3(2);
4903     MEM    : S3;
4904 %}
4905 
4906 // Float reg-mem operation
4907 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4908     instruction_count(2);
4909     dst    : S5(write);
4910     mem    : S3(read);
4911     D0     : S0;        // big decoder only
4912     DECODE : S1;        // any decoder for FPU POP
4913     FPU    : S4;
4914     MEM    : S3;        // any mem
4915 %}
4916 
4917 // Float reg-mem operation
4918 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4919     instruction_count(3);
4920     dst    : S5(write);
4921     src1   : S3(read);
4922     mem    : S3(read);
4923     D0     : S0;        // big decoder only
4924     DECODE : S1(2);     // any decoder for FPU POP
4925     FPU    : S4;
4926     MEM    : S3;        // any mem
4927 %}
4928 
4929 // Float mem-reg operation
4930 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4931     instruction_count(2);
4932     src    : S5(read);
4933     mem    : S3(read);
4934     DECODE : S0;        // any decoder for FPU PUSH
4935     D0     : S1;        // big decoder only
4936     FPU    : S4;
4937     MEM    : S3;        // any mem
4938 %}
4939 
4940 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4941     instruction_count(3);
4942     src1   : S3(read);
4943     src2   : S3(read);
4944     mem    : S3(read);
4945     DECODE : S0(2);     // any decoder for FPU PUSH
4946     D0     : S1;        // big decoder only
4947     FPU    : S4;
4948     MEM    : S3;        // any mem
4949 %}
4950 
4951 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4952     instruction_count(3);
4953     src1   : S3(read);
4954     src2   : S3(read);
4955     mem    : S4(read);
4956     DECODE : S0;        // any decoder for FPU PUSH
4957     D0     : S0(2);     // big decoder only
4958     FPU    : S4;
4959     MEM    : S3(2);     // any mem
4960 %}
4961 
4962 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4963     instruction_count(2);
4964     src1   : S3(read);
4965     dst    : S4(read);
4966     D0     : S0(2);     // big decoder only
4967     MEM    : S3(2);     // any mem
4968 %}
4969 
4970 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4971     instruction_count(3);
4972     src1   : S3(read);
4973     src2   : S3(read);
4974     dst    : S4(read);
4975     D0     : S0(3);     // big decoder only
4976     FPU    : S4;
4977     MEM    : S3(3);     // any mem
4978 %}
4979 
4980 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4981     instruction_count(3);
4982     src1   : S4(read);
4983     mem    : S4(read);
4984     DECODE : S0;        // any decoder for FPU PUSH
4985     D0     : S0(2);     // big decoder only
4986     FPU    : S4;
4987     MEM    : S3(2);     // any mem
4988 %}
4989 
4990 // Float load constant
4991 pipe_class fpu_reg_con(regDPR dst) %{
4992     instruction_count(2);
4993     dst    : S5(write);
4994     D0     : S0;        // big decoder only for the load
4995     DECODE : S1;        // any decoder for FPU POP
4996     FPU    : S4;
4997     MEM    : S3;        // any mem
4998 %}
4999 
5000 // Float load constant
5001 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5002     instruction_count(3);
5003     dst    : S5(write);
5004     src    : S3(read);
5005     D0     : S0;        // big decoder only for the load
5006     DECODE : S1(2);     // any decoder for FPU POP
5007     FPU    : S4;
5008     MEM    : S3;        // any mem
5009 %}
5010 
5011 // UnConditional branch
5012 pipe_class pipe_jmp( label labl ) %{
5013     single_instruction;
5014     BR   : S3;
5015 %}
5016 
5017 // Conditional branch
5018 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5019     single_instruction;
5020     cr    : S1(read);
5021     BR    : S3;
5022 %}
5023 
5024 // Allocation idiom
5025 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5026     instruction_count(1); force_serialization;
5027     fixed_latency(6);
5028     heap_ptr : S3(read);
5029     DECODE   : S0(3);
5030     D0       : S2;
5031     MEM      : S3;
5032     ALU      : S3(2);
5033     dst      : S5(write);
5034     BR       : S5;
5035 %}
5036 
5037 // Generic big/slow expanded idiom
5038 pipe_class pipe_slow(  ) %{
5039     instruction_count(10); multiple_bundles; force_serialization;
5040     fixed_latency(100);
5041     D0  : S0(2);
5042     MEM : S3(2);
5043 %}
5044 
5045 // The real do-nothing guy
5046 pipe_class empty( ) %{
5047     instruction_count(0);
5048 %}
5049 
5050 // Define the class for the Nop node
5051 define %{
5052    MachNop = empty;
5053 %}
5054 
5055 %}
5056 
5057 //----------INSTRUCTIONS-------------------------------------------------------
5058 //
5059 // match      -- States which machine-independent subtree may be replaced
5060 //               by this instruction.
5061 // ins_cost   -- The estimated cost of this instruction is used by instruction
5062 //               selection to identify a minimum cost tree of machine
5063 //               instructions that matches a tree of machine-independent
5064 //               instructions.
5065 // format     -- A string providing the disassembly for this instruction.
5066 //               The value of an instruction's operand may be inserted
5067 //               by referring to it with a '$' prefix.
5068 // opcode     -- Three instruction opcodes may be provided.  These are referred
5069 //               to within an encode class as $primary, $secondary, and $tertiary
5070 //               respectively.  The primary opcode is commonly used to
5071 //               indicate the type of machine instruction, while secondary
5072 //               and tertiary are often used for prefix options or addressing
5073 //               modes.
5074 // ins_encode -- A list of encode classes with parameters. The encode class
5075 //               name must have been defined in an 'enc_class' specification
5076 //               in the encode section of the architecture description.
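//
// As a concrete reading of the first instruct below (descriptive note only):
// bytes_reverse_int declares opcode(0x0F, 0xC8), so $primary is 0x0F and
// $secondary is 0xC8; its encoding OpcP, OpcSReg(dst) emits the 0x0F byte
// followed by 0xC8 plus the destination register number, so BSWAP EBX
// assembles to 0F CB (EBX encodes as 3).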
5077 
5078 //----------BSWAP-Instruction--------------------------------------------------
5079 instruct bytes_reverse_int(rRegI dst) %{
5080   match(Set dst (ReverseBytesI dst));
5081 
5082   format %{ "BSWAP  $dst" %}
5083   opcode(0x0F, 0xC8);
5084   ins_encode( OpcP, OpcSReg(dst) );
5085   ins_pipe( ialu_reg );
5086 %}
5087 
5088 instruct bytes_reverse_long(eRegL dst) %{
5089   match(Set dst (ReverseBytesL dst));
5090 
5091   format %{ "BSWAP  $dst.lo\n\t"
5092             "BSWAP  $dst.hi\n\t"
5093             "XCHG   $dst.lo $dst.hi" %}
5094 
5095   ins_cost(125);
5096   ins_encode( bswap_long_bytes(dst) );
5097   ins_pipe( ialu_reg_reg);
5098 %}
5099 
5100 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5101   match(Set dst (ReverseBytesUS dst));
5102   effect(KILL cr);
5103 
5104   format %{ "BSWAP  $dst\n\t"
5105             "SHR    $dst,16\n\t" %}
5106   ins_encode %{
5107     __ bswapl($dst$$Register);
5108     __ shrl($dst$$Register, 16);
5109   %}
5110   ins_pipe( ialu_reg );
5111 %}
5112 
5113 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5114   match(Set dst (ReverseBytesS dst));
5115   effect(KILL cr);
5116 
5117   format %{ "BSWAP  $dst\n\t"
5118             "SAR    $dst,16\n\t" %}
5119   ins_encode %{
5120     __ bswapl($dst$$Register);
5121     __ sarl($dst$$Register, 16);
5122   %}
5123   ins_pipe( ialu_reg );
5124 %}
5125 
5126 
5127 //---------- Zeros Count Instructions ------------------------------------------
5128 
5129 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5130   predicate(UseCountLeadingZerosInstruction);
5131   match(Set dst (CountLeadingZerosI src));
5132   effect(KILL cr);
5133 
5134   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5135   ins_encode %{
5136     __ lzcntl($dst$$Register, $src$$Register);
5137   %}
5138   ins_pipe(ialu_reg);
5139 %}
5140 
5141 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5142   predicate(!UseCountLeadingZerosInstruction);
5143   match(Set dst (CountLeadingZerosI src));
5144   effect(KILL cr);
5145 
5146   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5147             "JNZ    skip\n\t"
5148             "MOV    $dst, -1\n"
5149       "skip:\n\t"
5150             "NEG    $dst\n\t"
5151             "ADD    $dst, 31" %}
5152   ins_encode %{
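    // Computes 31 - bsr(src).  Worked example: src = 0x00010000 -> BSR finds
    // bit 16, NEG gives -16, ADD 31 gives 15 leading zeros.  For src = 0, BSR
    // sets ZF, dst is forced to -1, and the result is 32, matching
    // Integer.numberOfLeadingZeros(0).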
5153     Register Rdst = $dst$$Register;
5154     Register Rsrc = $src$$Register;
5155     Label skip;
5156     __ bsrl(Rdst, Rsrc);
5157     __ jccb(Assembler::notZero, skip);
5158     __ movl(Rdst, -1);
5159     __ bind(skip);
5160     __ negl(Rdst);
5161     __ addl(Rdst, BitsPerInt - 1);
5162   %}
5163   ins_pipe(ialu_reg);
5164 %}
5165 
5166 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5167   predicate(UseCountLeadingZerosInstruction);
5168   match(Set dst (CountLeadingZerosL src));
5169   effect(TEMP dst, KILL cr);
5170 
5171   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5172             "JNC    done\n\t"
5173             "LZCNT  $dst, $src.lo\n\t"
5174             "ADD    $dst, 32\n"
5175       "done:" %}
5176   ins_encode %{
5177     Register Rdst = $dst$$Register;
5178     Register Rsrc = $src$$Register;
5179     Label done;
5180     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5181     __ jccb(Assembler::carryClear, done);
5182     __ lzcntl(Rdst, Rsrc);
5183     __ addl(Rdst, BitsPerInt);
5184     __ bind(done);
5185   %}
5186   ins_pipe(ialu_reg);
5187 %}
5188 
5189 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5190   predicate(!UseCountLeadingZerosInstruction);
5191   match(Set dst (CountLeadingZerosL src));
5192   effect(TEMP dst, KILL cr);
5193 
5194   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5195             "JZ     msw_is_zero\n\t"
5196             "ADD    $dst, 32\n\t"
5197             "JMP    not_zero\n"
5198       "msw_is_zero:\n\t"
5199             "BSR    $dst, $src.lo\n\t"
5200             "JNZ    not_zero\n\t"
5201             "MOV    $dst, -1\n"
5202       "not_zero:\n\t"
5203             "NEG    $dst\n\t"
5204             "ADD    $dst, 63\n" %}
5205  ins_encode %{
5206     Register Rdst = $dst$$Register;
5207     Register Rsrc = $src$$Register;
5208     Label msw_is_zero;
5209     Label not_zero;
5210     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5211     __ jccb(Assembler::zero, msw_is_zero);
5212     __ addl(Rdst, BitsPerInt);
5213     __ jmpb(not_zero);
5214     __ bind(msw_is_zero);
5215     __ bsrl(Rdst, Rsrc);
5216     __ jccb(Assembler::notZero, not_zero);
5217     __ movl(Rdst, -1);
5218     __ bind(not_zero);
5219     __ negl(Rdst);
5220     __ addl(Rdst, BitsPerLong - 1);
5221   %}
5222   ins_pipe(ialu_reg);
5223 %}
5224 
5225 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5226   predicate(UseCountTrailingZerosInstruction);
5227   match(Set dst (CountTrailingZerosI src));
5228   effect(KILL cr);
5229 
5230   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5231   ins_encode %{
5232     __ tzcntl($dst$$Register, $src$$Register);
5233   %}
5234   ins_pipe(ialu_reg);
5235 %}
5236 
5237 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5238   predicate(!UseCountTrailingZerosInstruction);
5239   match(Set dst (CountTrailingZerosI src));
5240   effect(KILL cr);
5241 
5242   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5243             "JNZ    done\n\t"
5244             "MOV    $dst, 32\n"
5245       "done:" %}
5246   ins_encode %{
5247     Register Rdst = $dst$$Register;
5248     Label done;
5249     __ bsfl(Rdst, $src$$Register);
5250     __ jccb(Assembler::notZero, done);
5251     __ movl(Rdst, BitsPerInt);
5252     __ bind(done);
5253   %}
5254   ins_pipe(ialu_reg);
5255 %}
5256 
5257 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5258   predicate(UseCountTrailingZerosInstruction);
5259   match(Set dst (CountTrailingZerosL src));
5260   effect(TEMP dst, KILL cr);
5261 
5262   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5263             "JNC    done\n\t"
5264             "TZCNT  $dst, $src.hi\n\t"
5265             "ADD    $dst, 32\n"
5266             "done:" %}
5267   ins_encode %{
5268     Register Rdst = $dst$$Register;
5269     Register Rsrc = $src$$Register;
5270     Label done;
5271     __ tzcntl(Rdst, Rsrc);
5272     __ jccb(Assembler::carryClear, done);
5273     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5274     __ addl(Rdst, BitsPerInt);
5275     __ bind(done);
5276   %}
5277   ins_pipe(ialu_reg);
5278 %}
5279 
5280 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5281   predicate(!UseCountTrailingZerosInstruction);
5282   match(Set dst (CountTrailingZerosL src));
5283   effect(TEMP dst, KILL cr);
5284 
5285   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5286             "JNZ    done\n\t"
5287             "BSF    $dst, $src.hi\n\t"
5288             "JNZ    msw_not_zero\n\t"
5289             "MOV    $dst, 32\n"
5290       "msw_not_zero:\n\t"
5291             "ADD    $dst, 32\n"
5292       "done:" %}
5293   ins_encode %{
5294     Register Rdst = $dst$$Register;
5295     Register Rsrc = $src$$Register;
5296     Label msw_not_zero;
5297     Label done;
5298     __ bsfl(Rdst, Rsrc);
5299     __ jccb(Assembler::notZero, done);
5300     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5301     __ jccb(Assembler::notZero, msw_not_zero);
5302     __ movl(Rdst, BitsPerInt);
5303     __ bind(msw_not_zero);
5304     __ addl(Rdst, BitsPerInt);
5305     __ bind(done);
5306   %}
5307   ins_pipe(ialu_reg);
5308 %}
5309 
5310 
5311 //---------- Population Count Instructions -------------------------------------
5312 
5313 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5314   predicate(UsePopCountInstruction);
5315   match(Set dst (PopCountI src));
5316   effect(KILL cr);
5317 
5318   format %{ "POPCNT $dst, $src" %}
5319   ins_encode %{
5320     __ popcntl($dst$$Register, $src$$Register);
5321   %}
5322   ins_pipe(ialu_reg);
5323 %}
5324 
5325 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5326   predicate(UsePopCountInstruction);
5327   match(Set dst (PopCountI (LoadI mem)));
5328   effect(KILL cr);
5329 
5330   format %{ "POPCNT $dst, $mem" %}
5331   ins_encode %{
5332     __ popcntl($dst$$Register, $mem$$Address);
5333   %}
5334   ins_pipe(ialu_reg);
5335 %}
5336 
5337 // Note: Long.bitCount(long) returns an int.
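// (Descriptive note: the 64-bit count is formed as popcnt(lo) + popcnt(hi),
// e.g. src = 0x0000000FF0000000 gives 4 + 4 = 8; the sum is at most 64, so
// it always fits the int result.)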
5338 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5339   predicate(UsePopCountInstruction);
5340   match(Set dst (PopCountL src));
5341   effect(KILL cr, TEMP tmp, TEMP dst);
5342 
5343   format %{ "POPCNT $dst, $src.lo\n\t"
5344             "POPCNT $tmp, $src.hi\n\t"
5345             "ADD    $dst, $tmp" %}
5346   ins_encode %{
5347     __ popcntl($dst$$Register, $src$$Register);
5348     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5349     __ addl($dst$$Register, $tmp$$Register);
5350   %}
5351   ins_pipe(ialu_reg);
5352 %}
5353 
5354 // Note: Long.bitCount(long) returns an int.
5355 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5356   predicate(UsePopCountInstruction);
5357   match(Set dst (PopCountL (LoadL mem)));
5358   effect(KILL cr, TEMP tmp, TEMP dst);
5359 
5360   format %{ "POPCNT $dst, $mem\n\t"
5361             "POPCNT $tmp, $mem+4\n\t"
5362             "ADD    $dst, $tmp" %}
5363   ins_encode %{
5364     //__ popcntl($dst$$Register, $mem$$Address$$first);
5365     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5366     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5367     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5368     __ addl($dst$$Register, $tmp$$Register);
5369   %}
5370   ins_pipe(ialu_reg);
5371 %}
5372 
5373 
5374 //----------Load/Store/Move Instructions---------------------------------------
5375 //----------Load Instructions--------------------------------------------------
5376 // Load Byte (8bit signed)
5377 instruct loadB(xRegI dst, memory mem) %{
5378   match(Set dst (LoadB mem));
5379 
5380   ins_cost(125);
5381   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5382 
5383   ins_encode %{
5384     __ movsbl($dst$$Register, $mem$$Address);
5385   %}
5386 
5387   ins_pipe(ialu_reg_mem);
5388 %}
5389 
5390 // Load Byte (8bit signed) into Long Register
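// (The SAR by 7 below works because MOVSX8 has already sign-extended the byte
// into bits 7..31 of $dst.lo, so shifting the copy right by 7 (rather than by
// 31) still fills $dst.hi with the sign bit; e.g. loading the byte 0x80
// yields lo = 0xFFFFFF80, hi = 0xFFFFFFFF.)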
5391 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5392   match(Set dst (ConvI2L (LoadB mem)));
5393   effect(KILL cr);
5394 
5395   ins_cost(375);
5396   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5397             "MOV    $dst.hi,$dst.lo\n\t"
5398             "SAR    $dst.hi,7" %}
5399 
5400   ins_encode %{
5401     __ movsbl($dst$$Register, $mem$$Address);
5402     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5403     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
5404   %}
5405 
5406   ins_pipe(ialu_reg_mem);
5407 %}
5408 
5409 // Load Unsigned Byte (8bit UNsigned)
5410 instruct loadUB(xRegI dst, memory mem) %{
5411   match(Set dst (LoadUB mem));
5412 
5413   ins_cost(125);
5414   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5415 
5416   ins_encode %{
5417     __ movzbl($dst$$Register, $mem$$Address);
5418   %}
5419 
5420   ins_pipe(ialu_reg_mem);
5421 %}
5422 
5423 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5424 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5425   match(Set dst (ConvI2L (LoadUB mem)));
5426   effect(KILL cr);
5427 
5428   ins_cost(250);
5429   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5430             "XOR    $dst.hi,$dst.hi" %}
5431 
5432   ins_encode %{
5433     Register Rdst = $dst$$Register;
5434     __ movzbl(Rdst, $mem$$Address);
5435     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5436   %}
5437 
5438   ins_pipe(ialu_reg_mem);
5439 %}
5440 
5441 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5442 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5443   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5444   effect(KILL cr);
5445 
5446   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5447             "XOR    $dst.hi,$dst.hi\n\t"
5448             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5449   ins_encode %{
5450     Register Rdst = $dst$$Register;
5451     __ movzbl(Rdst, $mem$$Address);
5452     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5453     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5454   %}
5455   ins_pipe(ialu_reg_mem);
5456 %}
5457 
5458 // Load Short (16bit signed)
5459 instruct loadS(rRegI dst, memory mem) %{
5460   match(Set dst (LoadS mem));
5461 
5462   ins_cost(125);
5463   format %{ "MOVSX  $dst,$mem\t# short" %}
5464 
5465   ins_encode %{
5466     __ movswl($dst$$Register, $mem$$Address);
5467   %}
5468 
5469   ins_pipe(ialu_reg_mem);
5470 %}
5471 
5472 // Load Short (16 bit signed) to Byte (8 bit signed)
5473 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5474   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5475 
5476   ins_cost(125);
5477   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5478   ins_encode %{
5479     __ movsbl($dst$$Register, $mem$$Address);
5480   %}
5481   ins_pipe(ialu_reg_mem);
5482 %}
5483 
5484 // Load Short (16bit signed) into Long Register
5485 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5486   match(Set dst (ConvI2L (LoadS mem)));
5487   effect(KILL cr);
5488 
5489   ins_cost(375);
5490   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5491             "MOV    $dst.hi,$dst.lo\n\t"
5492             "SAR    $dst.hi,15" %}
5493 
5494   ins_encode %{
5495     __ movswl($dst$$Register, $mem$$Address);
5496     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5497     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5498   %}
5499 
5500   ins_pipe(ialu_reg_mem);
5501 %}
5502 
5503 // Load Unsigned Short/Char (16bit unsigned)
5504 instruct loadUS(rRegI dst, memory mem) %{
5505   match(Set dst (LoadUS mem));
5506 
5507   ins_cost(125);
5508   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5509 
5510   ins_encode %{
5511     __ movzwl($dst$$Register, $mem$$Address);
5512   %}
5513 
5514   ins_pipe(ialu_reg_mem);
5515 %}
5516 
5517 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5518 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5519   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5520 
5521   ins_cost(125);
5522   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5523   ins_encode %{
5524     __ movsbl($dst$$Register, $mem$$Address);
5525   %}
5526   ins_pipe(ialu_reg_mem);
5527 %}
5528 
5529 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5530 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5531   match(Set dst (ConvI2L (LoadUS mem)));
5532   effect(KILL cr);
5533 
5534   ins_cost(250);
5535   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5536             "XOR    $dst.hi,$dst.hi" %}
5537 
5538   ins_encode %{
5539     __ movzwl($dst$$Register, $mem$$Address);
5540     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5541   %}
5542 
5543   ins_pipe(ialu_reg_mem);
5544 %}
5545 
5546 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5547 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5548   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5549   effect(KILL cr);
5550 
5551   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5552             "XOR    $dst.hi,$dst.hi" %}
5553   ins_encode %{
5554     Register Rdst = $dst$$Register;
5555     __ movzbl(Rdst, $mem$$Address);
5556     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5557   %}
5558   ins_pipe(ialu_reg_mem);
5559 %}
5560 
5561 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5562 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5563   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5564   effect(KILL cr);
5565 
5566   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5567             "XOR    $dst.hi,$dst.hi\n\t"
5568             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5569   ins_encode %{
5570     Register Rdst = $dst$$Register;
5571     __ movzwl(Rdst, $mem$$Address);
5572     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5573     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5574   %}
5575   ins_pipe(ialu_reg_mem);
5576 %}
5577 
5578 // Load Integer
5579 instruct loadI(rRegI dst, memory mem) %{
5580   match(Set dst (LoadI mem));
5581 
5582   ins_cost(125);
5583   format %{ "MOV    $dst,$mem\t# int" %}
5584 
5585   ins_encode %{
5586     __ movl($dst$$Register, $mem$$Address);
5587   %}
5588 
5589   ins_pipe(ialu_reg_mem);
5590 %}
5591 
5592 // Load Integer (32 bit signed) to Byte (8 bit signed)
5593 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5594   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5595 
5596   ins_cost(125);
5597   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5598   ins_encode %{
5599     __ movsbl($dst$$Register, $mem$$Address);
5600   %}
5601   ins_pipe(ialu_reg_mem);
5602 %}
5603 
5604 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5605 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5606   match(Set dst (AndI (LoadI mem) mask));
5607 
5608   ins_cost(125);
5609   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5610   ins_encode %{
5611     __ movzbl($dst$$Register, $mem$$Address);
5612   %}
5613   ins_pipe(ialu_reg_mem);
5614 %}
5615 
5616 // Load Integer (32 bit signed) to Short (16 bit signed)
5617 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5618   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5619 
5620   ins_cost(125);
5621   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5622   ins_encode %{
5623     __ movswl($dst$$Register, $mem$$Address);
5624   %}
5625   ins_pipe(ialu_reg_mem);
5626 %}
5627 
5628 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5629 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5630   match(Set dst (AndI (LoadI mem) mask));
5631 
5632   ins_cost(125);
5633   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5634   ins_encode %{
5635     __ movzwl($dst$$Register, $mem$$Address);
5636   %}
5637   ins_pipe(ialu_reg_mem);
5638 %}
5639 
5640 // Load Integer into Long Register
5641 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5642   match(Set dst (ConvI2L (LoadI mem)));
5643   effect(KILL cr);
5644 
5645   ins_cost(375);
5646   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5647             "MOV    $dst.hi,$dst.lo\n\t"
5648             "SAR    $dst.hi,31" %}
5649 
5650   ins_encode %{
5651     __ movl($dst$$Register, $mem$$Address);
5652     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5653     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5654   %}
5655 
5656   ins_pipe(ialu_reg_mem);
5657 %}
5658 
5659 // Load Integer with mask 0xFF into Long Register
5660 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5661   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5662   effect(KILL cr);
5663 
5664   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5665             "XOR    $dst.hi,$dst.hi" %}
5666   ins_encode %{
5667     Register Rdst = $dst$$Register;
5668     __ movzbl(Rdst, $mem$$Address);
5669     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5670   %}
5671   ins_pipe(ialu_reg_mem);
5672 %}
5673 
5674 // Load Integer with mask 0xFFFF into Long Register
5675 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5676   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5677   effect(KILL cr);
5678 
5679   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5680             "XOR    $dst.hi,$dst.hi" %}
5681   ins_encode %{
5682     Register Rdst = $dst$$Register;
5683     __ movzwl(Rdst, $mem$$Address);
5684     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5685   %}
5686   ins_pipe(ialu_reg_mem);
5687 %}
5688 
5689 // Load Integer with 31-bit mask into Long Register
5690 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5691   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5692   effect(KILL cr);
5693 
5694   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5695             "XOR    $dst.hi,$dst.hi\n\t"
5696             "AND    $dst.lo,$mask" %}
5697   ins_encode %{
5698     Register Rdst = $dst$$Register;
5699     __ movl(Rdst, $mem$$Address);
5700     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5701     __ andl(Rdst, $mask$$constant);
5702   %}
5703   ins_pipe(ialu_reg_mem);
5704 %}
5705 
5706 // Load Unsigned Integer into Long Register
5707 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5708   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5709   effect(KILL cr);
5710 
5711   ins_cost(250);
5712   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5713             "XOR    $dst.hi,$dst.hi" %}
5714 
5715   ins_encode %{
5716     __ movl($dst$$Register, $mem$$Address);
5717     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5718   %}
5719 
5720   ins_pipe(ialu_reg_mem);
5721 %}
5722 
5723 // Load Long.  Cannot clobber address while loading, so restrict address
5724 // register to ESI
5725 instruct loadL(eRegL dst, load_long_memory mem) %{
5726   predicate(!((LoadLNode*)n)->require_atomic_access());
5727   match(Set dst (LoadL mem));
5728 
5729   ins_cost(250);
5730   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5731             "MOV    $dst.hi,$mem+4" %}
5732 
5733   ins_encode %{
5734     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5735     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5736     __ movl($dst$$Register, Amemlo);
5737     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5738   %}
5739 
5740   ins_pipe(ialu_reg_long_mem);
5741 %}
5742 
5743 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5744 // then store it down to the stack and reload on the int
5745 // side.
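// The emitted sequence is roughly (sketch only, see enc_loadL_volatile):
//   FILD  qword ptr [$mem]   ; single 64-bit FPU load, this is the atomic step
//   FISTP qword ptr [$dst]   ; spill to the stack slot for integer reloads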
5746 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5747   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5748   match(Set dst (LoadL mem));
5749 
5750   ins_cost(200);
5751   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5752             "FISTp  $dst" %}
5753   ins_encode(enc_loadL_volatile(mem,dst));
5754   ins_pipe( fpu_reg_mem );
5755 %}
5756 
5757 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5758   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5759   match(Set dst (LoadL mem));
5760   effect(TEMP tmp);
5761   ins_cost(180);
5762   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5763             "MOVSD  $dst,$tmp" %}
5764   ins_encode %{
5765     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5766     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5767   %}
5768   ins_pipe( pipe_slow );
5769 %}
5770 
5771 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5772   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5773   match(Set dst (LoadL mem));
5774   effect(TEMP tmp);
5775   ins_cost(160);
5776   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5777             "MOVD   $dst.lo,$tmp\n\t"
5778             "PSRLQ  $tmp,32\n\t"
5779             "MOVD   $dst.hi,$tmp" %}
5780   ins_encode %{
5781     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5782     __ movdl($dst$$Register, $tmp$$XMMRegister);
5783     __ psrlq($tmp$$XMMRegister, 32);
5784     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5785   %}
5786   ins_pipe( pipe_slow );
5787 %}
5788 
5789 // Load Range
5790 instruct loadRange(rRegI dst, memory mem) %{
5791   match(Set dst (LoadRange mem));
5792 
5793   ins_cost(125);
5794   format %{ "MOV    $dst,$mem" %}
5795   opcode(0x8B);
5796   ins_encode( OpcP, RegMem(dst,mem));
5797   ins_pipe( ialu_reg_mem );
5798 %}
5799 
5800 
5801 // Load Pointer
5802 instruct loadP(eRegP dst, memory mem) %{
5803   match(Set dst (LoadP mem));
5804 
5805   ins_cost(125);
5806   format %{ "MOV    $dst,$mem" %}
5807   opcode(0x8B);
5808   ins_encode( OpcP, RegMem(dst,mem));
5809   ins_pipe( ialu_reg_mem );
5810 %}
5811 
5812 // Load Klass Pointer
5813 instruct loadKlass(eRegP dst, memory mem) %{
5814   match(Set dst (LoadKlass mem));
5815 
5816   ins_cost(125);
5817   format %{ "MOV    $dst,$mem" %}
5818   opcode(0x8B);
5819   ins_encode( OpcP, RegMem(dst,mem));
5820   ins_pipe( ialu_reg_mem );
5821 %}
5822 
5823 // Load Double
5824 instruct loadDPR(regDPR dst, memory mem) %{
5825   predicate(UseSSE<=1);
5826   match(Set dst (LoadD mem));
5827 
5828   ins_cost(150);
5829   format %{ "FLD_D  ST,$mem\n\t"
5830             "FSTP   $dst" %}
5831   opcode(0xDD);               /* DD /0 */
5832   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5833               Pop_Reg_DPR(dst) );
5834   ins_pipe( fpu_reg_mem );
5835 %}
5836 
5837 // Load Double to XMM
5838 instruct loadD(regD dst, memory mem) %{
5839   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5840   match(Set dst (LoadD mem));
5841   ins_cost(145);
5842   format %{ "MOVSD  $dst,$mem" %}
5843   ins_encode %{
5844     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5845   %}
5846   ins_pipe( pipe_slow );
5847 %}
5848 
5849 instruct loadD_partial(regD dst, memory mem) %{
5850   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5851   match(Set dst (LoadD mem));
5852   ins_cost(145);
5853   format %{ "MOVLPD $dst,$mem" %}
5854   ins_encode %{
5855     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5856   %}
5857   ins_pipe( pipe_slow );
5858 %}
5859 
5860 // Load to XMM register (single-precision floating point)
5861 // MOVSS instruction
5862 instruct loadF(regF dst, memory mem) %{
5863   predicate(UseSSE>=1);
5864   match(Set dst (LoadF mem));
5865   ins_cost(145);
5866   format %{ "MOVSS  $dst,$mem" %}
5867   ins_encode %{
5868     __ movflt ($dst$$XMMRegister, $mem$$Address);
5869   %}
5870   ins_pipe( pipe_slow );
5871 %}
5872 
5873 // Load Float
5874 instruct loadFPR(regFPR dst, memory mem) %{
5875   predicate(UseSSE==0);
5876   match(Set dst (LoadF mem));
5877 
5878   ins_cost(150);
5879   format %{ "FLD_S  ST,$mem\n\t"
5880             "FSTP   $dst" %}
5881   opcode(0xD9);               /* D9 /0 */
5882   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5883               Pop_Reg_FPR(dst) );
5884   ins_pipe( fpu_reg_mem );
5885 %}
5886 
5887 // Load Effective Address
5888 instruct leaP8(eRegP dst, indOffset8 mem) %{
5889   match(Set dst mem);
5890 
5891   ins_cost(110);
5892   format %{ "LEA    $dst,$mem" %}
5893   opcode(0x8D);
5894   ins_encode( OpcP, RegMem(dst,mem));
5895   ins_pipe( ialu_reg_reg_fat );
5896 %}
5897 
5898 instruct leaP32(eRegP dst, indOffset32 mem) %{
5899   match(Set dst mem);
5900 
5901   ins_cost(110);
5902   format %{ "LEA    $dst,$mem" %}
5903   opcode(0x8D);
5904   ins_encode( OpcP, RegMem(dst,mem));
5905   ins_pipe( ialu_reg_reg_fat );
5906 %}
5907 
5908 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5909   match(Set dst mem);
5910 
5911   ins_cost(110);
5912   format %{ "LEA    $dst,$mem" %}
5913   opcode(0x8D);
5914   ins_encode( OpcP, RegMem(dst,mem));
5915   ins_pipe( ialu_reg_reg_fat );
5916 %}
5917 
5918 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5919   match(Set dst mem);
5920 
5921   ins_cost(110);
5922   format %{ "LEA    $dst,$mem" %}
5923   opcode(0x8D);
5924   ins_encode( OpcP, RegMem(dst,mem));
5925   ins_pipe( ialu_reg_reg_fat );
5926 %}
5927 
5928 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5929   match(Set dst mem);
5930 
5931   ins_cost(110);
5932   format %{ "LEA    $dst,$mem" %}
5933   opcode(0x8D);
5934   ins_encode( OpcP, RegMem(dst,mem));
5935   ins_pipe( ialu_reg_reg_fat );
5936 %}
5937 
5938 // Load Constant
5939 instruct loadConI(rRegI dst, immI src) %{
5940   match(Set dst src);
5941 
5942   format %{ "MOV    $dst,$src" %}
5943   ins_encode( LdImmI(dst, src) );
5944   ins_pipe( ialu_reg_fat );
5945 %}
5946 
5947 // Load Constant zero
5948 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5949   match(Set dst src);
5950   effect(KILL cr);
5951 
5952   ins_cost(50);
5953   format %{ "XOR    $dst,$dst" %}
5954   opcode(0x33);  /* + rd */
5955   ins_encode( OpcP, RegReg( dst, dst ) );
5956   ins_pipe( ialu_reg );
5957 %}
5958 
5959 instruct loadConP(eRegP dst, immP src) %{
5960   match(Set dst src);
5961 
5962   format %{ "MOV    $dst,$src" %}
5963   opcode(0xB8);  /* + rd */
5964   ins_encode( LdImmP(dst, src) );
5965   ins_pipe( ialu_reg_fat );
5966 %}
5967 
5968 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5969   match(Set dst src);
5970   effect(KILL cr);
5971   ins_cost(200);
5972   format %{ "MOV    $dst.lo,$src.lo\n\t"
5973             "MOV    $dst.hi,$src.hi" %}
5974   opcode(0xB8);
5975   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5976   ins_pipe( ialu_reg_long_fat );
5977 %}
5978 
5979 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5980   match(Set dst src);
5981   effect(KILL cr);
5982   ins_cost(150);
5983   format %{ "XOR    $dst.lo,$dst.lo\n\t"
5984             "XOR    $dst.hi,$dst.hi" %}
5985   opcode(0x33,0x33);
5986   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5987   ins_pipe( ialu_reg_long );
5988 %}
5989 
5990 // The instruction usage is guarded by predicate in operand immFPR().
5991 instruct loadConFPR(regFPR dst, immFPR con) %{
5992   match(Set dst con);
5993   ins_cost(125);
5994   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
5995             "FSTP   $dst" %}
5996   ins_encode %{
5997     __ fld_s($constantaddress($con));
5998     __ fstp_d($dst$$reg);
5999   %}
6000   ins_pipe(fpu_reg_con);
6001 %}
6002 
6003 // The instruction usage is guarded by predicate in operand immFPR0().
6004 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6005   match(Set dst con);
6006   ins_cost(125);
6007   format %{ "FLDZ   ST\n\t"
6008             "FSTP   $dst" %}
6009   ins_encode %{
6010     __ fldz();
6011     __ fstp_d($dst$$reg);
6012   %}
6013   ins_pipe(fpu_reg_con);
6014 %}
6015 
6016 // The instruction usage is guarded by predicate in operand immFPR1().
6017 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6018   match(Set dst con);
6019   ins_cost(125);
6020   format %{ "FLD1   ST\n\t"
6021             "FSTP   $dst" %}
6022   ins_encode %{
6023     __ fld1();
6024     __ fstp_d($dst$$reg);
6025   %}
6026   ins_pipe(fpu_reg_con);
6027 %}
6028 
6029 // The instruction usage is guarded by predicate in operand immF().
6030 instruct loadConF(regF dst, immF con) %{
6031   match(Set dst con);
6032   ins_cost(125);
6033   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6034   ins_encode %{
6035     __ movflt($dst$$XMMRegister, $constantaddress($con));
6036   %}
6037   ins_pipe(pipe_slow);
6038 %}
6039 
6040 // The instruction usage is guarded by predicate in operand immF0().
6041 instruct loadConF0(regF dst, immF0 src) %{
6042   match(Set dst src);
6043   ins_cost(100);
6044   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6045   ins_encode %{
6046     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6047   %}
6048   ins_pipe(pipe_slow);
6049 %}
6050 
6051 // The instruction usage is guarded by predicate in operand immDPR().
6052 instruct loadConDPR(regDPR dst, immDPR con) %{
6053   match(Set dst con);
6054   ins_cost(125);
6055 
6056   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6057             "FSTP   $dst" %}
6058   ins_encode %{
6059     __ fld_d($constantaddress($con));
6060     __ fstp_d($dst$$reg);
6061   %}
6062   ins_pipe(fpu_reg_con);
6063 %}
6064 
6065 // The instruction usage is guarded by predicate in operand immDPR0().
6066 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6067   match(Set dst con);
6068   ins_cost(125);
6069 
6070   format %{ "FLDZ   ST\n\t"
6071             "FSTP   $dst" %}
6072   ins_encode %{
6073     __ fldz();
6074     __ fstp_d($dst$$reg);
6075   %}
6076   ins_pipe(fpu_reg_con);
6077 %}
6078 
6079 // The instruction usage is guarded by predicate in operand immDPR1().
6080 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6081   match(Set dst con);
6082   ins_cost(125);
6083 
6084   format %{ "FLD1   ST\n\t"
6085             "FSTP   $dst" %}
6086   ins_encode %{
6087     __ fld1();
6088     __ fstp_d($dst$$reg);
6089   %}
6090   ins_pipe(fpu_reg_con);
6091 %}
6092 
6093 // The instruction usage is guarded by predicate in operand immD().
6094 instruct loadConD(regD dst, immD con) %{
6095   match(Set dst con);
6096   ins_cost(125);
6097   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6098   ins_encode %{
6099     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6100   %}
6101   ins_pipe(pipe_slow);
6102 %}
6103 
6104 // The instruction usage is guarded by predicate in operand immD0().
6105 instruct loadConD0(regD dst, immD0 src) %{
6106   match(Set dst src);
6107   ins_cost(100);
6108   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6109   ins_encode %{
6110     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6111   %}
6112   ins_pipe( pipe_slow );
6113 %}
6114 
6115 // Load Stack Slot
6116 instruct loadSSI(rRegI dst, stackSlotI src) %{
6117   match(Set dst src);
6118   ins_cost(125);
6119 
6120   format %{ "MOV    $dst,$src" %}
6121   opcode(0x8B);
6122   ins_encode( OpcP, RegMem(dst,src));
6123   ins_pipe( ialu_reg_mem );
6124 %}
6125 
6126 instruct loadSSL(eRegL dst, stackSlotL src) %{
6127   match(Set dst src);
6128 
6129   ins_cost(200);
6130   format %{ "MOV    $dst,$src.lo\n\t"
6131             "MOV    $dst+4,$src.hi" %}
6132   opcode(0x8B, 0x8B);
6133   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6134   ins_pipe( ialu_mem_long_reg );
6135 %}
6136 
6137 // Load Stack Slot
6138 instruct loadSSP(eRegP dst, stackSlotP src) %{
6139   match(Set dst src);
6140   ins_cost(125);
6141 
6142   format %{ "MOV    $dst,$src" %}
6143   opcode(0x8B);
6144   ins_encode( OpcP, RegMem(dst,src));
6145   ins_pipe( ialu_reg_mem );
6146 %}
6147 
6148 // Load Stack Slot
6149 instruct loadSSF(regFPR dst, stackSlotF src) %{
6150   match(Set dst src);
6151   ins_cost(125);
6152 
6153   format %{ "FLD_S  $src\n\t"
6154             "FSTP   $dst" %}
6155   opcode(0xD9);               /* D9 /0, FLD m32real */
6156   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6157               Pop_Reg_FPR(dst) );
6158   ins_pipe( fpu_reg_mem );
6159 %}
6160 
6161 // Load Stack Slot
6162 instruct loadSSD(regDPR dst, stackSlotD src) %{
6163   match(Set dst src);
6164   ins_cost(125);
6165 
6166   format %{ "FLD_D  $src\n\t"
6167             "FSTP   $dst" %}
6168   opcode(0xDD);               /* DD /0, FLD m64real */
6169   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6170               Pop_Reg_DPR(dst) );
6171   ins_pipe( fpu_reg_mem );
6172 %}
6173 
6174 // Prefetch instructions for allocation.
6175 // Must be safe to execute with invalid address (cannot fault).
6176 
6177 instruct prefetchAlloc0( memory mem ) %{
6178   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6179   match(PrefetchAllocation mem);
6180   ins_cost(0);
6181   size(0);
6182   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6183   ins_encode();
6184   ins_pipe(empty);
6185 %}
6186 
6187 instruct prefetchAlloc( memory mem ) %{
6188   predicate(AllocatePrefetchInstr==3);
6189   match( PrefetchAllocation mem );
6190   ins_cost(100);
6191 
6192   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6193   ins_encode %{
6194     __ prefetchw($mem$$Address);
6195   %}
6196   ins_pipe(ialu_mem);
6197 %}
6198 
6199 instruct prefetchAllocNTA( memory mem ) %{
6200   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6201   match(PrefetchAllocation mem);
6202   ins_cost(100);
6203 
6204   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6205   ins_encode %{
6206     __ prefetchnta($mem$$Address);
6207   %}
6208   ins_pipe(ialu_mem);
6209 %}
6210 
6211 instruct prefetchAllocT0( memory mem ) %{
6212   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6213   match(PrefetchAllocation mem);
6214   ins_cost(100);
6215 
6216   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6217   ins_encode %{
6218     __ prefetcht0($mem$$Address);
6219   %}
6220   ins_pipe(ialu_mem);
6221 %}
6222 
6223 instruct prefetchAllocT2( memory mem ) %{
6224   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6225   match(PrefetchAllocation mem);
6226   ins_cost(100);
6227 
6228   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6229   ins_encode %{
6230     __ prefetcht2($mem$$Address);
6231   %}
6232   ins_pipe(ialu_mem);
6233 %}
6234 
6235 //----------Store Instructions-------------------------------------------------
6236 
6237 // Store Byte
6238 instruct storeB(memory mem, xRegI src) %{
6239   match(Set mem (StoreB mem src));
6240 
6241   ins_cost(125);
6242   format %{ "MOV8   $mem,$src" %}
6243   opcode(0x88);
6244   ins_encode( OpcP, RegMem( src, mem ) );
6245   ins_pipe( ialu_mem_reg );
6246 %}
6247 
6248 // Store Char/Short
6249 instruct storeC(memory mem, rRegI src) %{
6250   match(Set mem (StoreC mem src));
6251 
6252   ins_cost(125);
6253   format %{ "MOV16  $mem,$src" %}
6254   opcode(0x89, 0x66);
6255   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6256   ins_pipe( ialu_mem_reg );
6257 %}
6258 
6259 // Store Integer
6260 instruct storeI(memory mem, rRegI src) %{
6261   match(Set mem (StoreI mem src));
6262 
6263   ins_cost(125);
6264   format %{ "MOV    $mem,$src" %}
6265   opcode(0x89);
6266   ins_encode( OpcP, RegMem( src, mem ) );
6267   ins_pipe( ialu_mem_reg );
6268 %}
6269 
6270 // Store Long
6271 instruct storeL(long_memory mem, eRegL src) %{
6272   predicate(!((StoreLNode*)n)->require_atomic_access());
6273   match(Set mem (StoreL mem src));
6274 
6275   ins_cost(200);
6276   format %{ "MOV    $mem,$src.lo\n\t"
6277             "MOV    $mem+4,$src.hi" %}
6278   opcode(0x89, 0x89);
6279   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6280   ins_pipe( ialu_mem_long_reg );
6281 %}
6282 
6283 // Store Long to Integer
6284 instruct storeL2I(memory mem, eRegL src) %{
6285   match(Set mem (StoreI mem (ConvL2I src)));
6286 
6287   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6288   ins_encode %{
6289     __ movl($mem$$Address, $src$$Register);
6290   %}
6291   ins_pipe(ialu_mem_reg);
6292 %}
6293 
6294 // Volatile Store Long.  Must be atomic, so move it into
6295 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6296 // target address before the store (for null-ptr checks)
6297 // so the memory operand is used twice in the encoding.
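// FILD m64/FISTP m64 move all 64 bits in a single memory access, which is atomic
// for a naturally aligned quadword on Pentium-class and later processors, so no
// lock prefix is required.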
6298 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6299   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6300   match(Set mem (StoreL mem src));
6301   effect( KILL cr );
6302   ins_cost(400);
6303   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6304             "FILD   $src\n\t"
6305             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6306   opcode(0x3B);
6307   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6308   ins_pipe( fpu_reg_mem );
6309 %}
6310 
6311 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6312   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6313   match(Set mem (StoreL mem src));
6314   effect( TEMP tmp, KILL cr );
6315   ins_cost(380);
6316   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6317             "MOVSD  $tmp,$src\n\t"
6318             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6319   ins_encode %{
6320     __ cmpl(rax, $mem$$Address);
6321     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6322     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6323   %}
6324   ins_pipe( pipe_slow );
6325 %}
6326 
6327 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6328   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6329   match(Set mem (StoreL mem src));
6330   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6331   ins_cost(360);
6332   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6333             "MOVD   $tmp,$src.lo\n\t"
6334             "MOVD   $tmp2,$src.hi\n\t"
6335             "PUNPCKLDQ $tmp,$tmp2\n\t"
6336             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6337   ins_encode %{
6338     __ cmpl(rax, $mem$$Address);
6339     __ movdl($tmp$$XMMRegister, $src$$Register);
6340     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6341     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6342     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6343   %}
6344   ins_pipe( pipe_slow );
6345 %}
6346 
6347 // Store Pointer; for storing unknown oops and raw pointers
6348 instruct storeP(memory mem, anyRegP src) %{
6349   match(Set mem (StoreP mem src));
6350 
6351   ins_cost(125);
6352   format %{ "MOV    $mem,$src" %}
6353   opcode(0x89);
6354   ins_encode( OpcP, RegMem( src, mem ) );
6355   ins_pipe( ialu_mem_reg );
6356 %}
6357 
6358 // Store Integer Immediate
6359 instruct storeImmI(memory mem, immI src) %{
6360   match(Set mem (StoreI mem src));
6361 
6362   ins_cost(150);
6363   format %{ "MOV    $mem,$src" %}
6364   opcode(0xC7);               /* C7 /0 */
6365   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6366   ins_pipe( ialu_mem_imm );
6367 %}
6368 
6369 // Store Short/Char Immediate
6370 instruct storeImmI16(memory mem, immI16 src) %{
6371   predicate(UseStoreImmI16);
6372   match(Set mem (StoreC mem src));
6373 
6374   ins_cost(150);
6375   format %{ "MOV16  $mem,$src" %}
  opcode(0xC7);     /* C7 /0  Same as the 32-bit store immediate, with an operand-size prefix */
6377   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6378   ins_pipe( ialu_mem_imm );
6379 %}
6380 
6381 // Store Pointer Immediate; null pointers or constant oops that do not
6382 // need card-mark barriers.
6383 instruct storeImmP(memory mem, immP src) %{
6384   match(Set mem (StoreP mem src));
6385 
6386   ins_cost(150);
6387   format %{ "MOV    $mem,$src" %}
6388   opcode(0xC7);               /* C7 /0 */
6389   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6390   ins_pipe( ialu_mem_imm );
6391 %}
6392 
6393 // Store Byte Immediate
6394 instruct storeImmB(memory mem, immI8 src) %{
6395   match(Set mem (StoreB mem src));
6396 
6397   ins_cost(150);
6398   format %{ "MOV8   $mem,$src" %}
6399   opcode(0xC6);               /* C6 /0 */
6400   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6401   ins_pipe( ialu_mem_imm );
6402 %}
6403 
6404 // Store CMS card-mark Immediate
6405 instruct storeImmCM(memory mem, immI8 src) %{
6406   match(Set mem (StoreCM mem src));
6407 
6408   ins_cost(150);
6409   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6410   opcode(0xC6);               /* C6 /0 */
6411   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6412   ins_pipe( ialu_mem_imm );
6413 %}
6414 
6415 // Store Double
6416 instruct storeDPR( memory mem, regDPR1 src) %{
6417   predicate(UseSSE<=1);
6418   match(Set mem (StoreD mem src));
6419 
6420   ins_cost(100);
6421   format %{ "FST_D  $mem,$src" %}
6422   opcode(0xDD);       /* DD /2 */
6423   ins_encode( enc_FPR_store(mem,src) );
6424   ins_pipe( fpu_mem_reg );
6425 %}
6426 
6427 // Store double does rounding on x86
6428 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6429   predicate(UseSSE<=1);
6430   match(Set mem (StoreD mem (RoundDouble src)));
6431 
6432   ins_cost(100);
6433   format %{ "FST_D  $mem,$src\t# round" %}
6434   opcode(0xDD);       /* DD /2 */
6435   ins_encode( enc_FPR_store(mem,src) );
6436   ins_pipe( fpu_mem_reg );
6437 %}
6438 
// Store XMM register to memory (double-precision floating point)
6440 // MOVSD instruction
6441 instruct storeD(memory mem, regD src) %{
6442   predicate(UseSSE>=2);
6443   match(Set mem (StoreD mem src));
6444   ins_cost(95);
6445   format %{ "MOVSD  $mem,$src" %}
6446   ins_encode %{
6447     __ movdbl($mem$$Address, $src$$XMMRegister);
6448   %}
6449   ins_pipe( pipe_slow );
6450 %}
6451 
6452 // Store XMM register to memory (single-precision floating point)
6453 // MOVSS instruction
6454 instruct storeF(memory mem, regF src) %{
6455   predicate(UseSSE>=1);
6456   match(Set mem (StoreF mem src));
6457   ins_cost(95);
6458   format %{ "MOVSS  $mem,$src" %}
6459   ins_encode %{
6460     __ movflt($mem$$Address, $src$$XMMRegister);
6461   %}
6462   ins_pipe( pipe_slow );
6463 %}
6464 
6465 // Store Float
6466 instruct storeFPR( memory mem, regFPR1 src) %{
6467   predicate(UseSSE==0);
6468   match(Set mem (StoreF mem src));
6469 
6470   ins_cost(100);
6471   format %{ "FST_S  $mem,$src" %}
6472   opcode(0xD9);       /* D9 /2 */
6473   ins_encode( enc_FPR_store(mem,src) );
6474   ins_pipe( fpu_mem_reg );
6475 %}
6476 
6477 // Store Float does rounding on x86
6478 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6479   predicate(UseSSE==0);
6480   match(Set mem (StoreF mem (RoundFloat src)));
6481 
6482   ins_cost(100);
6483   format %{ "FST_S  $mem,$src\t# round" %}
6484   opcode(0xD9);       /* D9 /2 */
6485   ins_encode( enc_FPR_store(mem,src) );
6486   ins_pipe( fpu_mem_reg );
6487 %}
6488 
// Store Float from a Double register; the narrowing store (FST_S) does the rounding on x86
6490 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6491   predicate(UseSSE<=1);
6492   match(Set mem (StoreF mem (ConvD2F src)));
6493 
6494   ins_cost(100);
6495   format %{ "FST_S  $mem,$src\t# D-round" %}
6496   opcode(0xD9);       /* D9 /2 */
6497   ins_encode( enc_FPR_store(mem,src) );
6498   ins_pipe( fpu_mem_reg );
6499 %}
6500 
// Store immediate Float value (it is faster than a store from an FPU register)
// Use of this instruction is guarded by the predicate in operand immFPR().
6503 instruct storeFPR_imm( memory mem, immFPR src) %{
6504   match(Set mem (StoreF mem src));
6505 
6506   ins_cost(50);
6507   format %{ "MOV    $mem,$src\t# store float" %}
6508   opcode(0xC7);               /* C7 /0 */
6509   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6510   ins_pipe( ialu_mem_imm );
6511 %}
6512 
// Store immediate Float value (it is faster than a store from an XMM register)
// Use of this instruction is guarded by the predicate in operand immF().
6515 instruct storeF_imm( memory mem, immF src) %{
6516   match(Set mem (StoreF mem src));
6517 
6518   ins_cost(50);
6519   format %{ "MOV    $mem,$src\t# store float" %}
6520   opcode(0xC7);               /* C7 /0 */
6521   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6522   ins_pipe( ialu_mem_imm );
6523 %}
6524 
6525 // Store Integer to stack slot
6526 instruct storeSSI(stackSlotI dst, rRegI src) %{
6527   match(Set dst src);
6528 
6529   ins_cost(100);
6530   format %{ "MOV    $dst,$src" %}
6531   opcode(0x89);
6532   ins_encode( OpcPRegSS( dst, src ) );
6533   ins_pipe( ialu_mem_reg );
6534 %}
6535 
// Store Pointer to stack slot
6537 instruct storeSSP(stackSlotP dst, eRegP src) %{
6538   match(Set dst src);
6539 
6540   ins_cost(100);
6541   format %{ "MOV    $dst,$src" %}
6542   opcode(0x89);
6543   ins_encode( OpcPRegSS( dst, src ) );
6544   ins_pipe( ialu_mem_reg );
6545 %}
6546 
6547 // Store Long to stack slot
6548 instruct storeSSL(stackSlotL dst, eRegL src) %{
6549   match(Set dst src);
6550 
6551   ins_cost(200);
6552   format %{ "MOV    $dst,$src.lo\n\t"
6553             "MOV    $dst+4,$src.hi" %}
6554   opcode(0x89, 0x89);
6555   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6556   ins_pipe( ialu_mem_long_reg );
6557 %}
6558 
6559 //----------MemBar Instructions-----------------------------------------------
6560 // Memory barrier flavors
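// x86 has a strong (TSO-like) memory model: loads are not reordered with older
// loads, and stores are not reordered with older stores, so acquire, release and
// storestore barriers need no code.  Only StoreLoad ordering needs an instruction;
// membar_volatile below uses a locked read-modify-write of a stack slot
// (LOCK ADDL [ESP], 0), which is typically cheaper than MFENCE.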
6561 
6562 instruct membar_acquire() %{
6563   match(MemBarAcquire);
6564   match(LoadFence);
6565   ins_cost(400);
6566 
6567   size(0);
6568   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6569   ins_encode();
6570   ins_pipe(empty);
6571 %}
6572 
6573 instruct membar_acquire_lock() %{
6574   match(MemBarAcquireLock);
6575   ins_cost(0);
6576 
6577   size(0);
6578   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6579   ins_encode( );
6580   ins_pipe(empty);
6581 %}
6582 
6583 instruct membar_release() %{
6584   match(MemBarRelease);
6585   match(StoreFence);
6586   ins_cost(400);
6587 
6588   size(0);
6589   format %{ "MEMBAR-release ! (empty encoding)" %}
6590   ins_encode( );
6591   ins_pipe(empty);
6592 %}
6593 
6594 instruct membar_release_lock() %{
6595   match(MemBarReleaseLock);
6596   ins_cost(0);
6597 
6598   size(0);
6599   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6600   ins_encode( );
6601   ins_pipe(empty);
6602 %}
6603 
6604 instruct membar_volatile(eFlagsReg cr) %{
6605   match(MemBarVolatile);
6606   effect(KILL cr);
6607   ins_cost(400);
6608 
6609   format %{
6610     $$template
6611     if (os::is_MP()) {
6612       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6613     } else {
6614       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6615     }
6616   %}
6617   ins_encode %{
6618     __ membar(Assembler::StoreLoad);
6619   %}
6620   ins_pipe(pipe_slow);
6621 %}
6622 
6623 instruct unnecessary_membar_volatile() %{
6624   match(MemBarVolatile);
6625   predicate(Matcher::post_store_load_barrier(n));
6626   ins_cost(0);
6627 
6628   size(0);
6629   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6630   ins_encode( );
6631   ins_pipe(empty);
6632 %}
6633 
6634 instruct membar_storestore() %{
6635   match(MemBarStoreStore);
6636   ins_cost(0);
6637 
6638   size(0);
6639   format %{ "MEMBAR-storestore (empty encoding)" %}
6640   ins_encode( );
6641   ins_pipe(empty);
6642 %}
6643 
6644 //----------Move Instructions--------------------------------------------------
6645 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6646   match(Set dst (CastX2P src));
6647   format %{ "# X2P  $dst, $src" %}
6648   ins_encode( /*empty encoding*/ );
6649   ins_cost(0);
6650   ins_pipe(empty);
6651 %}
6652 
6653 instruct castP2X(rRegI dst, eRegP src ) %{
6654   match(Set dst (CastP2X src));
6655   ins_cost(50);
6656   format %{ "MOV    $dst, $src\t# CastP2X" %}
6657   ins_encode( enc_Copy( dst, src) );
6658   ins_pipe( ialu_reg_reg );
6659 %}
6660 
6661 //----------Conditional Move---------------------------------------------------
6662 // Conditional move
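// Without CMOV support the move is emulated by a short conditional branch around
// a plain MOV.  The branch tests the inverse condition; XOR-ing the low bit of
// the x86 condition encoding ($cop$$cmpcode^1) yields that inverse
// (e.g. 0x4 "equal" <-> 0x5 "not equal").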
6663 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6664   predicate(!VM_Version::supports_cmov() );
6665   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6666   ins_cost(200);
6667   format %{ "J$cop,us skip\t# signed cmove\n\t"
6668             "MOV    $dst,$src\n"
6669       "skip:" %}
6670   ins_encode %{
6671     Label Lskip;
6672     // Invert sense of branch from sense of CMOV
6673     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6674     __ movl($dst$$Register, $src$$Register);
6675     __ bind(Lskip);
6676   %}
6677   ins_pipe( pipe_cmov_reg );
6678 %}
6679 
6680 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6681   predicate(!VM_Version::supports_cmov() );
6682   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6683   ins_cost(200);
6684   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6685             "MOV    $dst,$src\n"
6686       "skip:" %}
6687   ins_encode %{
6688     Label Lskip;
6689     // Invert sense of branch from sense of CMOV
6690     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6691     __ movl($dst$$Register, $src$$Register);
6692     __ bind(Lskip);
6693   %}
6694   ins_pipe( pipe_cmov_reg );
6695 %}
6696 
6697 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6698   predicate(VM_Version::supports_cmov() );
6699   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6700   ins_cost(200);
6701   format %{ "CMOV$cop $dst,$src" %}
6702   opcode(0x0F,0x40);
6703   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6704   ins_pipe( pipe_cmov_reg );
6705 %}
6706 
6707 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6708   predicate(VM_Version::supports_cmov() );
6709   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6710   ins_cost(200);
6711   format %{ "CMOV$cop $dst,$src" %}
6712   opcode(0x0F,0x40);
6713   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6714   ins_pipe( pipe_cmov_reg );
6715 %}
6716 
6717 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6718   predicate(VM_Version::supports_cmov() );
6719   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6720   ins_cost(200);
6721   expand %{
6722     cmovI_regU(cop, cr, dst, src);
6723   %}
6724 %}
6725 
6726 // Conditional move
6727 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6728   predicate(VM_Version::supports_cmov() );
6729   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6730   ins_cost(250);
6731   format %{ "CMOV$cop $dst,$src" %}
6732   opcode(0x0F,0x40);
6733   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6734   ins_pipe( pipe_cmov_mem );
6735 %}
6736 
6737 // Conditional move
6738 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6739   predicate(VM_Version::supports_cmov() );
6740   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6741   ins_cost(250);
6742   format %{ "CMOV$cop $dst,$src" %}
6743   opcode(0x0F,0x40);
6744   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6745   ins_pipe( pipe_cmov_mem );
6746 %}
6747 
6748 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6749   predicate(VM_Version::supports_cmov() );
6750   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6751   ins_cost(250);
6752   expand %{
6753     cmovI_memU(cop, cr, dst, src);
6754   %}
6755 %}
6756 
6757 // Conditional move
6758 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6759   predicate(VM_Version::supports_cmov() );
6760   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6761   ins_cost(200);
6762   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6763   opcode(0x0F,0x40);
6764   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6765   ins_pipe( pipe_cmov_reg );
6766 %}
6767 
6768 // Conditional move (non-P6 version)
6769 // Note:  a CMoveP is generated for  stubs and native wrappers
6770 //        regardless of whether we are on a P6, so we
6771 //        emulate a cmov here
6772 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6773   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6774   ins_cost(300);
6775   format %{ "Jn$cop   skip\n\t"
6776           "MOV    $dst,$src\t# pointer\n"
6777       "skip:" %}
6778   opcode(0x8b);
6779   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6780   ins_pipe( pipe_cmov_reg );
6781 %}
6782 
6783 // Conditional move
6784 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6785   predicate(VM_Version::supports_cmov() );
6786   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6787   ins_cost(200);
6788   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6789   opcode(0x0F,0x40);
6790   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6791   ins_pipe( pipe_cmov_reg );
6792 %}
6793 
6794 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6795   predicate(VM_Version::supports_cmov() );
6796   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6797   ins_cost(200);
6798   expand %{
6799     cmovP_regU(cop, cr, dst, src);
6800   %}
6801 %}
6802 
6803 // DISABLED: Requires the ADLC to emit a bottom_type call that
6804 // correctly meets the two pointer arguments; one is an incoming
6805 // register but the other is a memory operand.  ALSO appears to
6806 // be buggy with implicit null checks.
6807 //
6808 //// Conditional move
6809 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6810 //  predicate(VM_Version::supports_cmov() );
6811 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6812 //  ins_cost(250);
6813 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6814 //  opcode(0x0F,0x40);
6815 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6816 //  ins_pipe( pipe_cmov_mem );
6817 //%}
6818 //
6819 //// Conditional move
6820 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6821 //  predicate(VM_Version::supports_cmov() );
6822 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6823 //  ins_cost(250);
6824 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6825 //  opcode(0x0F,0x40);
6826 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6827 //  ins_pipe( pipe_cmov_mem );
6828 //%}
6829 
6830 // Conditional move
6831 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6832   predicate(UseSSE<=1);
6833   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6834   ins_cost(200);
6835   format %{ "FCMOV$cop $dst,$src\t# double" %}
6836   opcode(0xDA);
6837   ins_encode( enc_cmov_dpr(cop,src) );
6838   ins_pipe( pipe_cmovDPR_reg );
6839 %}
6840 
6841 // Conditional move
6842 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6843   predicate(UseSSE==0);
6844   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6845   ins_cost(200);
6846   format %{ "FCMOV$cop $dst,$src\t# float" %}
6847   opcode(0xDA);
6848   ins_encode( enc_cmov_dpr(cop,src) );
6849   ins_pipe( pipe_cmovDPR_reg );
6850 %}
6851 
6852 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
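// FCMOVcc only exists for the unsigned/unordered conditions (B, E, BE, U and
// their negations), so a signed-flag cmove is emulated with a short branch
// around an FPU register move.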
6853 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6854   predicate(UseSSE<=1);
6855   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6856   ins_cost(200);
6857   format %{ "Jn$cop   skip\n\t"
6858             "MOV    $dst,$src\t# double\n"
6859       "skip:" %}
6860   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6861   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6862   ins_pipe( pipe_cmovDPR_reg );
6863 %}
6864 
6865 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6866 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6867   predicate(UseSSE==0);
6868   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6869   ins_cost(200);
6870   format %{ "Jn$cop    skip\n\t"
6871             "MOV    $dst,$src\t# float\n"
6872       "skip:" %}
6873   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6874   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6875   ins_pipe( pipe_cmovDPR_reg );
6876 %}
6877 
6878 // No CMOVE with SSE/SSE2
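// There is no conditional move into an XMM register, so a CMoveF/CMoveD held in
// SSE registers is likewise emulated with a branch around a MOVSS/MOVSD on the
// inverted condition.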
6879 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6880   predicate (UseSSE>=1);
6881   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6882   ins_cost(200);
6883   format %{ "Jn$cop   skip\n\t"
6884             "MOVSS  $dst,$src\t# float\n"
6885       "skip:" %}
6886   ins_encode %{
6887     Label skip;
6888     // Invert sense of branch from sense of CMOV
6889     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6890     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6891     __ bind(skip);
6892   %}
6893   ins_pipe( pipe_slow );
6894 %}
6895 
6896 // No CMOVE with SSE/SSE2
6897 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6898   predicate (UseSSE>=2);
6899   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6900   ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
      "skip:" %}
6904   ins_encode %{
6905     Label skip;
6906     // Invert sense of branch from sense of CMOV
6907     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6908     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6909     __ bind(skip);
6910   %}
6911   ins_pipe( pipe_slow );
6912 %}
6913 
6914 // unsigned version
6915 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6916   predicate (UseSSE>=1);
6917   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6918   ins_cost(200);
6919   format %{ "Jn$cop   skip\n\t"
6920             "MOVSS  $dst,$src\t# float\n"
6921       "skip:" %}
6922   ins_encode %{
6923     Label skip;
6924     // Invert sense of branch from sense of CMOV
6925     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6926     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6927     __ bind(skip);
6928   %}
6929   ins_pipe( pipe_slow );
6930 %}
6931 
6932 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6933   predicate (UseSSE>=1);
6934   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6935   ins_cost(200);
6936   expand %{
6937     fcmovF_regU(cop, cr, dst, src);
6938   %}
6939 %}
6940 
6941 // unsigned version
6942 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6943   predicate (UseSSE>=2);
6944   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6945   ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# double\n"
      "skip:" %}
6949   ins_encode %{
6950     Label skip;
6951     // Invert sense of branch from sense of CMOV
6952     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6953     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6954     __ bind(skip);
6955   %}
6956   ins_pipe( pipe_slow );
6957 %}
6958 
6959 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6960   predicate (UseSSE>=2);
6961   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6962   ins_cost(200);
6963   expand %{
6964     fcmovD_regU(cop, cr, dst, src);
6965   %}
6966 %}
6967 
6968 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6969   predicate(VM_Version::supports_cmov() );
6970   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6971   ins_cost(200);
6972   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6973             "CMOV$cop $dst.hi,$src.hi" %}
6974   opcode(0x0F,0x40);
6975   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6976   ins_pipe( pipe_cmov_reg_long );
6977 %}
6978 
6979 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6980   predicate(VM_Version::supports_cmov() );
6981   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6982   ins_cost(200);
6983   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6984             "CMOV$cop $dst.hi,$src.hi" %}
6985   opcode(0x0F,0x40);
6986   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6987   ins_pipe( pipe_cmov_reg_long );
6988 %}
6989 
6990 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6991   predicate(VM_Version::supports_cmov() );
6992   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6993   ins_cost(200);
6994   expand %{
6995     cmovL_regU(cop, cr, dst, src);
6996   %}
6997 %}
6998 
6999 //----------Arithmetic Instructions--------------------------------------------
7000 //----------Addition Instructions----------------------------------------------
7001 
7002 // Integer Addition Instructions
7003 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7004   match(Set dst (AddI dst src));
7005   effect(KILL cr);
7006 
7007   size(2);
7008   format %{ "ADD    $dst,$src" %}
7009   opcode(0x03);
7010   ins_encode( OpcP, RegReg( dst, src) );
7011   ins_pipe( ialu_reg_reg );
7012 %}
7013 
7014 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7015   match(Set dst (AddI dst src));
7016   effect(KILL cr);
7017 
7018   format %{ "ADD    $dst,$src" %}
7019   opcode(0x81, 0x00); /* /0 id */
7020   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7021   ins_pipe( ialu_reg );
7022 %}
7023 
7024 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7025   predicate(UseIncDec);
7026   match(Set dst (AddI dst src));
7027   effect(KILL cr);
7028 
7029   size(1);
7030   format %{ "INC    $dst" %}
  opcode(0x40); /* 0x40 + rd => INC r32 */
7032   ins_encode( Opc_plus( primary, dst ) );
7033   ins_pipe( ialu_reg );
7034 %}
7035 
7036 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7037   match(Set dst (AddI src0 src1));
7038   ins_cost(110);
7039 
7040   format %{ "LEA    $dst,[$src0 + $src1]" %}
7041   opcode(0x8D); /* 0x8D /r */
7042   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7043   ins_pipe( ialu_reg_reg );
7044 %}
7045 
7046 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7047   match(Set dst (AddP src0 src1));
7048   ins_cost(110);
7049 
7050   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7051   opcode(0x8D); /* 0x8D /r */
7052   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7053   ins_pipe( ialu_reg_reg );
7054 %}
7055 
7056 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7057   predicate(UseIncDec);
7058   match(Set dst (AddI dst src));
7059   effect(KILL cr);
7060 
7061   size(1);
7062   format %{ "DEC    $dst" %}
  opcode(0x48); /* 0x48 + rd => DEC r32 */
7064   ins_encode( Opc_plus( primary, dst ) );
7065   ins_pipe( ialu_reg );
7066 %}
7067 
7068 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7069   match(Set dst (AddP dst src));
7070   effect(KILL cr);
7071 
7072   size(2);
7073   format %{ "ADD    $dst,$src" %}
7074   opcode(0x03);
7075   ins_encode( OpcP, RegReg( dst, src) );
7076   ins_pipe( ialu_reg_reg );
7077 %}
7078 
7079 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7080   match(Set dst (AddP dst src));
7081   effect(KILL cr);
7082 
7083   format %{ "ADD    $dst,$src" %}
7084   opcode(0x81,0x00); /* Opcode 81 /0 id */
7085   // ins_encode( RegImm( dst, src) );
7086   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7087   ins_pipe( ialu_reg );
7088 %}
7089 
7090 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7091   match(Set dst (AddI dst (LoadI src)));
7092   effect(KILL cr);
7093 
7094   ins_cost(125);
7095   format %{ "ADD    $dst,$src" %}
7096   opcode(0x03);
7097   ins_encode( OpcP, RegMem( dst, src) );
7098   ins_pipe( ialu_reg_mem );
7099 %}
7100 
7101 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7102   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7103   effect(KILL cr);
7104 
7105   ins_cost(150);
7106   format %{ "ADD    $dst,$src" %}
7107   opcode(0x01);  /* Opcode 01 /r */
7108   ins_encode( OpcP, RegMem( src, dst ) );
7109   ins_pipe( ialu_mem_reg );
7110 %}
7111 
7112 // Add Memory with Immediate
7113 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7114   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7115   effect(KILL cr);
7116 
7117   ins_cost(125);
7118   format %{ "ADD    $dst,$src" %}
7119   opcode(0x81);               /* Opcode 81 /0 id */
7120   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7121   ins_pipe( ialu_mem_imm );
7122 %}
7123 
7124 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7125   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7126   effect(KILL cr);
7127 
7128   ins_cost(125);
7129   format %{ "INC    $dst" %}
7130   opcode(0xFF);               /* Opcode FF /0 */
7131   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7132   ins_pipe( ialu_mem_imm );
7133 %}
7134 
7135 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7136   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7137   effect(KILL cr);
7138 
7139   ins_cost(125);
7140   format %{ "DEC    $dst" %}
7141   opcode(0xFF);               /* Opcode FF /1 */
7142   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7143   ins_pipe( ialu_mem_imm );
7144 %}
7145 
7146 
7147 instruct checkCastPP( eRegP dst ) %{
7148   match(Set dst (CheckCastPP dst));
7149 
7150   size(0);
7151   format %{ "#checkcastPP of $dst" %}
7152   ins_encode( /*empty encoding*/ );
7153   ins_pipe( empty );
7154 %}
7155 
7156 instruct castPP( eRegP dst ) %{
7157   match(Set dst (CastPP dst));
7158   format %{ "#castPP of $dst" %}
7159   ins_encode( /*empty encoding*/ );
7160   ins_pipe( empty );
7161 %}
7162 
7163 instruct castII( rRegI dst ) %{
7164   match(Set dst (CastII dst));
7165   format %{ "#castII of $dst" %}
7166   ins_encode( /*empty encoding*/ );
7167   ins_cost(0);
7168   ins_pipe( empty );
7169 %}
7170 
7171 
7172 // Load-locked - same as a regular pointer load when used with compare-swap
7173 instruct loadPLocked(eRegP dst, memory mem) %{
7174   match(Set dst (LoadPLocked mem));
7175 
7176   ins_cost(125);
7177   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7178   opcode(0x8B);
7179   ins_encode( OpcP, RegMem(dst,mem));
7180   ins_pipe( ialu_reg_mem );
7181 %}
7182 
7183 // Conditional-store of the updated heap-top.
7184 // Used during allocation of the shared heap.
7185 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
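// CMPXCHG r/m32,r32 compares EAX with the memory operand: if they are equal the
// register operand is stored and ZF is set; otherwise the current memory value
// is loaded into EAX and ZF is cleared.  The conditional-store and
// compare-and-swap instructs below all key off that ZF result.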
7186 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7187   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7188   // EAX is killed if there is contention, but then it's also unused.
7189   // In the common case of no contention, EAX holds the new oop address.
7190   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7191   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7192   ins_pipe( pipe_cmpxchg );
7193 %}
7194 
7195 // Conditional-store of an int value.
7196 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7197 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7198   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7199   effect(KILL oldval);
7200   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7201   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7202   ins_pipe( pipe_cmpxchg );
7203 %}
7204 
7205 // Conditional-store of a long value.
7206 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7207 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7208   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7209   effect(KILL oldval);
7210   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7211             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7212             "XCHG   EBX,ECX"
7213   %}
7214   ins_encode %{
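    // CMPXCHG8B m64 compares EDX:EAX with the memory operand; if they are equal
    // it stores ECX:EBX into memory and sets ZF, otherwise it loads the memory
    // operand into EDX:EAX and clears ZF.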
    // Note: we need to swap rbx and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high-order word of the new value to store, but
    //       our register encoding uses rbx.
7219     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7220     if( os::is_MP() )
7221       __ lock();
7222     __ cmpxchg8($mem$$Address);
7223     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7224   %}
7225   ins_pipe( pipe_cmpxchg );
7226 %}
7227 
// No flag versions for CompareAndSwap{P,I,L} because the matcher cannot match them
7229 
7230 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7231   predicate(VM_Version::supports_cx8());
7232   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7233   effect(KILL cr, KILL oldval);
7234   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7235             "MOV    $res,0\n\t"
7236             "JNE,s  fail\n\t"
7237             "MOV    $res,1\n"
7238           "fail:" %}
7239   ins_encode( enc_cmpxchg8(mem_ptr),
7240               enc_flags_ne_to_boolean(res) );
7241   ins_pipe( pipe_cmpxchg );
7242 %}
7243 
7244 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7245   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7246   effect(KILL cr, KILL oldval);
7247   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7248             "MOV    $res,0\n\t"
7249             "JNE,s  fail\n\t"
7250             "MOV    $res,1\n"
7251           "fail:" %}
7252   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7253   ins_pipe( pipe_cmpxchg );
7254 %}
7255 
7256 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7257   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7258   effect(KILL cr, KILL oldval);
7259   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7260             "MOV    $res,0\n\t"
7261             "JNE,s  fail\n\t"
7262             "MOV    $res,1\n"
7263           "fail:" %}
7264   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7265   ins_pipe( pipe_cmpxchg );
7266 %}
7267 
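// When the fetched value of a GetAndAddI is not used, a plain locked ADD is
// emitted instead of XADD: it has the same atomic read-modify-write effect but
// does not need to return the old value in a register.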
7268 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7269   predicate(n->as_LoadStore()->result_not_used());
7270   match(Set dummy (GetAndAddI mem add));
7271   effect(KILL cr);
7272   format %{ "ADDL  [$mem],$add" %}
7273   ins_encode %{
7274     if (os::is_MP()) { __ lock(); }
7275     __ addl($mem$$Address, $add$$constant);
7276   %}
7277   ins_pipe( pipe_cmpxchg );
7278 %}
7279 
7280 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7281   match(Set newval (GetAndAddI mem newval));
7282   effect(KILL cr);
7283   format %{ "XADDL  [$mem],$newval" %}
7284   ins_encode %{
7285     if (os::is_MP()) { __ lock(); }
7286     __ xaddl($mem$$Address, $newval$$Register);
7287   %}
7288   ins_pipe( pipe_cmpxchg );
7289 %}
7290 
7291 instruct xchgI( memory mem, rRegI newval) %{
7292   match(Set newval (GetAndSetI mem newval));
7293   format %{ "XCHGL  $newval,[$mem]" %}
7294   ins_encode %{
7295     __ xchgl($newval$$Register, $mem$$Address);
7296   %}
7297   ins_pipe( pipe_cmpxchg );
7298 %}
7299 
7300 instruct xchgP( memory mem, pRegP newval) %{
7301   match(Set newval (GetAndSetP mem newval));
7302   format %{ "XCHGL  $newval,[$mem]" %}
7303   ins_encode %{
7304     __ xchgl($newval$$Register, $mem$$Address);
7305   %}
7306   ins_pipe( pipe_cmpxchg );
7307 %}
7308 
7309 //----------Subtraction Instructions-------------------------------------------
7310 
7311 // Integer Subtraction Instructions
7312 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7313   match(Set dst (SubI dst src));
7314   effect(KILL cr);
7315 
7316   size(2);
7317   format %{ "SUB    $dst,$src" %}
7318   opcode(0x2B);
7319   ins_encode( OpcP, RegReg( dst, src) );
7320   ins_pipe( ialu_reg_reg );
7321 %}
7322 
7323 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7324   match(Set dst (SubI dst src));
7325   effect(KILL cr);
7326 
7327   format %{ "SUB    $dst,$src" %}
7328   opcode(0x81,0x05);  /* Opcode 81 /5 */
7329   // ins_encode( RegImm( dst, src) );
7330   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7331   ins_pipe( ialu_reg );
7332 %}
7333 
7334 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7335   match(Set dst (SubI dst (LoadI src)));
7336   effect(KILL cr);
7337 
7338   ins_cost(125);
7339   format %{ "SUB    $dst,$src" %}
7340   opcode(0x2B);
7341   ins_encode( OpcP, RegMem( dst, src) );
7342   ins_pipe( ialu_reg_mem );
7343 %}
7344 
7345 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7346   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7347   effect(KILL cr);
7348 
7349   ins_cost(150);
7350   format %{ "SUB    $dst,$src" %}
7351   opcode(0x29);  /* Opcode 29 /r */
7352   ins_encode( OpcP, RegMem( src, dst ) );
7353   ins_pipe( ialu_mem_reg );
7354 %}
7355 
7356 // Subtract from a pointer
7357 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7358   match(Set dst (AddP dst (SubI zero src)));
7359   effect(KILL cr);
7360 
7361   size(2);
7362   format %{ "SUB    $dst,$src" %}
7363   opcode(0x2B);
7364   ins_encode( OpcP, RegReg( dst, src) );
7365   ins_pipe( ialu_reg_reg );
7366 %}
7367 
7368 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7369   match(Set dst (SubI zero dst));
7370   effect(KILL cr);
7371 
7372   size(2);
7373   format %{ "NEG    $dst" %}
7374   opcode(0xF7,0x03);  // Opcode F7 /3
7375   ins_encode( OpcP, RegOpc( dst ) );
7376   ins_pipe( ialu_reg );
7377 %}
7378 
7379 //----------Multiplication/Division Instructions-------------------------------
7380 // Integer Multiplication Instructions
7381 // Multiply Register
7382 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7383   match(Set dst (MulI dst src));
7384   effect(KILL cr);
7385 
7386   size(3);
7387   ins_cost(300);
7388   format %{ "IMUL   $dst,$src" %}
7389   opcode(0xAF, 0x0F);
7390   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7391   ins_pipe( ialu_reg_reg_alu0 );
7392 %}
7393 
7394 // Multiply 32-bit Immediate
7395 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7396   match(Set dst (MulI src imm));
7397   effect(KILL cr);
7398 
7399   ins_cost(300);
7400   format %{ "IMUL   $dst,$src,$imm" %}
7401   opcode(0x69);  /* 69 /r id */
7402   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7403   ins_pipe( ialu_reg_reg_alu0 );
7404 %}
7405 
7406 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7407   match(Set dst src);
7408   effect(KILL cr);
7409 
7410   // Note that this is artificially increased to make it more expensive than loadConL
7411   ins_cost(250);
7412   format %{ "MOV    EAX,$src\t// low word only" %}
7413   opcode(0xB8);
7414   ins_encode( LdImmL_Lo(dst, src) );
7415   ins_pipe( ialu_reg_fat );
7416 %}
7417 
7418 // Multiply by 32-bit Immediate, taking the shifted high order results
7419 //  (special case for shift by 32)
7420 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7421   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7422   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7423              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7424              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7425   effect(USE src1, KILL cr);
7426 
7427   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7428   ins_cost(0*100 + 1*400 - 150);
7429   format %{ "IMUL   EDX:EAX,$src1" %}
7430   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7431   ins_pipe( pipe_slow );
7432 %}
7433 
7434 // Multiply by 32-bit Immediate, taking the shifted high order results
7435 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7436   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7437   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7438              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7439              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7440   effect(USE src1, KILL cr);
7441 
7442   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7443   ins_cost(1*100 + 1*400 - 150);
7444   format %{ "IMUL   EDX:EAX,$src1\n\t"
7445             "SAR    EDX,$cnt-32" %}
7446   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7447   ins_pipe( pipe_slow );
7448 %}
7449 
7450 // Multiply Memory 32-bit Immediate
7451 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7452   match(Set dst (MulI (LoadI src) imm));
7453   effect(KILL cr);
7454 
7455   ins_cost(300);
7456   format %{ "IMUL   $dst,$src,$imm" %}
7457   opcode(0x69);  /* 69 /r id */
7458   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7459   ins_pipe( ialu_reg_mem_alu0 );
7460 %}
7461 
7462 // Multiply Memory
7463 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7464   match(Set dst (MulI dst (LoadI src)));
7465   effect(KILL cr);
7466 
7467   ins_cost(350);
7468   format %{ "IMUL   $dst,$src" %}
7469   opcode(0xAF, 0x0F);
7470   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7471   ins_pipe( ialu_reg_mem_alu0 );
7472 %}
7473 
7474 // Multiply Register Int to Long
7475 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7476   // Basic Idea: long = (long)int * (long)int
7477   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7478   effect(DEF dst, USE src, USE src1, KILL flags);
7479 
7480   ins_cost(300);
7481   format %{ "IMUL   $dst,$src1" %}
7482 
7483   ins_encode( long_int_multiply( dst, src1 ) );
7484   ins_pipe( ialu_reg_reg_alu0 );
7485 %}
7486 
7487 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7488   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7489   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7490   effect(KILL flags);
7491 
7492   ins_cost(300);
7493   format %{ "MUL    $dst,$src1" %}
7494 
7495   ins_encode( long_uint_multiply(dst, src1) );
7496   ins_pipe( ialu_reg_reg_alu0 );
7497 %}
7498 
7499 // Multiply Register Long
7500 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7501   match(Set dst (MulL dst src));
7502   effect(KILL cr, TEMP tmp);
7503   ins_cost(4*100+3*400);
7504 // Basic idea: lo(result) = lo(x_lo * y_lo)
7505 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7506   format %{ "MOV    $tmp,$src.lo\n\t"
7507             "IMUL   $tmp,EDX\n\t"
7508             "MOV    EDX,$src.hi\n\t"
7509             "IMUL   EDX,EAX\n\t"
7510             "ADD    $tmp,EDX\n\t"
7511             "MUL    EDX:EAX,$src.lo\n\t"
7512             "ADD    EDX,$tmp" %}
7513   ins_encode( long_multiply( dst, src, tmp ) );
7514   ins_pipe( pipe_slow );
7515 %}
7516 
7517 // Multiply Register Long where the left operand's high 32 bits are zero
7518 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7519   predicate(is_operand_hi32_zero(n->in(1)));
7520   match(Set dst (MulL dst src));
7521   effect(KILL cr, TEMP tmp);
7522   ins_cost(2*100+2*400);
7523 // Basic idea: lo(result) = lo(x_lo * y_lo)
7524 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7525   format %{ "MOV    $tmp,$src.hi\n\t"
7526             "IMUL   $tmp,EAX\n\t"
7527             "MUL    EDX:EAX,$src.lo\n\t"
7528             "ADD    EDX,$tmp" %}
7529   ins_encode %{
7530     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7531     __ imull($tmp$$Register, rax);
7532     __ mull($src$$Register);
7533     __ addl(rdx, $tmp$$Register);
7534   %}
7535   ins_pipe( pipe_slow );
7536 %}
7537 
7538 // Multiply Register Long where the right operand's high 32 bits are zero
7539 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7540   predicate(is_operand_hi32_zero(n->in(2)));
7541   match(Set dst (MulL dst src));
7542   effect(KILL cr, TEMP tmp);
7543   ins_cost(2*100+2*400);
7544 // Basic idea: lo(result) = lo(x_lo * y_lo)
7545 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7546   format %{ "MOV    $tmp,$src.lo\n\t"
7547             "IMUL   $tmp,EDX\n\t"
7548             "MUL    EDX:EAX,$src.lo\n\t"
7549             "ADD    EDX,$tmp" %}
7550   ins_encode %{
7551     __ movl($tmp$$Register, $src$$Register);
7552     __ imull($tmp$$Register, rdx);
7553     __ mull($src$$Register);
7554     __ addl(rdx, $tmp$$Register);
7555   %}
7556   ins_pipe( pipe_slow );
7557 %}
7558 
7559 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7560 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7561   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7562   match(Set dst (MulL dst src));
7563   effect(KILL cr);
7564   ins_cost(1*400);
7565 // Basic idea: lo(result) = lo(x_lo * y_lo)
7566 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
  format %{ "MUL    EDX:EAX,$src.lo" %}
7568   ins_encode %{
7569     __ mull($src$$Register);
7570   %}
7571   ins_pipe( pipe_slow );
7572 %}
7573 
7574 // Multiply Register Long by small constant
7575 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7576   match(Set dst (MulL dst src));
7577   effect(KILL cr, TEMP tmp);
7578   ins_cost(2*100+2*400);
7579   size(12);
7580 // Basic idea: lo(result) = lo(src * EAX)
7581 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7582   format %{ "IMUL   $tmp,EDX,$src\n\t"
7583             "MOV    EDX,$src\n\t"
7584             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7585             "ADD    EDX,$tmp" %}
7586   ins_encode( long_multiply_con( dst, src, tmp ) );
7587   ins_pipe( pipe_slow );
7588 %}
7589 
7590 // Integer DIV with Register
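// IDIV faults (#DE) on min_jint / -1 because the quotient 2^31 does not fit in
// 32 bits, while Java requires min_jint / -1 == min_jint (and remainder 0), so
// that single combination is filtered out before the divide: EAX already holds
// the correct quotient and EDX is zeroed for the remainder.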
7591 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7592   match(Set rax (DivI rax div));
7593   effect(KILL rdx, KILL cr);
7594   size(26);
7595   ins_cost(30*100+10*100);
7596   format %{ "CMP    EAX,0x80000000\n\t"
7597             "JNE,s  normal\n\t"
7598             "XOR    EDX,EDX\n\t"
7599             "CMP    ECX,-1\n\t"
7600             "JE,s   done\n"
7601     "normal: CDQ\n\t"
7602             "IDIV   $div\n\t"
7603     "done:"        %}
7604   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7605   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7606   ins_pipe( ialu_reg_reg_alu0 );
7607 %}
7608 
7609 // Divide Register Long
7610 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7611   match(Set dst (DivL src1 src2));
7612   effect( KILL cr, KILL cx, KILL bx );
7613   ins_cost(10000);
7614   format %{ "PUSH   $src1.hi\n\t"
7615             "PUSH   $src1.lo\n\t"
7616             "PUSH   $src2.hi\n\t"
7617             "PUSH   $src2.lo\n\t"
7618             "CALL   SharedRuntime::ldiv\n\t"
7619             "ADD    ESP,16" %}
7620   ins_encode( long_div(src1,src2) );
7621   ins_pipe( pipe_slow );
7622 %}
7623 
7624 // Integer DIVMOD with Register, both quotient and mod results
7625 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7626   match(DivModI rax div);
7627   effect(KILL cr);
7628   size(26);
7629   ins_cost(30*100+10*100);
7630   format %{ "CMP    EAX,0x80000000\n\t"
7631             "JNE,s  normal\n\t"
7632             "XOR    EDX,EDX\n\t"
7633             "CMP    ECX,-1\n\t"
7634             "JE,s   done\n"
7635     "normal: CDQ\n\t"
7636             "IDIV   $div\n\t"
7637     "done:"        %}
7638   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7639   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7640   ins_pipe( pipe_slow );
7641 %}
7642 
7643 // Integer MOD with Register
7644 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7645   match(Set rdx (ModI rax div));
7646   effect(KILL rax, KILL cr);
7647 
7648   size(26);
7649   ins_cost(300);
7650   format %{ "CDQ\n\t"
7651             "IDIV   $div" %}
7652   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7653   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7654   ins_pipe( ialu_reg_reg_alu0 );
7655 %}
7656 
7657 // Remainder Register Long
7658 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7659   match(Set dst (ModL src1 src2));
7660   effect( KILL cr, KILL cx, KILL bx );
7661   ins_cost(10000);
7662   format %{ "PUSH   $src1.hi\n\t"
7663             "PUSH   $src1.lo\n\t"
7664             "PUSH   $src2.hi\n\t"
7665             "PUSH   $src2.lo\n\t"
7666             "CALL   SharedRuntime::lrem\n\t"
7667             "ADD    ESP,16" %}
7668   ins_encode( long_mod(src1,src2) );
7669   ins_pipe( pipe_slow );
7670 %}
7671 
7672 // Divide Register Long (no special case since divisor != -1)
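// The encoding below does schoolbook long division in base 2^32 with two unsigned
// 32-bit DIVs: a negative dividend is negated first, the high word is divided by
// |imm|, then the remainder:low pair is divided, and the result is negated back
// if needed.  When |imm| exceeds the high word the quotient already fits in
// 32 bits and the single "fast" DIV is enough.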
7673 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7674   match(Set dst (DivL dst imm));
7675   effect( TEMP tmp, TEMP tmp2, KILL cr );
7676   ins_cost(1000);
7677   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7678             "XOR    $tmp2,$tmp2\n\t"
7679             "CMP    $tmp,EDX\n\t"
7680             "JA,s   fast\n\t"
7681             "MOV    $tmp2,EAX\n\t"
7682             "MOV    EAX,EDX\n\t"
7683             "MOV    EDX,0\n\t"
7684             "JLE,s  pos\n\t"
7685             "LNEG   EAX : $tmp2\n\t"
7686             "DIV    $tmp # unsigned division\n\t"
7687             "XCHG   EAX,$tmp2\n\t"
7688             "DIV    $tmp\n\t"
7689             "LNEG   $tmp2 : EAX\n\t"
7690             "JMP,s  done\n"
7691     "pos:\n\t"
7692             "DIV    $tmp\n\t"
7693             "XCHG   EAX,$tmp2\n"
7694     "fast:\n\t"
7695             "DIV    $tmp\n"
7696     "done:\n\t"
7697             "MOV    EDX,$tmp2\n\t"
7698             "NEG    EDX:EAX # if $imm < 0" %}
7699   ins_encode %{
7700     int con = (int)$imm$$constant;
7701     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7702     int pcon = (con > 0) ? con : -con;
7703     Label Lfast, Lpos, Ldone;
7704 
7705     __ movl($tmp$$Register, pcon);
7706     __ xorl($tmp2$$Register,$tmp2$$Register);
7707     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7708     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7709 
7710     __ movl($tmp2$$Register, $dst$$Register); // save
7711     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7712     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7713     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7714 
7715     // Negative dividend.
7716     // convert value to positive to use unsigned division
7717     __ lneg($dst$$Register, $tmp2$$Register);
7718     __ divl($tmp$$Register);
7719     __ xchgl($dst$$Register, $tmp2$$Register);
7720     __ divl($tmp$$Register);
7721     // revert result back to negative
7722     __ lneg($tmp2$$Register, $dst$$Register);
7723     __ jmpb(Ldone);
7724 
7725     __ bind(Lpos);
7726     __ divl($tmp$$Register); // Use unsigned division
7727     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit high result
7729 
7730     __ bind(Lfast);
7731     // fast path: src is positive
7732     __ divl($tmp$$Register); // Use unsigned division
7733 
7734     __ bind(Ldone);
7735     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7736     if (con < 0) {
7737       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7738     }
7739   %}
7740   ins_pipe( pipe_slow );
7741 %}
7742 
// Remainder Register Long (remainder fits into 32 bits)
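// Same two-DIV scheme as divL_eReg_imm32 above, but only the remainder (EDX after
// the last DIV) is kept: it is negated if the dividend was negative and then
// sign-extended into EDX:EAX, so the result takes the sign of the dividend as
// Java's % requires.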
7744 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7745   match(Set dst (ModL dst imm));
7746   effect( TEMP tmp, TEMP tmp2, KILL cr );
7747   ins_cost(1000);
7748   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7749             "CMP    $tmp,EDX\n\t"
7750             "JA,s   fast\n\t"
7751             "MOV    $tmp2,EAX\n\t"
7752             "MOV    EAX,EDX\n\t"
7753             "MOV    EDX,0\n\t"
7754             "JLE,s  pos\n\t"
7755             "LNEG   EAX : $tmp2\n\t"
7756             "DIV    $tmp # unsigned division\n\t"
7757             "MOV    EAX,$tmp2\n\t"
7758             "DIV    $tmp\n\t"
7759             "NEG    EDX\n\t"
7760             "JMP,s  done\n"
7761     "pos:\n\t"
7762             "DIV    $tmp\n\t"
7763             "MOV    EAX,$tmp2\n"
7764     "fast:\n\t"
7765             "DIV    $tmp\n"
7766     "done:\n\t"
7767             "MOV    EAX,EDX\n\t"
7768             "SAR    EDX,31\n\t" %}
7769   ins_encode %{
7770     int con = (int)$imm$$constant;
7771     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7772     int pcon = (con > 0) ? con : -con;
7773     Label  Lfast, Lpos, Ldone;
7774 
7775     __ movl($tmp$$Register, pcon);
7776     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7777     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7778 
7779     __ movl($tmp2$$Register, $dst$$Register); // save
7780     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7781     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7782     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7783 
7784     // Negative dividend.
7785     // convert value to positive to use unsigned division
7786     __ lneg($dst$$Register, $tmp2$$Register);
7787     __ divl($tmp$$Register);
7788     __ movl($dst$$Register, $tmp2$$Register);
7789     __ divl($tmp$$Register);
7790     // revert remainder back to negative
7791     __ negl(HIGH_FROM_LOW($dst$$Register));
7792     __ jmpb(Ldone);
7793 
7794     __ bind(Lpos);
7795     __ divl($tmp$$Register);
7796     __ movl($dst$$Register, $tmp2$$Register);
7797 
7798     __ bind(Lfast);
7799     // fast path: src is positive
7800     __ divl($tmp$$Register);
7801 
7802     __ bind(Ldone);
7803     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7804     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7805 
7806   %}
7807   ins_pipe( pipe_slow );
7808 %}
7809 
7810 // Integer Shift Instructions
7811 // Shift Left by one
7812 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7813   match(Set dst (LShiftI dst shift));
7814   effect(KILL cr);
7815 
7816   size(2);
7817   format %{ "SHL    $dst,$shift" %}
7818   opcode(0xD1, 0x4);  /* D1 /4 */
7819   ins_encode( OpcP, RegOpc( dst ) );
7820   ins_pipe( ialu_reg );
7821 %}
7822 
7823 // Shift Left by 8-bit immediate
7824 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7825   match(Set dst (LShiftI dst shift));
7826   effect(KILL cr);
7827 
7828   size(3);
7829   format %{ "SHL    $dst,$shift" %}
7830   opcode(0xC1, 0x4);  /* C1 /4 ib */
7831   ins_encode( RegOpcImm( dst, shift) );
7832   ins_pipe( ialu_reg );
7833 %}
7834 
7835 // Shift Left by variable
7836 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7837   match(Set dst (LShiftI dst shift));
7838   effect(KILL cr);
7839 
7840   size(2);
7841   format %{ "SHL    $dst,$shift" %}
7842   opcode(0xD3, 0x4);  /* D3 /4 */
7843   ins_encode( OpcP, RegOpc( dst ) );
7844   ins_pipe( ialu_reg_reg );
7845 %}
7846 
7847 // Arithmetic shift right by one
7848 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7849   match(Set dst (RShiftI dst shift));
7850   effect(KILL cr);
7851 
7852   size(2);
7853   format %{ "SAR    $dst,$shift" %}
7854   opcode(0xD1, 0x7);  /* D1 /7 */
7855   ins_encode( OpcP, RegOpc( dst ) );
7856   ins_pipe( ialu_reg );
7857 %}
7858 
7859 // Arithmetic shift right by one
7860 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7861   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7862   effect(KILL cr);
7863   format %{ "SAR    $dst,$shift" %}
7864   opcode(0xD1, 0x7);  /* D1 /7 */
7865   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7866   ins_pipe( ialu_mem_imm );
7867 %}
7868 
7869 // Arithmetic Shift Right by 8-bit immediate
7870 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7871   match(Set dst (RShiftI dst shift));
7872   effect(KILL cr);
7873 
7874   size(3);
7875   format %{ "SAR    $dst,$shift" %}
7876   opcode(0xC1, 0x7);  /* C1 /7 ib */
7877   ins_encode( RegOpcImm( dst, shift ) );
7878   ins_pipe( ialu_mem_imm );
7879 %}
7880 
7881 // Arithmetic Shift Right by 8-bit immediate
7882 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7883   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7884   effect(KILL cr);
7885 
7886   format %{ "SAR    $dst,$shift" %}
7887   opcode(0xC1, 0x7);  /* C1 /7 ib */
7888   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7889   ins_pipe( ialu_mem_imm );
7890 %}
7891 
7892 // Arithmetic Shift Right by variable
7893 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7894   match(Set dst (RShiftI dst shift));
7895   effect(KILL cr);
7896 
7897   size(2);
7898   format %{ "SAR    $dst,$shift" %}
7899   opcode(0xD3, 0x7);  /* D3 /7 */
7900   ins_encode( OpcP, RegOpc( dst ) );
7901   ins_pipe( ialu_reg_reg );
7902 %}
7903 
7904 // Logical shift right by one
7905 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7906   match(Set dst (URShiftI dst shift));
7907   effect(KILL cr);
7908 
7909   size(2);
7910   format %{ "SHR    $dst,$shift" %}
7911   opcode(0xD1, 0x5);  /* D1 /5 */
7912   ins_encode( OpcP, RegOpc( dst ) );
7913   ins_pipe( ialu_reg );
7914 %}
7915 
7916 // Logical Shift Right by 8-bit immediate
7917 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7918   match(Set dst (URShiftI dst shift));
7919   effect(KILL cr);
7920 
7921   size(3);
7922   format %{ "SHR    $dst,$shift" %}
7923   opcode(0xC1, 0x5);  /* C1 /5 ib */
7924   ins_encode( RegOpcImm( dst, shift) );
7925   ins_pipe( ialu_reg );
7926 %}
7927 
7928 
7929 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
7930 // This idiom is used by the compiler for the i2b bytecode.
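// In source terms this is the effect of (int)(byte)x, i.e. (x << 24) >> 24;
// MOVSX performs the same sign extension in a single instruction.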
7931 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7932   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7933 
7934   size(3);
7935   format %{ "MOVSX  $dst,$src :8" %}
7936   ins_encode %{
7937     __ movsbl($dst$$Register, $src$$Register);
7938   %}
7939   ins_pipe(ialu_reg_reg);
7940 %}
7941 
7942 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
7943 // This idiom is used by the compiler for the i2s bytecode.
7944 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7945   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7946 
7947   size(3);
7948   format %{ "MOVSX  $dst,$src :16" %}
7949   ins_encode %{
7950     __ movswl($dst$$Register, $src$$Register);
7951   %}
7952   ins_pipe(ialu_reg_reg);
7953 %}
7954 
7955 
7956 // Logical Shift Right by variable
7957 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7958   match(Set dst (URShiftI dst shift));
7959   effect(KILL cr);
7960 
7961   size(2);
7962   format %{ "SHR    $dst,$shift" %}
7963   opcode(0xD3, 0x5);  /* D3 /5 */
7964   ins_encode( OpcP, RegOpc( dst ) );
7965   ins_pipe( ialu_reg_reg );
7966 %}
7967 
7968 
7969 //----------Logical Instructions-----------------------------------------------
7970 //----------Integer Logical Instructions---------------------------------------
7971 // And Instructions
7972 // And Register with Register
7973 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7974   match(Set dst (AndI dst src));
7975   effect(KILL cr);
7976 
7977   size(2);
7978   format %{ "AND    $dst,$src" %}
7979   opcode(0x23);
7980   ins_encode( OpcP, RegReg( dst, src) );
7981   ins_pipe( ialu_reg_reg );
7982 %}
7983 
7984 // And Register with Immediate
7985 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7986   match(Set dst (AndI dst src));
7987   effect(KILL cr);
7988 
7989   format %{ "AND    $dst,$src" %}
7990   opcode(0x81,0x04);  /* Opcode 81 /4 */
7991   // ins_encode( RegImm( dst, src) );
7992   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7993   ins_pipe( ialu_reg );
7994 %}
7995 
7996 // And Register with Memory
7997 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7998   match(Set dst (AndI dst (LoadI src)));
7999   effect(KILL cr);
8000 
8001   ins_cost(125);
8002   format %{ "AND    $dst,$src" %}
8003   opcode(0x23);
8004   ins_encode( OpcP, RegMem( dst, src) );
8005   ins_pipe( ialu_reg_mem );
8006 %}
8007 
8008 // And Memory with Register
8009 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8010   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8011   effect(KILL cr);
8012 
8013   ins_cost(150);
8014   format %{ "AND    $dst,$src" %}
8015   opcode(0x21);  /* Opcode 21 /r */
8016   ins_encode( OpcP, RegMem( src, dst ) );
8017   ins_pipe( ialu_mem_reg );
8018 %}
8019 
8020 // And Memory with Immediate
8021 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8022   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8023   effect(KILL cr);
8024 
8025   ins_cost(125);
8026   format %{ "AND    $dst,$src" %}
8027   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8028   // ins_encode( MemImm( dst, src) );
8029   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8030   ins_pipe( ialu_mem_imm );
8031 %}
8032 
8033 // BMI1 instructions
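// The rules below map the usual bit-trick idioms onto BMI1 instructions:
//   ANDN    dst = ~src1 & src2      matched as (src1 ^ -1) & src2
//   BLSI    dst = -src & src        isolates the lowest set bit
//   BLSMSK  dst = (src - 1) ^ src   mask up to and including the lowest set bit
//   BLSR    dst = (src - 1) & src   clears the lowest set bit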
8034 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8035   match(Set dst (AndI (XorI src1 minus_1) src2));
8036   predicate(UseBMI1Instructions);
8037   effect(KILL cr);
8038 
8039   format %{ "ANDNL  $dst, $src1, $src2" %}
8040 
8041   ins_encode %{
8042     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8043   %}
8044   ins_pipe(ialu_reg);
8045 %}
8046 
8047 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8048   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8049   predicate(UseBMI1Instructions);
8050   effect(KILL cr);
8051 
8052   ins_cost(125);
8053   format %{ "ANDNL  $dst, $src1, $src2" %}
8054 
8055   ins_encode %{
8056     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8057   %}
8058   ins_pipe(ialu_reg_mem);
8059 %}
8060 
8061 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8062   match(Set dst (AndI (SubI imm_zero src) src));
8063   predicate(UseBMI1Instructions);
8064   effect(KILL cr);
8065 
8066   format %{ "BLSIL  $dst, $src" %}
8067 
8068   ins_encode %{
8069     __ blsil($dst$$Register, $src$$Register);
8070   %}
8071   ins_pipe(ialu_reg);
8072 %}
8073 
8074 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8075   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8076   predicate(UseBMI1Instructions);
8077   effect(KILL cr);
8078 
8079   ins_cost(125);
8080   format %{ "BLSIL  $dst, $src" %}
8081 
8082   ins_encode %{
8083     __ blsil($dst$$Register, $src$$Address);
8084   %}
8085   ins_pipe(ialu_reg_mem);
8086 %}
8087 
8088 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8089 %{
8090   match(Set dst (XorI (AddI src minus_1) src));
8091   predicate(UseBMI1Instructions);
8092   effect(KILL cr);
8093 
8094   format %{ "BLSMSKL $dst, $src" %}
8095 
8096   ins_encode %{
8097     __ blsmskl($dst$$Register, $src$$Register);
8098   %}
8099 
8100   ins_pipe(ialu_reg);
8101 %}
8102 
8103 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8104 %{
8105   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8106   predicate(UseBMI1Instructions);
8107   effect(KILL cr);
8108 
8109   ins_cost(125);
8110   format %{ "BLSMSKL $dst, $src" %}
8111 
8112   ins_encode %{
8113     __ blsmskl($dst$$Register, $src$$Address);
8114   %}
8115 
8116   ins_pipe(ialu_reg_mem);
8117 %}
8118 
8119 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8120 %{
8121   match(Set dst (AndI (AddI src minus_1) src) );
8122   predicate(UseBMI1Instructions);
8123   effect(KILL cr);
8124 
8125   format %{ "BLSRL  $dst, $src" %}
8126 
8127   ins_encode %{
8128     __ blsrl($dst$$Register, $src$$Register);
8129   %}
8130 
8131   ins_pipe(ialu_reg);
8132 %}
8133 
8134 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8135 %{
8136   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8137   predicate(UseBMI1Instructions);
8138   effect(KILL cr);
8139 
8140   ins_cost(125);
8141   format %{ "BLSRL  $dst, $src" %}
8142 
8143   ins_encode %{
8144     __ blsrl($dst$$Register, $src$$Address);
8145   %}
8146 
8147   ins_pipe(ialu_reg_mem);
8148 %}
8149 
8150 // Or Instructions
8151 // Or Register with Register
8152 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8153   match(Set dst (OrI dst src));
8154   effect(KILL cr);
8155 
8156   size(2);
8157   format %{ "OR     $dst,$src" %}
8158   opcode(0x0B);
8159   ins_encode( OpcP, RegReg( dst, src) );
8160   ins_pipe( ialu_reg_reg );
8161 %}
8162 
8163 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8164   match(Set dst (OrI dst (CastP2X src)));
8165   effect(KILL cr);
8166 
8167   size(2);
8168   format %{ "OR     $dst,$src" %}
8169   opcode(0x0B);
8170   ins_encode( OpcP, RegReg( dst, src) );
8171   ins_pipe( ialu_reg_reg );
8172 %}
8173 
8174 
8175 // Or Register with Immediate
8176 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8177   match(Set dst (OrI dst src));
8178   effect(KILL cr);
8179 
8180   format %{ "OR     $dst,$src" %}
8181   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8182   // ins_encode( RegImm( dst, src) );
8183   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8184   ins_pipe( ialu_reg );
8185 %}
8186 
8187 // Or Register with Memory
8188 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8189   match(Set dst (OrI dst (LoadI src)));
8190   effect(KILL cr);
8191 
8192   ins_cost(125);
8193   format %{ "OR     $dst,$src" %}
8194   opcode(0x0B);
8195   ins_encode( OpcP, RegMem( dst, src) );
8196   ins_pipe( ialu_reg_mem );
8197 %}
8198 
8199 // Or Memory with Register
8200 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8201   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8202   effect(KILL cr);
8203 
8204   ins_cost(150);
8205   format %{ "OR     $dst,$src" %}
8206   opcode(0x09);  /* Opcode 09 /r */
8207   ins_encode( OpcP, RegMem( src, dst ) );
8208   ins_pipe( ialu_mem_reg );
8209 %}
8210 
8211 // Or Memory with Immediate
8212 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8213   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8214   effect(KILL cr);
8215 
8216   ins_cost(125);
8217   format %{ "OR     $dst,$src" %}
8218   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8219   // ins_encode( MemImm( dst, src) );
8220   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8221   ins_pipe( ialu_mem_imm );
8222 %}
8223 
8224 // ROL/ROR
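// The match rules below recognize the shift/or rotate idiom, e.g.
//   (x << n) | (x >>> (32 - n))  ==>  ROL x,n
//   (x >>> n) | (x << (32 - n))  ==>  ROR x,n
// The 8-bit immediate forms additionally require, via their predicate, that the
// two shift counts sum to 0 modulo 32.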
8225 // ROL expand
8226 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8227   effect(USE_DEF dst, USE shift, KILL cr);
8228 
8229   format %{ "ROL    $dst, $shift" %}
8230   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8231   ins_encode( OpcP, RegOpc( dst ));
8232   ins_pipe( ialu_reg );
8233 %}
8234 
8235 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8236   effect(USE_DEF dst, USE shift, KILL cr);
8237 
8238   format %{ "ROL    $dst, $shift" %}
8239   opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8240   ins_encode( RegOpcImm(dst, shift) );
8241   ins_pipe(ialu_reg);
8242 %}
8243 
8244 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8245   effect(USE_DEF dst, USE shift, KILL cr);
8246 
8247   format %{ "ROL    $dst, $shift" %}
8248   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8249   ins_encode(OpcP, RegOpc(dst));
8250   ins_pipe( ialu_reg_reg );
8251 %}
8252 // end of ROL expand
8253 
8254 // ROL 32bit by one once
8255 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8256   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8257 
8258   expand %{
8259     rolI_eReg_imm1(dst, lshift, cr);
8260   %}
8261 %}
8262 
8263 // ROL 32bit var by imm8 once
8264 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8265   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8266   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8267 
8268   expand %{
8269     rolI_eReg_imm8(dst, lshift, cr);
8270   %}
8271 %}
8272 
8273 // ROL 32bit var by var once
8274 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8275   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8276 
8277   expand %{
8278     rolI_eReg_CL(dst, shift, cr);
8279   %}
8280 %}
8281 
8282 // ROL 32bit var by var once
8283 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8284   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8285 
8286   expand %{
8287     rolI_eReg_CL(dst, shift, cr);
8288   %}
8289 %}
8290 
8291 // ROR expand
8292 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8293   effect(USE_DEF dst, USE shift, KILL cr);
8294 
8295   format %{ "ROR    $dst, $shift" %}
8296   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8297   ins_encode( OpcP, RegOpc( dst ) );
8298   ins_pipe( ialu_reg );
8299 %}
8300 
8301 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8302   effect (USE_DEF dst, USE shift, KILL cr);
8303 
8304   format %{ "ROR    $dst, $shift" %}
8305   opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8306   ins_encode( RegOpcImm(dst, shift) );
8307   ins_pipe( ialu_reg );
8308 %}
8309 
8310 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8311   effect(USE_DEF dst, USE shift, KILL cr);
8312 
8313   format %{ "ROR    $dst, $shift" %}
8314   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8315   ins_encode(OpcP, RegOpc(dst));
8316   ins_pipe( ialu_reg_reg );
8317 %}
8318 // end of ROR expand
8319 
8320 // ROR right once
8321 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8322   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8323 
8324   expand %{
8325     rorI_eReg_imm1(dst, rshift, cr);
8326   %}
8327 %}
8328 
8329 // ROR 32bit by immI8 once
8330 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8331   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8332   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8333 
8334   expand %{
8335     rorI_eReg_imm8(dst, rshift, cr);
8336   %}
8337 %}
8338 
8339 // ROR 32bit var by var once
8340 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8341   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8342 
8343   expand %{
8344     rorI_eReg_CL(dst, shift, cr);
8345   %}
8346 %}
8347 
8348 // ROR 32bit var by var once
8349 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8350   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8351 
8352   expand %{
8353     rorI_eReg_CL(dst, shift, cr);
8354   %}
8355 %}
8356 
8357 // Xor Instructions
8358 // Xor Register with Register
8359 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8360   match(Set dst (XorI dst src));
8361   effect(KILL cr);
8362 
8363   size(2);
8364   format %{ "XOR    $dst,$src" %}
8365   opcode(0x33);
8366   ins_encode( OpcP, RegReg( dst, src) );
8367   ins_pipe( ialu_reg_reg );
8368 %}
8369 
8370 // Xor Register with Immediate -1
8371 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8372   match(Set dst (XorI dst imm));
8373 
8374   size(2);
8375   format %{ "NOT    $dst" %}
8376   ins_encode %{
8377      __ notl($dst$$Register);
8378   %}
8379   ins_pipe( ialu_reg );
8380 %}
8381 
8382 // Xor Register with Immediate
8383 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8384   match(Set dst (XorI dst src));
8385   effect(KILL cr);
8386 
8387   format %{ "XOR    $dst,$src" %}
8388   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8389   // ins_encode( RegImm( dst, src) );
8390   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8391   ins_pipe( ialu_reg );
8392 %}
8393 
8394 // Xor Register with Memory
8395 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8396   match(Set dst (XorI dst (LoadI src)));
8397   effect(KILL cr);
8398 
8399   ins_cost(125);
8400   format %{ "XOR    $dst,$src" %}
8401   opcode(0x33);
8402   ins_encode( OpcP, RegMem(dst, src) );
8403   ins_pipe( ialu_reg_mem );
8404 %}
8405 
8406 // Xor Memory with Register
8407 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8408   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8409   effect(KILL cr);
8410 
8411   ins_cost(150);
8412   format %{ "XOR    $dst,$src" %}
8413   opcode(0x31);  /* Opcode 31 /r */
8414   ins_encode( OpcP, RegMem( src, dst ) );
8415   ins_pipe( ialu_mem_reg );
8416 %}
8417 
8418 // Xor Memory with Immediate
8419 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8420   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8421   effect(KILL cr);
8422 
8423   ins_cost(125);
8424   format %{ "XOR    $dst,$src" %}
8425   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8426   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8427   ins_pipe( ialu_mem_imm );
8428 %}
8429 
8430 //----------Convert Int to Boolean---------------------------------------------
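// The NEG/ADC pairs below compute (src != 0) ? 1 : 0 without a branch: with dst
// preloaded with src, NEG dst sets CF exactly when src != 0, and ADC dst,src
// then leaves -src + src + CF == CF in dst.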
8431 
8432 instruct movI_nocopy(rRegI dst, rRegI src) %{
8433   effect( DEF dst, USE src );
8434   format %{ "MOV    $dst,$src" %}
8435   ins_encode( enc_Copy( dst, src) );
8436   ins_pipe( ialu_reg_reg );
8437 %}
8438 
8439 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8440   effect( USE_DEF dst, USE src, KILL cr );
8441 
8442   size(4);
8443   format %{ "NEG    $dst\n\t"
8444             "ADC    $dst,$src" %}
8445   ins_encode( neg_reg(dst),
8446               OpcRegReg(0x13,dst,src) );
8447   ins_pipe( ialu_reg_reg_long );
8448 %}
8449 
8450 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8451   match(Set dst (Conv2B src));
8452 
8453   expand %{
8454     movI_nocopy(dst,src);
8455     ci2b(dst,src,cr);
8456   %}
8457 %}
8458 
8459 instruct movP_nocopy(rRegI dst, eRegP src) %{
8460   effect( DEF dst, USE src );
8461   format %{ "MOV    $dst,$src" %}
8462   ins_encode( enc_Copy( dst, src) );
8463   ins_pipe( ialu_reg_reg );
8464 %}
8465 
8466 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8467   effect( USE_DEF dst, USE src, KILL cr );
8468   format %{ "NEG    $dst\n\t"
8469             "ADC    $dst,$src" %}
8470   ins_encode( neg_reg(dst),
8471               OpcRegReg(0x13,dst,src) );
8472   ins_pipe( ialu_reg_reg_long );
8473 %}
8474 
8475 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8476   match(Set dst (Conv2B src));
8477 
8478   expand %{
8479     movP_nocopy(dst,src);
8480     cp2b(dst,src,cr);
8481   %}
8482 %}
8483 
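// CmpLTMask p q yields -1 when p < q (signed) and 0 otherwise.  The first two
// rules materialize the mask directly (SETlt/NEG, or SAR by 31 when comparing
// against zero); the cadd_/and_ forms below fold the mask into its consumer
// with a short branch, trading a branch for a register as their comments note.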
8484 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8485   match(Set dst (CmpLTMask p q));
8486   effect(KILL cr);
8487   ins_cost(400);
8488 
8489   // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
8490   format %{ "XOR    $dst,$dst\n\t"
8491             "CMP    $p,$q\n\t"
8492             "SETlt  $dst\n\t"
8493             "NEG    $dst" %}
8494   ins_encode %{
8495     Register Rp = $p$$Register;
8496     Register Rq = $q$$Register;
8497     Register Rd = $dst$$Register;
8498     Label done;
8499     __ xorl(Rd, Rd);
8500     __ cmpl(Rp, Rq);
8501     __ setb(Assembler::less, Rd);
8502     __ negl(Rd);
8503   %}
8504 
8505   ins_pipe(pipe_slow);
8506 %}
8507 
8508 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8509   match(Set dst (CmpLTMask dst zero));
8510   effect(DEF dst, KILL cr);
8511   ins_cost(100);
8512 
8513   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8514   ins_encode %{
8515   __ sarl($dst$$Register, 31);
8516   %}
8517   ins_pipe(ialu_reg);
8518 %}
8519 
8520 /* better to save a register than avoid a branch */
8521 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8522   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8523   effect(KILL cr);
8524   ins_cost(400);
8525   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8526             "JGE    done\n\t"
8527             "ADD    $p,$y\n"
8528             "done:  " %}
8529   ins_encode %{
8530     Register Rp = $p$$Register;
8531     Register Rq = $q$$Register;
8532     Register Ry = $y$$Register;
8533     Label done;
8534     __ subl(Rp, Rq);
8535     __ jccb(Assembler::greaterEqual, done);
8536     __ addl(Rp, Ry);
8537     __ bind(done);
8538   %}
8539 
8540   ins_pipe(pipe_cmplt);
8541 %}
8542 
8543 /* better to save a register than avoid a branch */
8544 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8545   match(Set y (AndI (CmpLTMask p q) y));
8546   effect(KILL cr);
8547 
8548   ins_cost(300);
8549 
8550   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8551             "JLT      done\n\t"
8552             "XORL     $y, $y\n"
8553             "done:  " %}
8554   ins_encode %{
8555     Register Rp = $p$$Register;
8556     Register Rq = $q$$Register;
8557     Register Ry = $y$$Register;
8558     Label done;
8559     __ cmpl(Rp, Rq);
8560     __ jccb(Assembler::less, done);
8561     __ xorl(Ry, Ry);
8562     __ bind(done);
8563   %}
8564 
8565   ins_pipe(pipe_cmplt);
8566 %}
8567 
8568 /* If I enable this, I encourage spilling in the inner loop of compress.
8569 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8570   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8571 */
8572 //----------Overflow Math Instructions-----------------------------------------
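// These rules only set the condition codes; the OverflowAddI/SubI/MulI nodes
// they match are presumably produced for overflow-checked arithmetic (e.g. the
// Math.addExact family of intrinsics), with the flags consumed by a following
// overflow branch.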
8573 
8574 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8575 %{
8576   match(Set cr (OverflowAddI op1 op2));
8577   effect(DEF cr, USE_KILL op1, USE op2);
8578 
8579   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8580 
8581   ins_encode %{
8582     __ addl($op1$$Register, $op2$$Register);
8583   %}
8584   ins_pipe(ialu_reg_reg);
8585 %}
8586 
8587 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8588 %{
8589   match(Set cr (OverflowAddI op1 op2));
8590   effect(DEF cr, USE_KILL op1, USE op2);
8591 
8592   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8593 
8594   ins_encode %{
8595     __ addl($op1$$Register, $op2$$constant);
8596   %}
8597   ins_pipe(ialu_reg_reg);
8598 %}
8599 
8600 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8601 %{
8602   match(Set cr (OverflowSubI op1 op2));
8603 
8604   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8605   ins_encode %{
8606     __ cmpl($op1$$Register, $op2$$Register);
8607   %}
8608   ins_pipe(ialu_reg_reg);
8609 %}
8610 
8611 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8612 %{
8613   match(Set cr (OverflowSubI op1 op2));
8614 
8615   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8616   ins_encode %{
8617     __ cmpl($op1$$Register, $op2$$constant);
8618   %}
8619   ins_pipe(ialu_reg_reg);
8620 %}
8621 
8622 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8623 %{
8624   match(Set cr (OverflowSubI zero op2));
8625   effect(DEF cr, USE_KILL op2);
8626 
8627   format %{ "NEG    $op2\t# overflow check int" %}
8628   ins_encode %{
8629     __ negl($op2$$Register);
8630   %}
8631   ins_pipe(ialu_reg_reg);
8632 %}
8633 
8634 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8635 %{
8636   match(Set cr (OverflowMulI op1 op2));
8637   effect(DEF cr, USE_KILL op1, USE op2);
8638 
8639   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8640   ins_encode %{
8641     __ imull($op1$$Register, $op2$$Register);
8642   %}
8643   ins_pipe(ialu_reg_reg_alu0);
8644 %}
8645 
8646 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8647 %{
8648   match(Set cr (OverflowMulI op1 op2));
8649   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8650 
8651   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8652   ins_encode %{
8653     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8654   %}
8655   ins_pipe(ialu_reg_reg_alu0);
8656 %}
8657 
8658 //----------Long Instructions------------------------------------------------
8659 // Add Long Register with Register
8660 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8661   match(Set dst (AddL dst src));
8662   effect(KILL cr);
8663   ins_cost(200);
8664   format %{ "ADD    $dst.lo,$src.lo\n\t"
8665             "ADC    $dst.hi,$src.hi" %}
8666   opcode(0x03, 0x13);
8667   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8668   ins_pipe( ialu_reg_reg_long );
8669 %}
8670 
8671 // Add Long Register with Immediate
8672 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8673   match(Set dst (AddL dst src));
8674   effect(KILL cr);
8675   format %{ "ADD    $dst.lo,$src.lo\n\t"
8676             "ADC    $dst.hi,$src.hi" %}
8677   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8678   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8679   ins_pipe( ialu_reg_long );
8680 %}
8681 
8682 // Add Long Register with Memory
8683 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8684   match(Set dst (AddL dst (LoadL mem)));
8685   effect(KILL cr);
8686   ins_cost(125);
8687   format %{ "ADD    $dst.lo,$mem\n\t"
8688             "ADC    $dst.hi,$mem+4" %}
8689   opcode(0x03, 0x13);
8690   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8691   ins_pipe( ialu_reg_long_mem );
8692 %}
8693 
8694 // Subtract Long Register with Register.
8695 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8696   match(Set dst (SubL dst src));
8697   effect(KILL cr);
8698   ins_cost(200);
8699   format %{ "SUB    $dst.lo,$src.lo\n\t"
8700             "SBB    $dst.hi,$src.hi" %}
8701   opcode(0x2B, 0x1B);
8702   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8703   ins_pipe( ialu_reg_reg_long );
8704 %}
8705 
8706 // Subtract Long Register with Immediate
8707 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8708   match(Set dst (SubL dst src));
8709   effect(KILL cr);
8710   format %{ "SUB    $dst.lo,$src.lo\n\t"
8711             "SBB    $dst.hi,$src.hi" %}
8712   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8713   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8714   ins_pipe( ialu_reg_long );
8715 %}
8716 
8717 // Subtract Long Register with Memory
8718 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8719   match(Set dst (SubL dst (LoadL mem)));
8720   effect(KILL cr);
8721   ins_cost(125);
8722   format %{ "SUB    $dst.lo,$mem\n\t"
8723             "SBB    $dst.hi,$mem+4" %}
8724   opcode(0x2B, 0x1B);
8725   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8726   ins_pipe( ialu_reg_long_mem );
8727 %}
8728 
8729 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8730   match(Set dst (SubL zero dst));
8731   effect(KILL cr);
8732   ins_cost(300);
8733   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8734   ins_encode( neg_long(dst) );
8735   ins_pipe( ialu_reg_reg_long );
8736 %}
8737 
8738 // And Long Register with Register
8739 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8740   match(Set dst (AndL dst src));
8741   effect(KILL cr);
8742   format %{ "AND    $dst.lo,$src.lo\n\t"
8743             "AND    $dst.hi,$src.hi" %}
8744   opcode(0x23,0x23);
8745   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8746   ins_pipe( ialu_reg_reg_long );
8747 %}
8748 
8749 // And Long Register with Immediate
8750 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8751   match(Set dst (AndL dst src));
8752   effect(KILL cr);
8753   format %{ "AND    $dst.lo,$src.lo\n\t"
8754             "AND    $dst.hi,$src.hi" %}
8755   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8756   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8757   ins_pipe( ialu_reg_long );
8758 %}
8759 
8760 // And Long Register with Memory
8761 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8762   match(Set dst (AndL dst (LoadL mem)));
8763   effect(KILL cr);
8764   ins_cost(125);
8765   format %{ "AND    $dst.lo,$mem\n\t"
8766             "AND    $dst.hi,$mem+4" %}
8767   opcode(0x23, 0x23);
8768   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8769   ins_pipe( ialu_reg_long_mem );
8770 %}
8771 
8772 // BMI1 instructions
8773 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8774   match(Set dst (AndL (XorL src1 minus_1) src2));
8775   predicate(UseBMI1Instructions);
8776   effect(KILL cr, TEMP dst);
8777 
8778   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8779             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8780          %}
8781 
8782   ins_encode %{
8783     Register Rdst = $dst$$Register;
8784     Register Rsrc1 = $src1$$Register;
8785     Register Rsrc2 = $src2$$Register;
8786     __ andnl(Rdst, Rsrc1, Rsrc2);
8787     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8788   %}
8789   ins_pipe(ialu_reg_reg_long);
8790 %}
8791 
8792 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8793   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8794   predicate(UseBMI1Instructions);
8795   effect(KILL cr, TEMP dst);
8796 
8797   ins_cost(125);
8798   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8799             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8800          %}
8801 
8802   ins_encode %{
8803     Register Rdst = $dst$$Register;
8804     Register Rsrc1 = $src1$$Register;
8805     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8806 
8807     __ andnl(Rdst, Rsrc1, $src2$$Address);
8808     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8809   %}
8810   ins_pipe(ialu_reg_mem);
8811 %}
8812 
8813 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8814   match(Set dst (AndL (SubL imm_zero src) src));
8815   predicate(UseBMI1Instructions);
8816   effect(KILL cr, TEMP dst);
8817 
8818   format %{ "MOVL   $dst.hi, 0\n\t"
8819             "BLSIL  $dst.lo, $src.lo\n\t"
8820             "JNZ    done\n\t"
8821             "BLSIL  $dst.hi, $src.hi\n"
8822             "done:"
8823          %}
8824 
8825   ins_encode %{
8826     Label done;
8827     Register Rdst = $dst$$Register;
8828     Register Rsrc = $src$$Register;
8829     __ movl(HIGH_FROM_LOW(Rdst), 0);
8830     __ blsil(Rdst, Rsrc);
8831     __ jccb(Assembler::notZero, done);
8832     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8833     __ bind(done);
8834   %}
8835   ins_pipe(ialu_reg);
8836 %}
8837 
8838 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8839   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8840   predicate(UseBMI1Instructions);
8841   effect(KILL cr, TEMP dst);
8842 
8843   ins_cost(125);
8844   format %{ "MOVL   $dst.hi, 0\n\t"
8845             "BLSIL  $dst.lo, $src\n\t"
8846             "JNZ    done\n\t"
8847             "BLSIL  $dst.hi, $src+4\n"
8848             "done:"
8849          %}
8850 
8851   ins_encode %{
8852     Label done;
8853     Register Rdst = $dst$$Register;
8854     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8855 
8856     __ movl(HIGH_FROM_LOW(Rdst), 0);
8857     __ blsil(Rdst, $src$$Address);
8858     __ jccb(Assembler::notZero, done);
8859     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8860     __ bind(done);
8861   %}
8862   ins_pipe(ialu_reg_mem);
8863 %}
8864 
8865 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8866 %{
8867   match(Set dst (XorL (AddL src minus_1) src));
8868   predicate(UseBMI1Instructions);
8869   effect(KILL cr, TEMP dst);
8870 
8871   format %{ "MOVL    $dst.hi, 0\n\t"
8872             "BLSMSKL $dst.lo, $src.lo\n\t"
8873             "JNC     done\n\t"
8874             "BLSMSKL $dst.hi, $src.hi\n"
8875             "done:"
8876          %}
8877 
8878   ins_encode %{
8879     Label done;
8880     Register Rdst = $dst$$Register;
8881     Register Rsrc = $src$$Register;
8882     __ movl(HIGH_FROM_LOW(Rdst), 0);
8883     __ blsmskl(Rdst, Rsrc);
8884     __ jccb(Assembler::carryClear, done);
8885     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8886     __ bind(done);
8887   %}
8888 
8889   ins_pipe(ialu_reg);
8890 %}
8891 
8892 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8893 %{
8894   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8895   predicate(UseBMI1Instructions);
8896   effect(KILL cr, TEMP dst);
8897 
8898   ins_cost(125);
8899   format %{ "MOVL    $dst.hi, 0\n\t"
8900             "BLSMSKL $dst.lo, $src\n\t"
8901             "JNC     done\n\t"
8902             "BLSMSKL $dst.hi, $src+4\n"
8903             "done:"
8904          %}
8905 
8906   ins_encode %{
8907     Label done;
8908     Register Rdst = $dst$$Register;
8909     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8910 
8911     __ movl(HIGH_FROM_LOW(Rdst), 0);
8912     __ blsmskl(Rdst, $src$$Address);
8913     __ jccb(Assembler::carryClear, done);
8914     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8915     __ bind(done);
8916   %}
8917 
8918   ins_pipe(ialu_reg_mem);
8919 %}
8920 
8921 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8922 %{
8923   match(Set dst (AndL (AddL src minus_1) src) );
8924   predicate(UseBMI1Instructions);
8925   effect(KILL cr, TEMP dst);
8926 
8927   format %{ "MOVL   $dst.hi, $src.hi\n\t"
8928             "BLSRL  $dst.lo, $src.lo\n\t"
8929             "JNC    done\n\t"
8930             "BLSRL  $dst.hi, $src.hi\n"
8931             "done:"
8932   %}
8933 
8934   ins_encode %{
8935     Label done;
8936     Register Rdst = $dst$$Register;
8937     Register Rsrc = $src$$Register;
8938     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8939     __ blsrl(Rdst, Rsrc);
8940     __ jccb(Assembler::carryClear, done);
8941     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8942     __ bind(done);
8943   %}
8944 
8945   ins_pipe(ialu_reg);
8946 %}
8947 
8948 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8949 %{
8950   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8951   predicate(UseBMI1Instructions);
8952   effect(KILL cr, TEMP dst);
8953 
8954   ins_cost(125);
8955   format %{ "MOVL   $dst.hi, $src+4\n\t"
8956             "BLSRL  $dst.lo, $src\n\t"
8957             "JNC    done\n\t"
8958             "BLSRL  $dst.hi, $src+4\n"
8959             "done:"
8960   %}
8961 
8962   ins_encode %{
8963     Label done;
8964     Register Rdst = $dst$$Register;
8965     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8966     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
8967     __ blsrl(Rdst, $src$$Address);
8968     __ jccb(Assembler::carryClear, done);
8969     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
8970     __ bind(done);
8971   %}
8972 
8973   ins_pipe(ialu_reg_mem);
8974 %}
8975 
8976 // Or Long Register with Register
8977 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8978   match(Set dst (OrL dst src));
8979   effect(KILL cr);
8980   format %{ "OR     $dst.lo,$src.lo\n\t"
8981             "OR     $dst.hi,$src.hi" %}
8982   opcode(0x0B,0x0B);
8983   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8984   ins_pipe( ialu_reg_reg_long );
8985 %}
8986 
8987 // Or Long Register with Immediate
8988 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8989   match(Set dst (OrL dst src));
8990   effect(KILL cr);
8991   format %{ "OR     $dst.lo,$src.lo\n\t"
8992             "OR     $dst.hi,$src.hi" %}
8993   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
8994   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8995   ins_pipe( ialu_reg_long );
8996 %}
8997 
8998 // Or Long Register with Memory
8999 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9000   match(Set dst (OrL dst (LoadL mem)));
9001   effect(KILL cr);
9002   ins_cost(125);
9003   format %{ "OR     $dst.lo,$mem\n\t"
9004             "OR     $dst.hi,$mem+4" %}
9005   opcode(0x0B,0x0B);
9006   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9007   ins_pipe( ialu_reg_long_mem );
9008 %}
9009 
9010 // Xor Long Register with Register
9011 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9012   match(Set dst (XorL dst src));
9013   effect(KILL cr);
9014   format %{ "XOR    $dst.lo,$src.lo\n\t"
9015             "XOR    $dst.hi,$src.hi" %}
9016   opcode(0x33,0x33);
9017   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9018   ins_pipe( ialu_reg_reg_long );
9019 %}
9020 
9021 // Xor Long Register with Immediate -1
9022 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9023   match(Set dst (XorL dst imm));
9024   format %{ "NOT    $dst.lo\n\t"
9025             "NOT    $dst.hi" %}
9026   ins_encode %{
9027      __ notl($dst$$Register);
9028      __ notl(HIGH_FROM_LOW($dst$$Register));
9029   %}
9030   ins_pipe( ialu_reg_long );
9031 %}
9032 
9033 // Xor Long Register with Immediate
9034 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9035   match(Set dst (XorL dst src));
9036   effect(KILL cr);
9037   format %{ "XOR    $dst.lo,$src.lo\n\t"
9038             "XOR    $dst.hi,$src.hi" %}
9039   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9040   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9041   ins_pipe( ialu_reg_long );
9042 %}
9043 
9044 // Xor Long Register with Memory
9045 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9046   match(Set dst (XorL dst (LoadL mem)));
9047   effect(KILL cr);
9048   ins_cost(125);
9049   format %{ "XOR    $dst.lo,$mem\n\t"
9050             "XOR    $dst.hi,$mem+4" %}
9051   opcode(0x33,0x33);
9052   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9053   ins_pipe( ialu_reg_long_mem );
9054 %}
9055 
9056 // Shift Left Long by 1
9057 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9058   predicate(UseNewLongLShift);
9059   match(Set dst (LShiftL dst cnt));
9060   effect(KILL cr);
9061   ins_cost(100);
9062   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9063             "ADC    $dst.hi,$dst.hi" %}
9064   ins_encode %{
9065     __ addl($dst$$Register,$dst$$Register);
9066     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9067   %}
9068   ins_pipe( ialu_reg_long );
9069 %}
9070 
9071 // Shift Left Long by 2
9072 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9073   predicate(UseNewLongLShift);
9074   match(Set dst (LShiftL dst cnt));
9075   effect(KILL cr);
9076   ins_cost(100);
9077   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9078             "ADC    $dst.hi,$dst.hi\n\t"
9079             "ADD    $dst.lo,$dst.lo\n\t"
9080             "ADC    $dst.hi,$dst.hi" %}
9081   ins_encode %{
9082     __ addl($dst$$Register,$dst$$Register);
9083     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9084     __ addl($dst$$Register,$dst$$Register);
9085     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9086   %}
9087   ins_pipe( ialu_reg_long );
9088 %}
9089 
9090 // Shift Left Long by 3
9091 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9092   predicate(UseNewLongLShift);
9093   match(Set dst (LShiftL dst cnt));
9094   effect(KILL cr);
9095   ins_cost(100);
9096   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9097             "ADC    $dst.hi,$dst.hi\n\t"
9098             "ADD    $dst.lo,$dst.lo\n\t"
9099             "ADC    $dst.hi,$dst.hi\n\t"
9100             "ADD    $dst.lo,$dst.lo\n\t"
9101             "ADC    $dst.hi,$dst.hi" %}
9102   ins_encode %{
9103     __ addl($dst$$Register,$dst$$Register);
9104     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9105     __ addl($dst$$Register,$dst$$Register);
9106     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9107     __ addl($dst$$Register,$dst$$Register);
9108     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9109   %}
9110   ins_pipe( ialu_reg_long );
9111 %}
9112 
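// 32-bit x86 has no 64-bit shift, so long shifts are split by shift count:
// counts 1-31 use a double-precision shift (SHLD/SHRD) across the register pair
// plus a single shift on the other half; counts 32-63 move one half into the
// other, shift it by count-32, and fill the vacated half with zero (or with the
// sign for SAR).  The variable-count forms test bit 5 of ECX at run time to
// choose between the two sequences.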
9113 // Shift Left Long by 1-31
9114 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9115   match(Set dst (LShiftL dst cnt));
9116   effect(KILL cr);
9117   ins_cost(200);
9118   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9119             "SHL    $dst.lo,$cnt" %}
9120   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9121   ins_encode( move_long_small_shift(dst,cnt) );
9122   ins_pipe( ialu_reg_long );
9123 %}
9124 
9125 // Shift Left Long by 32-63
9126 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9127   match(Set dst (LShiftL dst cnt));
9128   effect(KILL cr);
9129   ins_cost(300);
9130   format %{ "MOV    $dst.hi,$dst.lo\n"
9131           "\tSHL    $dst.hi,$cnt-32\n"
9132           "\tXOR    $dst.lo,$dst.lo" %}
9133   opcode(0xC1, 0x4);  /* C1 /4 ib */
9134   ins_encode( move_long_big_shift_clr(dst,cnt) );
9135   ins_pipe( ialu_reg_long );
9136 %}
9137 
9138 // Shift Left Long by variable
9139 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9140   match(Set dst (LShiftL dst shift));
9141   effect(KILL cr);
9142   ins_cost(500+200);
9143   size(17);
9144   format %{ "TEST   $shift,32\n\t"
9145             "JEQ,s  small\n\t"
9146             "MOV    $dst.hi,$dst.lo\n\t"
9147             "XOR    $dst.lo,$dst.lo\n"
9148     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9149             "SHL    $dst.lo,$shift" %}
9150   ins_encode( shift_left_long( dst, shift ) );
9151   ins_pipe( pipe_slow );
9152 %}
9153 
9154 // Shift Right Long by 1-31
9155 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9156   match(Set dst (URShiftL dst cnt));
9157   effect(KILL cr);
9158   ins_cost(200);
9159   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9160             "SHR    $dst.hi,$cnt" %}
9161   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9162   ins_encode( move_long_small_shift(dst,cnt) );
9163   ins_pipe( ialu_reg_long );
9164 %}
9165 
9166 // Shift Right Long by 32-63
9167 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9168   match(Set dst (URShiftL dst cnt));
9169   effect(KILL cr);
9170   ins_cost(300);
9171   format %{ "MOV    $dst.lo,$dst.hi\n"
9172           "\tSHR    $dst.lo,$cnt-32\n"
9173           "\tXOR    $dst.hi,$dst.hi" %}
9174   opcode(0xC1, 0x5);  /* C1 /5 ib */
9175   ins_encode( move_long_big_shift_clr(dst,cnt) );
9176   ins_pipe( ialu_reg_long );
9177 %}
9178 
9179 // Shift Right Long by variable
9180 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9181   match(Set dst (URShiftL dst shift));
9182   effect(KILL cr);
9183   ins_cost(600);
9184   size(17);
9185   format %{ "TEST   $shift,32\n\t"
9186             "JEQ,s  small\n\t"
9187             "MOV    $dst.lo,$dst.hi\n\t"
9188             "XOR    $dst.hi,$dst.hi\n"
9189     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9190             "SHR    $dst.hi,$shift" %}
9191   ins_encode( shift_right_long( dst, shift ) );
9192   ins_pipe( pipe_slow );
9193 %}
9194 
9195 // Shift Right Long by 1-31
9196 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9197   match(Set dst (RShiftL dst cnt));
9198   effect(KILL cr);
9199   ins_cost(200);
9200   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9201             "SAR    $dst.hi,$cnt" %}
9202   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9203   ins_encode( move_long_small_shift(dst,cnt) );
9204   ins_pipe( ialu_reg_long );
9205 %}
9206 
9207 // Shift Right Long by 32-63
9208 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9209   match(Set dst (RShiftL dst cnt));
9210   effect(KILL cr);
9211   ins_cost(300);
9212   format %{ "MOV    $dst.lo,$dst.hi\n"
9213           "\tSAR    $dst.lo,$cnt-32\n"
9214           "\tSAR    $dst.hi,31" %}
9215   opcode(0xC1, 0x7);  /* C1 /7 ib */
9216   ins_encode( move_long_big_shift_sign(dst,cnt) );
9217   ins_pipe( ialu_reg_long );
9218 %}
9219 
9220 // Shift Right arithmetic Long by variable
9221 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9222   match(Set dst (RShiftL dst shift));
9223   effect(KILL cr);
9224   ins_cost(600);
9225   size(18);
9226   format %{ "TEST   $shift,32\n\t"
9227             "JEQ,s  small\n\t"
9228             "MOV    $dst.lo,$dst.hi\n\t"
9229             "SAR    $dst.hi,31\n"
9230     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9231             "SAR    $dst.hi,$shift" %}
9232   ins_encode( shift_right_arith_long( dst, shift ) );
9233   ins_pipe( pipe_slow );
9234 %}
9235 
9236 
9237 //----------Double Instructions------------------------------------------------
9238 // Double Math
9239 
9240 // Compare & branch
9241 
9242 // P6 version of float compare, sets condition codes in EFLAGS
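// FUCOMIP reports an unordered compare (a NaN operand) through the parity flag,
// hence the JNP below; the fixup then forces CF so that unordered results
// behave like "below" for the unsigned flag users.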
9243 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9244   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9245   match(Set cr (CmpD src1 src2));
9246   effect(KILL rax);
9247   ins_cost(150);
9248   format %{ "FLD    $src1\n\t"
9249             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9250             "JNP    exit\n\t"
9251             "MOV    ah,1       // saw a NaN, set CF\n\t"
9252             "SAHF\n"
9253      "exit:\tNOP               // avoid branch to branch" %}
9254   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9255   ins_encode( Push_Reg_DPR(src1),
9256               OpcP, RegOpc(src2),
9257               cmpF_P6_fixup );
9258   ins_pipe( pipe_slow );
9259 %}
9260 
9261 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9262   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9263   match(Set cr (CmpD src1 src2));
9264   ins_cost(150);
9265   format %{ "FLD    $src1\n\t"
9266             "FUCOMIP ST,$src2  // P6 instruction" %}
9267   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9268   ins_encode( Push_Reg_DPR(src1),
9269               OpcP, RegOpc(src2));
9270   ins_pipe( pipe_slow );
9271 %}
9272 
9273 // Compare & branch
9274 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9275   predicate(UseSSE<=1);
9276   match(Set cr (CmpD src1 src2));
9277   effect(KILL rax);
9278   ins_cost(200);
9279   format %{ "FLD    $src1\n\t"
9280             "FCOMp  $src2\n\t"
9281             "FNSTSW AX\n\t"
9282             "TEST   AX,0x400\n\t"
9283             "JZ,s   flags\n\t"
9284             "MOV    AH,1\t# unordered treat as LT\n"
9285     "flags:\tSAHF" %}
9286   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9287   ins_encode( Push_Reg_DPR(src1),
9288               OpcP, RegOpc(src2),
9289               fpu_flags);
9290   ins_pipe( pipe_slow );
9291 %}
9292 
9293 // Compare vs zero into -1,0,1
9294 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9295   predicate(UseSSE<=1);
9296   match(Set dst (CmpD3 src1 zero));
9297   effect(KILL cr, KILL rax);
9298   ins_cost(280);
9299   format %{ "FTSTD  $dst,$src1" %}
9300   opcode(0xE4, 0xD9);
9301   ins_encode( Push_Reg_DPR(src1),
9302               OpcS, OpcP, PopFPU,
9303               CmpF_Result(dst));
9304   ins_pipe( pipe_slow );
9305 %}
9306 
9307 // Compare into -1,0,1
9308 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9309   predicate(UseSSE<=1);
9310   match(Set dst (CmpD3 src1 src2));
9311   effect(KILL cr, KILL rax);
9312   ins_cost(300);
9313   format %{ "FCMPD  $dst,$src1,$src2" %}
9314   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9315   ins_encode( Push_Reg_DPR(src1),
9316               OpcP, RegOpc(src2),
9317               CmpF_Result(dst));
9318   ins_pipe( pipe_slow );
9319 %}
9320 
9321 // float compare and set condition codes in EFLAGS by XMM regs
9322 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9323   predicate(UseSSE>=2);
9324   match(Set cr (CmpD src1 src2));
9325   ins_cost(145);
9326   format %{ "UCOMISD $src1,$src2\n\t"
9327             "JNP,s   exit\n\t"
9328             "PUSHF\t# saw NaN, set CF\n\t"
9329             "AND     [rsp], #0xffffff2b\n\t"
9330             "POPF\n"
9331     "exit:" %}
9332   ins_encode %{
9333     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9334     emit_cmpfp_fixup(_masm);
9335   %}
9336   ins_pipe( pipe_slow );
9337 %}
9338 
9339 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9340   predicate(UseSSE>=2);
9341   match(Set cr (CmpD src1 src2));
9342   ins_cost(100);
9343   format %{ "UCOMISD $src1,$src2" %}
9344   ins_encode %{
9345     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9346   %}
9347   ins_pipe( pipe_slow );
9348 %}
9349 
9350 // float compare and set condition codes in EFLAGS by XMM regs
9351 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9352   predicate(UseSSE>=2);
9353   match(Set cr (CmpD src1 (LoadD src2)));
9354   ins_cost(145);
9355   format %{ "UCOMISD $src1,$src2\n\t"
9356             "JNP,s   exit\n\t"
9357             "PUSHF\t# saw NaN, set CF\n\t"
9358             "AND     [rsp], #0xffffff2b\n\t"
9359             "POPF\n"
9360     "exit:" %}
9361   ins_encode %{
9362     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9363     emit_cmpfp_fixup(_masm);
9364   %}
9365   ins_pipe( pipe_slow );
9366 %}
9367 
9368 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9369   predicate(UseSSE>=2);
9370   match(Set cr (CmpD src1 (LoadD src2)));
9371   ins_cost(100);
9372   format %{ "UCOMISD $src1,$src2" %}
9373   ins_encode %{
9374     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9375   %}
9376   ins_pipe( pipe_slow );
9377 %}
9378 
9379 // Compare into -1,0,1 in XMM
9380 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9381   predicate(UseSSE>=2);
9382   match(Set dst (CmpD3 src1 src2));
9383   effect(KILL cr);
9384   ins_cost(255);
9385   format %{ "UCOMISD $src1, $src2\n\t"
9386             "MOV     $dst, #-1\n\t"
9387             "JP,s    done\n\t"
9388             "JB,s    done\n\t"
9389             "SETNE   $dst\n\t"
9390             "MOVZB   $dst, $dst\n"
9391     "done:" %}
9392   ins_encode %{
9393     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9394     emit_cmpfp3(_masm, $dst$$Register);
9395   %}
9396   ins_pipe( pipe_slow );
9397 %}
9398 
9399 // Compare into -1,0,1 in XMM and memory
9400 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9401   predicate(UseSSE>=2);
9402   match(Set dst (CmpD3 src1 (LoadD src2)));
9403   effect(KILL cr);
9404   ins_cost(275);
9405   format %{ "UCOMISD $src1, $src2\n\t"
9406             "MOV     $dst, #-1\n\t"
9407             "JP,s    done\n\t"
9408             "JB,s    done\n\t"
9409             "SETNE   $dst\n\t"
9410             "MOVZB   $dst, $dst\n"
9411     "done:" %}
9412   ins_encode %{
9413     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9414     emit_cmpfp3(_masm, $dst$$Register);
9415   %}
9416   ins_pipe( pipe_slow );
9417 %}
9418 
9419 
9420 instruct subDPR_reg(regDPR dst, regDPR src) %{
9421   predicate (UseSSE <=1);
9422   match(Set dst (SubD dst src));
9423 
9424   format %{ "FLD    $src\n\t"
9425             "DSUBp  $dst,ST" %}
9426   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9427   ins_cost(150);
9428   ins_encode( Push_Reg_DPR(src),
9429               OpcP, RegOpc(dst) );
9430   ins_pipe( fpu_reg_reg );
9431 %}
9432 
9433 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9434   predicate (UseSSE <=1);
9435   match(Set dst (RoundDouble (SubD src1 src2)));
9436   ins_cost(250);
9437 
9438   format %{ "FLD    $src2\n\t"
9439             "DSUB   ST,$src1\n\t"
9440             "FSTP_D $dst\t# D-round" %}
9441   opcode(0xD8, 0x5);
9442   ins_encode( Push_Reg_DPR(src2),
9443               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9444   ins_pipe( fpu_mem_reg_reg );
9445 %}
9446 
9447 
9448 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9449   predicate (UseSSE <=1);
9450   match(Set dst (SubD dst (LoadD src)));
9451   ins_cost(150);
9452 
9453   format %{ "FLD    $src\n\t"
9454             "DSUBp  $dst,ST" %}
9455   opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
9456   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9457               OpcP, RegOpc(dst) );
9458   ins_pipe( fpu_reg_mem );
9459 %}
9460 
9461 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9462   predicate (UseSSE<=1);
9463   match(Set dst (AbsD src));
9464   ins_cost(100);
9465   format %{ "FABS" %}
9466   opcode(0xE1, 0xD9);
9467   ins_encode( OpcS, OpcP );
9468   ins_pipe( fpu_reg_reg );
9469 %}
9470 
9471 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9472   predicate(UseSSE<=1);
9473   match(Set dst (NegD src));
9474   ins_cost(100);
9475   format %{ "FCHS" %}
9476   opcode(0xE0, 0xD9);
9477   ins_encode( OpcS, OpcP );
9478   ins_pipe( fpu_reg_reg );
9479 %}
9480 
9481 instruct addDPR_reg(regDPR dst, regDPR src) %{
9482   predicate(UseSSE<=1);
9483   match(Set dst (AddD dst src));
9484   format %{ "FLD    $src\n\t"
9485             "DADD   $dst,ST" %}
9486   size(4);
9487   ins_cost(150);
9488   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9489   ins_encode( Push_Reg_DPR(src),
9490               OpcP, RegOpc(dst) );
9491   ins_pipe( fpu_reg_reg );
9492 %}
9493 
9494 
9495 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9496   predicate(UseSSE<=1);
9497   match(Set dst (RoundDouble (AddD src1 src2)));
9498   ins_cost(250);
9499 
9500   format %{ "FLD    $src2\n\t"
9501             "DADD   ST,$src1\n\t"
9502             "FSTP_D $dst\t# D-round" %}
9503   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9504   ins_encode( Push_Reg_DPR(src2),
9505               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9506   ins_pipe( fpu_mem_reg_reg );
9507 %}
9508 
9509 
9510 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9511   predicate(UseSSE<=1);
9512   match(Set dst (AddD dst (LoadD src)));
9513   ins_cost(150);
9514 
9515   format %{ "FLD    $src\n\t"
9516             "DADDp  $dst,ST" %}
9517   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9518   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9519               OpcP, RegOpc(dst) );
9520   ins_pipe( fpu_reg_mem );
9521 %}
9522 
9523 // add-to-memory
9524 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9525   predicate(UseSSE<=1);
9526   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9527   ins_cost(150);
9528 
9529   format %{ "FLD_D  $dst\n\t"
9530             "DADD   ST,$src\n\t"
9531             "FST_D  $dst" %}
9532   opcode(0xDD, 0x0);
9533   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9534               Opcode(0xD8), RegOpc(src),
9535               set_instruction_start,
9536               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9537   ins_pipe( fpu_reg_mem );
9538 %}
9539 
9540 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9541   predicate(UseSSE<=1);
9542   match(Set dst (AddD dst con));
9543   ins_cost(125);
9544   format %{ "FLD1\n\t"
9545             "DADDp  $dst,ST" %}
9546   ins_encode %{
9547     __ fld1();
9548     __ faddp($dst$$reg);
9549   %}
9550   ins_pipe(fpu_reg);
9551 %}
9552 
9553 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9554   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9555   match(Set dst (AddD dst con));
9556   ins_cost(200);
9557   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9558             "DADDp  $dst,ST" %}
9559   ins_encode %{
9560     __ fld_d($constantaddress($con));
9561     __ faddp($dst$$reg);
9562   %}
9563   ins_pipe(fpu_reg_mem);
9564 %}
9565 
9566 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9567   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9568   match(Set dst (RoundDouble (AddD src con)));
9569   ins_cost(200);
9570   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9571             "DADD   ST,$src\n\t"
9572             "FSTP_D $dst\t# D-round" %}
9573   ins_encode %{
9574     __ fld_d($constantaddress($con));
9575     __ fadd($src$$reg);
9576     __ fstp_d(Address(rsp, $dst$$disp));
9577   %}
9578   ins_pipe(fpu_mem_reg_con);
9579 %}
9580 
9581 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9582   predicate(UseSSE<=1);
9583   match(Set dst (MulD dst src));
9584   format %{ "FLD    $src\n\t"
9585             "DMULp  $dst,ST" %}
9586   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9587   ins_cost(150);
9588   ins_encode( Push_Reg_DPR(src),
9589               OpcP, RegOpc(dst) );
9590   ins_pipe( fpu_reg_reg );
9591 %}
9592 
9593 // Strict FP instruction biases argument before multiply then
9594 // biases result to avoid double rounding of subnormals.
9595 //
9596 // scale arg1 by multiplying arg1 by 2^(-15360)
9597 // load arg2
9598 // multiply scaled arg1 by arg2
9599 // rescale product by 2^(15360)
9600 //
9601 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9602   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9603   match(Set dst (MulD dst src));
9604   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9605 
9606   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9607             "DMULp  $dst,ST\n\t"
9608             "FLD    $src\n\t"
9609             "DMULp  $dst,ST\n\t"
9610             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9611             "DMULp  $dst,ST\n\t" %}
9612   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9613   ins_encode( strictfp_bias1(dst),
9614               Push_Reg_DPR(src),
9615               OpcP, RegOpc(dst),
9616               strictfp_bias2(dst) );
9617   ins_pipe( fpu_reg_reg );
9618 %}
9619 
9620 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9621   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9622   match(Set dst (MulD dst con));
9623   ins_cost(200);
9624   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9625             "DMULp  $dst,ST" %}
9626   ins_encode %{
9627     __ fld_d($constantaddress($con));
9628     __ fmulp($dst$$reg);
9629   %}
9630   ins_pipe(fpu_reg_mem);
9631 %}
9632 
9633 
9634 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9635   predicate( UseSSE<=1 );
9636   match(Set dst (MulD dst (LoadD src)));
9637   ins_cost(200);
9638   format %{ "FLD_D  $src\n\t"
9639             "DMULp  $dst,ST" %}
9640   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9641   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9642               OpcP, RegOpc(dst) );
9643   ins_pipe( fpu_reg_mem );
9644 %}
9645 
9646 //
9647 // Cisc-alternate to reg-reg multiply
9648 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9649   predicate( UseSSE<=1 );
9650   match(Set dst (MulD src (LoadD mem)));
9651   ins_cost(250);
9652   format %{ "FLD_D  $mem\n\t"
9653             "DMUL   ST,$src\n\t"
9654             "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */  /* LoadD  DD /0 */
9656   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9657               OpcReg_FPR(src),
9658               Pop_Reg_DPR(dst) );
9659   ins_pipe( fpu_reg_reg_mem );
9660 %}
9661 
9662 
9663 // MACRO3 -- addDPR a mulDPR
9664 // This instruction is a '2-address' instruction in that the result goes
9665 // back to src2.  This eliminates a move from the macro; possibly the
9666 // register allocator will have to add it back (and maybe not).
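// In ideal-graph terms the macro computes, with the result register aliased
// to src2 (illustrative sketch only):
//   src2 = (src0 * src1) + src2;   // FLD src0; DMUL ST,src1; DADDp src2,ST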
9667 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9668   predicate( UseSSE<=1 );
9669   match(Set src2 (AddD (MulD src0 src1) src2));
9670   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9671             "DMUL   ST,$src1\n\t"
9672             "DADDp  $src2,ST" %}
9673   ins_cost(250);
9674   opcode(0xDD); /* LoadD DD /0 */
9675   ins_encode( Push_Reg_FPR(src0),
9676               FMul_ST_reg(src1),
9677               FAddP_reg_ST(src2) );
9678   ins_pipe( fpu_reg_reg_reg );
9679 %}
9680 
9681 
9682 // MACRO3 -- subDPR a mulDPR
9683 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9684   predicate( UseSSE<=1 );
9685   match(Set src2 (SubD (MulD src0 src1) src2));
9686   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9687             "DMUL   ST,$src1\n\t"
9688             "DSUBRp $src2,ST" %}
9689   ins_cost(250);
9690   ins_encode( Push_Reg_FPR(src0),
9691               FMul_ST_reg(src1),
9692               Opcode(0xDE), Opc_plus(0xE0,src2));
9693   ins_pipe( fpu_reg_reg_reg );
9694 %}
9695 
9696 
9697 instruct divDPR_reg(regDPR dst, regDPR src) %{
9698   predicate( UseSSE<=1 );
9699   match(Set dst (DivD dst src));
9700 
9701   format %{ "FLD    $src\n\t"
9702             "FDIVp  $dst,ST" %}
9703   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9704   ins_cost(150);
9705   ins_encode( Push_Reg_DPR(src),
9706               OpcP, RegOpc(dst) );
9707   ins_pipe( fpu_reg_reg );
9708 %}
9709 
9710 // Strict FP instruction biases argument before division then
9711 // biases result, to avoid double rounding of subnormals.
9712 //
9713 // scale dividend by multiplying dividend by 2^(-15360)
9714 // load divisor
9715 // divide scaled dividend by divisor
9716 // rescale quotient by 2^(15360)
9717 //
9718 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9723 
9724   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9725             "DMULp  $dst,ST\n\t"
9726             "FLD    $src\n\t"
9727             "FDIVp  $dst,ST\n\t"
9728             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9729             "DMULp  $dst,ST\n\t" %}
9730   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9731   ins_encode( strictfp_bias1(dst),
9732               Push_Reg_DPR(src),
9733               OpcP, RegOpc(dst),
9734               strictfp_bias2(dst) );
9735   ins_pipe( fpu_reg_reg );
9736 %}
9737 
9738 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9739   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9740   match(Set dst (RoundDouble (DivD src1 src2)));
9741 
9742   format %{ "FLD    $src1\n\t"
9743             "FDIV   ST,$src2\n\t"
9744             "FSTP_D $dst\t# D-round" %}
9745   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9746   ins_encode( Push_Reg_DPR(src1),
9747               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9748   ins_pipe( fpu_mem_reg_reg );
9749 %}
9750 
9751 
9752 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9753   predicate(UseSSE<=1);
9754   match(Set dst (ModD dst src));
9755   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9756 
9757   format %{ "DMOD   $dst,$src" %}
9758   ins_cost(250);
9759   ins_encode(Push_Reg_Mod_DPR(dst, src),
9760               emitModDPR(),
9761               Push_Result_Mod_DPR(src),
9762               Pop_Reg_DPR(dst));
9763   ins_pipe( pipe_slow );
9764 %}
9765 
9766 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9767   predicate(UseSSE>=2);
9768   match(Set dst (ModD src0 src1));
9769   effect(KILL rax, KILL cr);
9770 
9771   format %{ "SUB    ESP,8\t # DMOD\n"
9772           "\tMOVSD  [ESP+0],$src1\n"
9773           "\tFLD_D  [ESP+0]\n"
9774           "\tMOVSD  [ESP+0],$src0\n"
9775           "\tFLD_D  [ESP+0]\n"
9776      "loop:\tFPREM\n"
9777           "\tFWAIT\n"
9778           "\tFNSTSW AX\n"
9779           "\tSAHF\n"
9780           "\tJP     loop\n"
9781           "\tFSTP_D [ESP+0]\n"
9782           "\tMOVSD  $dst,[ESP+0]\n"
9783           "\tADD    ESP,8\n"
9784           "\tFSTP   ST0\t # Restore FPU Stack"
9785     %}
9786   ins_cost(250);
9787   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9788   ins_pipe( pipe_slow );
9789 %}
9790 
9791 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
9792   predicate (UseSSE<=1);
9793   match(Set dst (SinD src));
9794   ins_cost(1800);
9795   format %{ "DSIN   $dst" %}
9796   opcode(0xD9, 0xFE);
9797   ins_encode( OpcP, OpcS );
9798   ins_pipe( pipe_slow );
9799 %}
9800 
9801 instruct sinD_reg(regD dst, eFlagsReg cr) %{
9802   predicate (UseSSE>=2);
9803   match(Set dst (SinD dst));
9804   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9805   ins_cost(1800);
9806   format %{ "DSIN   $dst" %}
9807   opcode(0xD9, 0xFE);
9808   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9809   ins_pipe( pipe_slow );
9810 %}
9811 
9812 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
9813   predicate (UseSSE<=1);
9814   match(Set dst (CosD src));
9815   ins_cost(1800);
9816   format %{ "DCOS   $dst" %}
9817   opcode(0xD9, 0xFF);
9818   ins_encode( OpcP, OpcS );
9819   ins_pipe( pipe_slow );
9820 %}
9821 
9822 instruct cosD_reg(regD dst, eFlagsReg cr) %{
9823   predicate (UseSSE>=2);
9824   match(Set dst (CosD dst));
9825   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9826   ins_cost(1800);
9827   format %{ "DCOS   $dst" %}
9828   opcode(0xD9, 0xFF);
9829   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9830   ins_pipe( pipe_slow );
9831 %}
9832 
9833 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
9834   predicate (UseSSE<=1);
9835   match(Set dst(TanD src));
9836   format %{ "DTAN   $dst" %}
9837   ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
9838               Opcode(0xDD), Opcode(0xD8));   // fstp st
9839   ins_pipe( pipe_slow );
9840 %}
9841 
9842 instruct tanD_reg(regD dst, eFlagsReg cr) %{
9843   predicate (UseSSE>=2);
9844   match(Set dst(TanD dst));
9845   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9846   format %{ "DTAN   $dst" %}
9847   ins_encode( Push_SrcD(dst),
9848               Opcode(0xD9), Opcode(0xF2),    // fptan
9849               Opcode(0xDD), Opcode(0xD8),   // fstp st
9850               Push_ResultD(dst) );
9851   ins_pipe( pipe_slow );
9852 %}
9853 
9854 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9855   predicate (UseSSE<=1);
9856   match(Set dst(AtanD dst src));
9857   format %{ "DATA   $dst,$src" %}
9858   opcode(0xD9, 0xF3);
9859   ins_encode( Push_Reg_DPR(src),
9860               OpcP, OpcS, RegOpc(dst) );
9861   ins_pipe( pipe_slow );
9862 %}
9863 
9864 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9865   predicate (UseSSE>=2);
9866   match(Set dst(AtanD dst src));
9867   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9868   format %{ "DATA   $dst,$src" %}
9869   opcode(0xD9, 0xF3);
9870   ins_encode( Push_SrcD(src),
9871               OpcP, OpcS, Push_ResultD(dst) );
9872   ins_pipe( pipe_slow );
9873 %}
9874 
9875 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9876   predicate (UseSSE<=1);
9877   match(Set dst (SqrtD src));
9878   format %{ "DSQRT  $dst,$src" %}
9879   opcode(0xFA, 0xD9);
9880   ins_encode( Push_Reg_DPR(src),
9881               OpcS, OpcP, Pop_Reg_DPR(dst) );
9882   ins_pipe( pipe_slow );
9883 %}
9884 
9885 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9886   predicate (UseSSE<=1);
9887   match(Set Y (PowD X Y));  // Raise X to the Yth power
9888   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9889   format %{ "fast_pow $X $Y -> $Y  // KILL $rax, $rcx, $rdx" %}
9890   ins_encode %{
9891     __ subptr(rsp, 8);
9892     __ fld_s($X$$reg - 1);
9893     __ fast_pow();
9894     __ addptr(rsp, 8);
9895   %}
9896   ins_pipe( pipe_slow );
9897 %}
9898 
9899 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9900   predicate (UseSSE>=2);
9901   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
9902   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9903   format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
9904   ins_encode %{
9905     __ subptr(rsp, 8);
9906     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
9907     __ fld_d(Address(rsp, 0));
9908     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
9909     __ fld_d(Address(rsp, 0));
9910     __ fast_pow();
9911     __ fstp_d(Address(rsp, 0));
9912     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9913     __ addptr(rsp, 8);
9914   %}
9915   ins_pipe( pipe_slow );
9916 %}
9917 
9918 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
9919   predicate (UseSSE<=1);
9920   // The source Double operand on FPU stack
9921   match(Set dst (Log10D src));
9922   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9923   // fxch         ; swap ST(0) with ST(1)
9924   // fyl2x        ; compute log_10(2) * log_2(x)
9925   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9926             "FXCH   \n\t"
9927             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9928          %}
9929   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9930               Opcode(0xD9), Opcode(0xC9),   // fxch
9931               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9932 
9933   ins_pipe( pipe_slow );
9934 %}
9935 
9936 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
9937   predicate (UseSSE>=2);
9938   effect(KILL cr);
9939   match(Set dst (Log10D src));
9940   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9941   // fyl2x        ; compute log_10(2) * log_2(x)
9942   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9943             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9944          %}
9945   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9946               Push_SrcD(src),
9947               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9948               Push_ResultD(dst));
9949 
9950   ins_pipe( pipe_slow );
9951 %}
9952 
9953 //-------------Float Instructions-------------------------------
9954 // Float Math
9955 
9956 // Code for float compare:
9957 //     fcompp();
9958 //     fwait(); fnstsw_ax();
9959 //     sahf();
9960 //     movl(dst, unordered_result);
9961 //     jcc(Assembler::parity, exit);
9962 //     movl(dst, less_result);
9963 //     jcc(Assembler::below, exit);
9964 //     movl(dst, equal_result);
9965 //     jcc(Assembler::equal, exit);
9966 //     movl(dst, greater_result);
9967 //   exit:
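//
// In source-level terms the sequence above implements (sketch only;
// unordered_result is -1 for Java's fcmpl and +1 for fcmpg):
//     if (isnan(x) || isnan(y)) return unordered_result;
//     return (x < y) ? less_result : ((x == y) ? equal_result : greater_result);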
9968 
9969 // P6 version of float compare, sets condition codes in EFLAGS
9970 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9971   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9972   match(Set cr (CmpF src1 src2));
9973   effect(KILL rax);
9974   ins_cost(150);
9975   format %{ "FLD    $src1\n\t"
9976             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9977             "JNP    exit\n\t"
9978             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
9979             "SAHF\n"
9980      "exit:\tNOP               // avoid branch to branch" %}
9981   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9982   ins_encode( Push_Reg_DPR(src1),
9983               OpcP, RegOpc(src2),
9984               cmpF_P6_fixup );
9985   ins_pipe( pipe_slow );
9986 %}
9987 
9988 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
9989   predicate(VM_Version::supports_cmov() && UseSSE == 0);
9990   match(Set cr (CmpF src1 src2));
9991   ins_cost(100);
9992   format %{ "FLD    $src1\n\t"
9993             "FUCOMIP ST,$src2  // P6 instruction" %}
9994   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9995   ins_encode( Push_Reg_DPR(src1),
9996               OpcP, RegOpc(src2));
9997   ins_pipe( pipe_slow );
9998 %}
9999 
10000 
10001 // Compare & branch
10002 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10003   predicate(UseSSE == 0);
10004   match(Set cr (CmpF src1 src2));
10005   effect(KILL rax);
10006   ins_cost(200);
10007   format %{ "FLD    $src1\n\t"
10008             "FCOMp  $src2\n\t"
10009             "FNSTSW AX\n\t"
10010             "TEST   AX,0x400\n\t"
10011             "JZ,s   flags\n\t"
10012             "MOV    AH,1\t# unordered treat as LT\n"
10013     "flags:\tSAHF" %}
10014   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10015   ins_encode( Push_Reg_DPR(src1),
10016               OpcP, RegOpc(src2),
10017               fpu_flags);
10018   ins_pipe( pipe_slow );
10019 %}
10020 
10021 // Compare vs zero into -1,0,1
10022 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10023   predicate(UseSSE == 0);
10024   match(Set dst (CmpF3 src1 zero));
10025   effect(KILL cr, KILL rax);
10026   ins_cost(280);
10027   format %{ "FTSTF  $dst,$src1" %}
10028   opcode(0xE4, 0xD9);
10029   ins_encode( Push_Reg_DPR(src1),
10030               OpcS, OpcP, PopFPU,
10031               CmpF_Result(dst));
10032   ins_pipe( pipe_slow );
10033 %}
10034 
10035 // Compare into -1,0,1
10036 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10037   predicate(UseSSE == 0);
10038   match(Set dst (CmpF3 src1 src2));
10039   effect(KILL cr, KILL rax);
10040   ins_cost(300);
10041   format %{ "FCMPF  $dst,$src1,$src2" %}
10042   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10043   ins_encode( Push_Reg_DPR(src1),
10044               OpcP, RegOpc(src2),
10045               CmpF_Result(dst));
10046   ins_pipe( pipe_slow );
10047 %}
10048 
10049 // float compare and set condition codes in EFLAGS by XMM regs
10050 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10051   predicate(UseSSE>=1);
10052   match(Set cr (CmpF src1 src2));
10053   ins_cost(145);
10054   format %{ "UCOMISS $src1,$src2\n\t"
10055             "JNP,s   exit\n\t"
10056             "PUSHF\t# saw NaN, set CF\n\t"
10057             "AND     [rsp], #0xffffff2b\n\t"
10058             "POPF\n"
10059     "exit:" %}
10060   ins_encode %{
10061     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10062     emit_cmpfp_fixup(_masm);
10063   %}
10064   ins_pipe( pipe_slow );
10065 %}
10066 
10067 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10068   predicate(UseSSE>=1);
10069   match(Set cr (CmpF src1 src2));
10070   ins_cost(100);
10071   format %{ "UCOMISS $src1,$src2" %}
10072   ins_encode %{
10073     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10074   %}
10075   ins_pipe( pipe_slow );
10076 %}
10077 
10078 // float compare and set condition codes in EFLAGS by XMM regs
10079 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10080   predicate(UseSSE>=1);
10081   match(Set cr (CmpF src1 (LoadF src2)));
10082   ins_cost(165);
10083   format %{ "UCOMISS $src1,$src2\n\t"
10084             "JNP,s   exit\n\t"
10085             "PUSHF\t# saw NaN, set CF\n\t"
10086             "AND     [rsp], #0xffffff2b\n\t"
10087             "POPF\n"
10088     "exit:" %}
10089   ins_encode %{
10090     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10091     emit_cmpfp_fixup(_masm);
10092   %}
10093   ins_pipe( pipe_slow );
10094 %}
10095 
10096 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10097   predicate(UseSSE>=1);
10098   match(Set cr (CmpF src1 (LoadF src2)));
10099   ins_cost(100);
10100   format %{ "UCOMISS $src1,$src2" %}
10101   ins_encode %{
10102     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10103   %}
10104   ins_pipe( pipe_slow );
10105 %}
10106 
10107 // Compare into -1,0,1 in XMM
10108 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10109   predicate(UseSSE>=1);
10110   match(Set dst (CmpF3 src1 src2));
10111   effect(KILL cr);
10112   ins_cost(255);
10113   format %{ "UCOMISS $src1, $src2\n\t"
10114             "MOV     $dst, #-1\n\t"
10115             "JP,s    done\n\t"
10116             "JB,s    done\n\t"
10117             "SETNE   $dst\n\t"
10118             "MOVZB   $dst, $dst\n"
10119     "done:" %}
10120   ins_encode %{
10121     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10122     emit_cmpfp3(_masm, $dst$$Register);
10123   %}
10124   ins_pipe( pipe_slow );
10125 %}
10126 
10127 // Compare into -1,0,1 in XMM and memory
10128 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10129   predicate(UseSSE>=1);
10130   match(Set dst (CmpF3 src1 (LoadF src2)));
10131   effect(KILL cr);
10132   ins_cost(275);
10133   format %{ "UCOMISS $src1, $src2\n\t"
10134             "MOV     $dst, #-1\n\t"
10135             "JP,s    done\n\t"
10136             "JB,s    done\n\t"
10137             "SETNE   $dst\n\t"
10138             "MOVZB   $dst, $dst\n"
10139     "done:" %}
10140   ins_encode %{
10141     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10142     emit_cmpfp3(_masm, $dst$$Register);
10143   %}
10144   ins_pipe( pipe_slow );
10145 %}
10146 
10147 // Spill to obtain 24-bit precision
10148 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10149   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10150   match(Set dst (SubF src1 src2));
10151 
10152   format %{ "FSUB   $dst,$src1 - $src2" %}
10153   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10154   ins_encode( Push_Reg_FPR(src1),
10155               OpcReg_FPR(src2),
10156               Pop_Mem_FPR(dst) );
10157   ins_pipe( fpu_mem_reg_reg );
10158 %}
10159 //
10160 // This instruction does not round to 24-bits
10161 instruct subFPR_reg(regFPR dst, regFPR src) %{
10162   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10163   match(Set dst (SubF dst src));
10164 
10165   format %{ "FSUB   $dst,$src" %}
10166   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10167   ins_encode( Push_Reg_FPR(src),
10168               OpcP, RegOpc(dst) );
10169   ins_pipe( fpu_reg_reg );
10170 %}
10171 
10172 // Spill to obtain 24-bit precision
10173 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10174   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10175   match(Set dst (AddF src1 src2));
10176 
10177   format %{ "FADD   $dst,$src1,$src2" %}
10178   opcode(0xD8, 0x0); /* D8 C0+i */
10179   ins_encode( Push_Reg_FPR(src2),
10180               OpcReg_FPR(src1),
10181               Pop_Mem_FPR(dst) );
10182   ins_pipe( fpu_mem_reg_reg );
10183 %}
10184 //
10185 // This instruction does not round to 24-bits
10186 instruct addFPR_reg(regFPR dst, regFPR src) %{
10187   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10188   match(Set dst (AddF dst src));
10189 
10190   format %{ "FLD    $src\n\t"
10191             "FADDp  $dst,ST" %}
10192   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10193   ins_encode( Push_Reg_FPR(src),
10194               OpcP, RegOpc(dst) );
10195   ins_pipe( fpu_reg_reg );
10196 %}
10197 
10198 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10199   predicate(UseSSE==0);
10200   match(Set dst (AbsF src));
10201   ins_cost(100);
10202   format %{ "FABS" %}
10203   opcode(0xE1, 0xD9);
10204   ins_encode( OpcS, OpcP );
10205   ins_pipe( fpu_reg_reg );
10206 %}
10207 
10208 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10209   predicate(UseSSE==0);
10210   match(Set dst (NegF src));
10211   ins_cost(100);
10212   format %{ "FCHS" %}
10213   opcode(0xE0, 0xD9);
10214   ins_encode( OpcS, OpcP );
10215   ins_pipe( fpu_reg_reg );
10216 %}
10217 
10218 // Cisc-alternate to addFPR_reg
10219 // Spill to obtain 24-bit precision
10220 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10221   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10222   match(Set dst (AddF src1 (LoadF src2)));
10223 
10224   format %{ "FLD    $src2\n\t"
10225             "FADD   ST,$src1\n\t"
10226             "FSTP_S $dst" %}
10227   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10228   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10229               OpcReg_FPR(src1),
10230               Pop_Mem_FPR(dst) );
10231   ins_pipe( fpu_mem_reg_mem );
10232 %}
10233 //
10234 // Cisc-alternate to addFPR_reg
10235 // This instruction does not round to 24-bits
10236 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10237   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10238   match(Set dst (AddF dst (LoadF src)));
10239 
10240   format %{ "FADD   $dst,$src" %}
10241   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10242   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10243               OpcP, RegOpc(dst) );
10244   ins_pipe( fpu_reg_mem );
10245 %}
10246 
// Following two instructions for _222_mpegaudio
10248 // Spill to obtain 24-bit precision
10249 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10250   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10251   match(Set dst (AddF src1 src2));
10252 
10253   format %{ "FADD   $dst,$src1,$src2" %}
10254   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10255   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10256               OpcReg_FPR(src2),
10257               Pop_Mem_FPR(dst) );
10258   ins_pipe( fpu_mem_reg_mem );
10259 %}
10260 
10261 // Cisc-spill variant
10262 // Spill to obtain 24-bit precision
10263 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10264   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10265   match(Set dst (AddF src1 (LoadF src2)));
10266 
10267   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10268   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10269   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10270               set_instruction_start,
10271               OpcP, RMopc_Mem(secondary,src1),
10272               Pop_Mem_FPR(dst) );
10273   ins_pipe( fpu_mem_mem_mem );
10274 %}
10275 
10276 // Spill to obtain 24-bit precision
10277 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10278   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10279   match(Set dst (AddF src1 src2));
10280 
10281   format %{ "FADD   $dst,$src1,$src2" %}
10282   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10283   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10284               set_instruction_start,
10285               OpcP, RMopc_Mem(secondary,src1),
10286               Pop_Mem_FPR(dst) );
10287   ins_pipe( fpu_mem_mem_mem );
10288 %}
10289 
10290 
10291 // Spill to obtain 24-bit precision
10292 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10293   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10294   match(Set dst (AddF src con));
10295   format %{ "FLD    $src\n\t"
10296             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10297             "FSTP_S $dst"  %}
10298   ins_encode %{
10299     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10300     __ fadd_s($constantaddress($con));
10301     __ fstp_s(Address(rsp, $dst$$disp));
10302   %}
10303   ins_pipe(fpu_mem_reg_con);
10304 %}
10305 //
10306 // This instruction does not round to 24-bits
10307 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10308   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10309   match(Set dst (AddF src con));
10310   format %{ "FLD    $src\n\t"
10311             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10312             "FSTP   $dst"  %}
10313   ins_encode %{
10314     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10315     __ fadd_s($constantaddress($con));
10316     __ fstp_d($dst$$reg);
10317   %}
10318   ins_pipe(fpu_reg_reg_con);
10319 %}
10320 
10321 // Spill to obtain 24-bit precision
10322 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10323   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10324   match(Set dst (MulF src1 src2));
10325 
10326   format %{ "FLD    $src1\n\t"
10327             "FMUL   $src2\n\t"
10328             "FSTP_S $dst"  %}
10329   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10330   ins_encode( Push_Reg_FPR(src1),
10331               OpcReg_FPR(src2),
10332               Pop_Mem_FPR(dst) );
10333   ins_pipe( fpu_mem_reg_reg );
10334 %}
10335 //
10336 // This instruction does not round to 24-bits
10337 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10338   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10339   match(Set dst (MulF src1 src2));
10340 
10341   format %{ "FLD    $src1\n\t"
10342             "FMUL   $src2\n\t"
10343             "FSTP_S $dst"  %}
10344   opcode(0xD8, 0x1); /* D8 C8+i */
10345   ins_encode( Push_Reg_FPR(src2),
10346               OpcReg_FPR(src1),
10347               Pop_Reg_FPR(dst) );
10348   ins_pipe( fpu_reg_reg_reg );
10349 %}
10350 
10351 
10352 // Spill to obtain 24-bit precision
10353 // Cisc-alternate to reg-reg multiply
10354 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10355   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10356   match(Set dst (MulF src1 (LoadF src2)));
10357 
10358   format %{ "FLD_S  $src2\n\t"
10359             "FMUL   $src1\n\t"
10360             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10362   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10363               OpcReg_FPR(src1),
10364               Pop_Mem_FPR(dst) );
10365   ins_pipe( fpu_mem_reg_mem );
10366 %}
10367 //
10368 // This instruction does not round to 24-bits
10369 // Cisc-alternate to reg-reg multiply
10370 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10371   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10372   match(Set dst (MulF src1 (LoadF src2)));
10373 
10374   format %{ "FMUL   $dst,$src1,$src2" %}
10375   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10376   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10377               OpcReg_FPR(src1),
10378               Pop_Reg_FPR(dst) );
10379   ins_pipe( fpu_reg_reg_mem );
10380 %}
10381 
10382 // Spill to obtain 24-bit precision
10383 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10384   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10385   match(Set dst (MulF src1 src2));
10386 
10387   format %{ "FMUL   $dst,$src1,$src2" %}
10388   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10389   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10390               set_instruction_start,
10391               OpcP, RMopc_Mem(secondary,src1),
10392               Pop_Mem_FPR(dst) );
10393   ins_pipe( fpu_mem_mem_mem );
10394 %}
10395 
10396 // Spill to obtain 24-bit precision
10397 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10398   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10399   match(Set dst (MulF src con));
10400 
10401   format %{ "FLD    $src\n\t"
10402             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10403             "FSTP_S $dst"  %}
10404   ins_encode %{
10405     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10406     __ fmul_s($constantaddress($con));
10407     __ fstp_s(Address(rsp, $dst$$disp));
10408   %}
10409   ins_pipe(fpu_mem_reg_con);
10410 %}
10411 //
10412 // This instruction does not round to 24-bits
10413 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10414   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10415   match(Set dst (MulF src con));
10416 
10417   format %{ "FLD    $src\n\t"
10418             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10419             "FSTP   $dst"  %}
10420   ins_encode %{
10421     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10422     __ fmul_s($constantaddress($con));
10423     __ fstp_d($dst$$reg);
10424   %}
10425   ins_pipe(fpu_reg_reg_con);
10426 %}
10427 
10428 
10429 //
10430 // MACRO1 -- subsume unshared load into mulFPR
10431 // This instruction does not round to 24-bits
10432 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10433   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10434   match(Set dst (MulF (LoadF mem1) src));
10435 
10436   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10437             "FMUL   ST,$src\n\t"
10438             "FSTP   $dst" %}
10439   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10440   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10441               OpcReg_FPR(src),
10442               Pop_Reg_FPR(dst) );
10443   ins_pipe( fpu_reg_reg_mem );
10444 %}
10445 //
10446 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10447 // This instruction does not round to 24-bits
10448 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10449   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10450   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10451   ins_cost(95);
10452 
10453   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10454             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10455             "FADD   ST,$src2\n\t"
10456             "FSTP   $dst" %}
10457   opcode(0xD9); /* LoadF D9 /0 */
10458   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10459               FMul_ST_reg(src1),
10460               FAdd_ST_reg(src2),
10461               Pop_Reg_FPR(dst) );
10462   ins_pipe( fpu_reg_mem_reg_reg );
10463 %}
10464 
10465 // MACRO3 -- addFPR a mulFPR
10466 // This instruction does not round to 24-bits.  It is a '2-address'
10467 // instruction in that the result goes back to src2.  This eliminates
10468 // a move from the macro; possibly the register allocator will have
10469 // to add it back (and maybe not).
10470 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10471   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10472   match(Set src2 (AddF (MulF src0 src1) src2));
10473 
10474   format %{ "FLD    $src0     ===MACRO3===\n\t"
10475             "FMUL   ST,$src1\n\t"
10476             "FADDP  $src2,ST" %}
10477   opcode(0xD9); /* LoadF D9 /0 */
10478   ins_encode( Push_Reg_FPR(src0),
10479               FMul_ST_reg(src1),
10480               FAddP_reg_ST(src2) );
10481   ins_pipe( fpu_reg_reg_reg );
10482 %}
10483 
10484 // MACRO4 -- divFPR subFPR
10485 // This instruction does not round to 24-bits
10486 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10487   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10488   match(Set dst (DivF (SubF src2 src1) src3));
10489 
10490   format %{ "FLD    $src2   ===MACRO4===\n\t"
10491             "FSUB   ST,$src1\n\t"
10492             "FDIV   ST,$src3\n\t"
10493             "FSTP  $dst" %}
10494   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10495   ins_encode( Push_Reg_FPR(src2),
10496               subFPR_divFPR_encode(src1,src3),
10497               Pop_Reg_FPR(dst) );
10498   ins_pipe( fpu_reg_reg_reg_reg );
10499 %}
10500 
10501 // Spill to obtain 24-bit precision
10502 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10503   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10504   match(Set dst (DivF src1 src2));
10505 
10506   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10508   ins_encode( Push_Reg_FPR(src1),
10509               OpcReg_FPR(src2),
10510               Pop_Mem_FPR(dst) );
10511   ins_pipe( fpu_mem_reg_reg );
10512 %}
10513 //
10514 // This instruction does not round to 24-bits
10515 instruct divFPR_reg(regFPR dst, regFPR src) %{
10516   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10517   match(Set dst (DivF dst src));
10518 
10519   format %{ "FDIV   $dst,$src" %}
10520   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10521   ins_encode( Push_Reg_FPR(src),
10522               OpcP, RegOpc(dst) );
10523   ins_pipe( fpu_reg_reg );
10524 %}
10525 
10526 
10527 // Spill to obtain 24-bit precision
10528 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10529   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10530   match(Set dst (ModF src1 src2));
10531   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10532 
10533   format %{ "FMOD   $dst,$src1,$src2" %}
10534   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10535               emitModDPR(),
10536               Push_Result_Mod_DPR(src2),
10537               Pop_Mem_FPR(dst));
10538   ins_pipe( pipe_slow );
10539 %}
10540 //
10541 // This instruction does not round to 24-bits
10542 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10543   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10544   match(Set dst (ModF dst src));
10545   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10546 
10547   format %{ "FMOD   $dst,$src" %}
10548   ins_encode(Push_Reg_Mod_DPR(dst, src),
10549               emitModDPR(),
10550               Push_Result_Mod_DPR(src),
10551               Pop_Reg_FPR(dst));
10552   ins_pipe( pipe_slow );
10553 %}
10554 
10555 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10556   predicate(UseSSE>=1);
10557   match(Set dst (ModF src0 src1));
10558   effect(KILL rax, KILL cr);
10559   format %{ "SUB    ESP,4\t # FMOD\n"
10560           "\tMOVSS  [ESP+0],$src1\n"
10561           "\tFLD_S  [ESP+0]\n"
10562           "\tMOVSS  [ESP+0],$src0\n"
10563           "\tFLD_S  [ESP+0]\n"
10564      "loop:\tFPREM\n"
10565           "\tFWAIT\n"
10566           "\tFNSTSW AX\n"
10567           "\tSAHF\n"
10568           "\tJP     loop\n"
10569           "\tFSTP_S [ESP+0]\n"
10570           "\tMOVSS  $dst,[ESP+0]\n"
10571           "\tADD    ESP,4\n"
10572           "\tFSTP   ST0\t # Restore FPU Stack"
10573     %}
10574   ins_cost(250);
10575   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10576   ins_pipe( pipe_slow );
10577 %}
10578 
10579 
10580 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alpha-sorted.  Please keep it that way!
10582 
10583 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10584   predicate(UseSSE==0);
10585   match(Set dst (RoundFloat src));
10586   ins_cost(125);
10587   format %{ "FST_S  $dst,$src\t# F-round" %}
10588   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10589   ins_pipe( fpu_mem_reg );
10590 %}
10591 
10592 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10593   predicate(UseSSE<=1);
10594   match(Set dst (RoundDouble src));
10595   ins_cost(125);
10596   format %{ "FST_D  $dst,$src\t# D-round" %}
10597   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10598   ins_pipe( fpu_mem_reg );
10599 %}
10600 
// Force rounding to 24-bit precision and 8-bit exponent
10602 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10603   predicate(UseSSE==0);
10604   match(Set dst (ConvD2F src));
10605   format %{ "FST_S  $dst,$src\t# F-round" %}
10606   expand %{
10607     roundFloat_mem_reg(dst,src);
10608   %}
10609 %}
10610 
// Force rounding to 24-bit precision and 8-bit exponent
10612 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10613   predicate(UseSSE==1);
10614   match(Set dst (ConvD2F src));
10615   effect( KILL cr );
10616   format %{ "SUB    ESP,4\n\t"
10617             "FST_S  [ESP],$src\t# F-round\n\t"
10618             "MOVSS  $dst,[ESP]\n\t"
10619             "ADD ESP,4" %}
10620   ins_encode %{
10621     __ subptr(rsp, 4);
10622     if ($src$$reg != FPR1L_enc) {
10623       __ fld_s($src$$reg-1);
10624       __ fstp_s(Address(rsp, 0));
10625     } else {
10626       __ fst_s(Address(rsp, 0));
10627     }
10628     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10629     __ addptr(rsp, 4);
10630   %}
10631   ins_pipe( pipe_slow );
10632 %}
10633 
10634 // Force rounding double precision to single precision
10635 instruct convD2F_reg(regF dst, regD src) %{
10636   predicate(UseSSE>=2);
10637   match(Set dst (ConvD2F src));
10638   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10639   ins_encode %{
10640     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10641   %}
10642   ins_pipe( pipe_slow );
10643 %}
10644 
10645 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10646   predicate(UseSSE==0);
10647   match(Set dst (ConvF2D src));
10648   format %{ "FST_S  $dst,$src\t# D-round" %}
10649   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10650   ins_pipe( fpu_reg_reg );
10651 %}
10652 
10653 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10654   predicate(UseSSE==1);
10655   match(Set dst (ConvF2D src));
10656   format %{ "FST_D  $dst,$src\t# D-round" %}
10657   expand %{
10658     roundDouble_mem_reg(dst,src);
10659   %}
10660 %}
10661 
10662 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10663   predicate(UseSSE==1);
10664   match(Set dst (ConvF2D src));
10665   effect( KILL cr );
10666   format %{ "SUB    ESP,4\n\t"
10667             "MOVSS  [ESP] $src\n\t"
10668             "FLD_S  [ESP]\n\t"
10669             "ADD    ESP,4\n\t"
10670             "FSTP   $dst\t# D-round" %}
10671   ins_encode %{
10672     __ subptr(rsp, 4);
10673     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10674     __ fld_s(Address(rsp, 0));
10675     __ addptr(rsp, 4);
10676     __ fstp_d($dst$$reg);
10677   %}
10678   ins_pipe( pipe_slow );
10679 %}
10680 
10681 instruct convF2D_reg(regD dst, regF src) %{
10682   predicate(UseSSE>=2);
10683   match(Set dst (ConvF2D src));
10684   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10685   ins_encode %{
10686     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10687   %}
10688   ins_pipe( pipe_slow );
10689 %}
10690 
10691 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10692 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10693   predicate(UseSSE<=1);
10694   match(Set dst (ConvD2I src));
10695   effect( KILL tmp, KILL cr );
10696   format %{ "FLD    $src\t# Convert double to int \n\t"
10697             "FLDCW  trunc mode\n\t"
10698             "SUB    ESP,4\n\t"
10699             "FISTp  [ESP + #0]\n\t"
10700             "FLDCW  std/24-bit mode\n\t"
10701             "POP    EAX\n\t"
10702             "CMP    EAX,0x80000000\n\t"
10703             "JNE,s  fast\n\t"
10704             "FLD_D  $src\n\t"
10705             "CALL   d2i_wrapper\n"
10706       "fast:" %}
10707   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10708   ins_pipe( pipe_slow );
10709 %}
10710 
10711 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10712 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10713   predicate(UseSSE>=2);
10714   match(Set dst (ConvD2I src));
10715   effect( KILL tmp, KILL cr );
10716   format %{ "CVTTSD2SI $dst, $src\n\t"
10717             "CMP    $dst,0x80000000\n\t"
10718             "JNE,s  fast\n\t"
10719             "SUB    ESP, 8\n\t"
10720             "MOVSD  [ESP], $src\n\t"
10721             "FLD_D  [ESP]\n\t"
10722             "ADD    ESP, 8\n\t"
10723             "CALL   d2i_wrapper\n"
10724       "fast:" %}
10725   ins_encode %{
10726     Label fast;
10727     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10728     __ cmpl($dst$$Register, 0x80000000);
10729     __ jccb(Assembler::notEqual, fast);
10730     __ subptr(rsp, 8);
10731     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10732     __ fld_d(Address(rsp, 0));
10733     __ addptr(rsp, 8);
10734     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10735     __ bind(fast);
10736   %}
10737   ins_pipe( pipe_slow );
10738 %}
10739 
10740 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10741   predicate(UseSSE<=1);
10742   match(Set dst (ConvD2L src));
10743   effect( KILL cr );
10744   format %{ "FLD    $src\t# Convert double to long\n\t"
10745             "FLDCW  trunc mode\n\t"
10746             "SUB    ESP,8\n\t"
10747             "FISTp  [ESP + #0]\n\t"
10748             "FLDCW  std/24-bit mode\n\t"
10749             "POP    EAX\n\t"
10750             "POP    EDX\n\t"
10751             "CMP    EDX,0x80000000\n\t"
10752             "JNE,s  fast\n\t"
10753             "TEST   EAX,EAX\n\t"
10754             "JNE,s  fast\n\t"
10755             "FLD    $src\n\t"
10756             "CALL   d2l_wrapper\n"
10757       "fast:" %}
10758   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10759   ins_pipe( pipe_slow );
10760 %}
10761 
10762 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10763 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10764   predicate (UseSSE>=2);
10765   match(Set dst (ConvD2L src));
10766   effect( KILL cr );
10767   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10768             "MOVSD  [ESP],$src\n\t"
10769             "FLD_D  [ESP]\n\t"
10770             "FLDCW  trunc mode\n\t"
10771             "FISTp  [ESP + #0]\n\t"
10772             "FLDCW  std/24-bit mode\n\t"
10773             "POP    EAX\n\t"
10774             "POP    EDX\n\t"
10775             "CMP    EDX,0x80000000\n\t"
10776             "JNE,s  fast\n\t"
10777             "TEST   EAX,EAX\n\t"
10778             "JNE,s  fast\n\t"
10779             "SUB    ESP,8\n\t"
10780             "MOVSD  [ESP],$src\n\t"
10781             "FLD_D  [ESP]\n\t"
10782             "ADD    ESP,8\n\t"
10783             "CALL   d2l_wrapper\n"
10784       "fast:" %}
10785   ins_encode %{
10786     Label fast;
10787     __ subptr(rsp, 8);
10788     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10789     __ fld_d(Address(rsp, 0));
10790     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10791     __ fistp_d(Address(rsp, 0));
10792     // Restore the rounding mode, mask the exception
10793     if (Compile::current()->in_24_bit_fp_mode()) {
10794       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10795     } else {
10796       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10797     }
10798     // Load the converted long, adjust CPU stack
10799     __ pop(rax);
10800     __ pop(rdx);
10801     __ cmpl(rdx, 0x80000000);
10802     __ jccb(Assembler::notEqual, fast);
10803     __ testl(rax, rax);
10804     __ jccb(Assembler::notEqual, fast);
10805     __ subptr(rsp, 8);
10806     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10807     __ fld_d(Address(rsp, 0));
10808     __ addptr(rsp, 8);
10809     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10810     __ bind(fast);
10811   %}
10812   ins_pipe( pipe_slow );
10813 %}
10814 
10815 // Convert a double to an int.  Java semantics require we do complex
// mangling in the corner cases.  So we set the rounding mode to
10817 // 'zero', store the darned double down as an int, and reset the
10818 // rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this and
// go the slow path if needed.
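//
// For reference, the corner cases the d2i_wrapper slow path must handle are
// (sketch of the Java d2i rules, not generated code; max_jint/min_jint are
// the usual HotSpot constants):
//   if (d != d)                 return 0;          // NaN
//   if (d >= (double)max_jint)  return max_jint;   // clamp positive overflow
//   if (d <= (double)min_jint)  return min_jint;   // clamp negative overflow
//   return (int)d;                                 // truncate toward zero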
10821 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10822   predicate(UseSSE==0);
10823   match(Set dst (ConvF2I src));
10824   effect( KILL tmp, KILL cr );
10825   format %{ "FLD    $src\t# Convert float to int \n\t"
10826             "FLDCW  trunc mode\n\t"
10827             "SUB    ESP,4\n\t"
10828             "FISTp  [ESP + #0]\n\t"
10829             "FLDCW  std/24-bit mode\n\t"
10830             "POP    EAX\n\t"
10831             "CMP    EAX,0x80000000\n\t"
10832             "JNE,s  fast\n\t"
10833             "FLD    $src\n\t"
10834             "CALL   d2i_wrapper\n"
10835       "fast:" %}
10836   // DPR2I_encoding works for FPR2I
10837   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10838   ins_pipe( pipe_slow );
10839 %}
10840 
10841 // Convert a float in xmm to an int reg.
10842 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10843   predicate(UseSSE>=1);
10844   match(Set dst (ConvF2I src));
10845   effect( KILL tmp, KILL cr );
10846   format %{ "CVTTSS2SI $dst, $src\n\t"
10847             "CMP    $dst,0x80000000\n\t"
10848             "JNE,s  fast\n\t"
10849             "SUB    ESP, 4\n\t"
10850             "MOVSS  [ESP], $src\n\t"
10851             "FLD    [ESP]\n\t"
10852             "ADD    ESP, 4\n\t"
10853             "CALL   d2i_wrapper\n"
10854       "fast:" %}
10855   ins_encode %{
10856     Label fast;
10857     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10858     __ cmpl($dst$$Register, 0x80000000);
10859     __ jccb(Assembler::notEqual, fast);
10860     __ subptr(rsp, 4);
10861     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10862     __ fld_s(Address(rsp, 0));
10863     __ addptr(rsp, 4);
10864     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10865     __ bind(fast);
10866   %}
10867   ins_pipe( pipe_slow );
10868 %}
10869 
10870 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10871   predicate(UseSSE==0);
10872   match(Set dst (ConvF2L src));
10873   effect( KILL cr );
10874   format %{ "FLD    $src\t# Convert float to long\n\t"
10875             "FLDCW  trunc mode\n\t"
10876             "SUB    ESP,8\n\t"
10877             "FISTp  [ESP + #0]\n\t"
10878             "FLDCW  std/24-bit mode\n\t"
10879             "POP    EAX\n\t"
10880             "POP    EDX\n\t"
10881             "CMP    EDX,0x80000000\n\t"
10882             "JNE,s  fast\n\t"
10883             "TEST   EAX,EAX\n\t"
10884             "JNE,s  fast\n\t"
10885             "FLD    $src\n\t"
10886             "CALL   d2l_wrapper\n"
10887       "fast:" %}
10888   // DPR2L_encoding works for FPR2L
10889   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10890   ins_pipe( pipe_slow );
10891 %}
10892 
10893 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10894 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10895   predicate (UseSSE>=1);
10896   match(Set dst (ConvF2L src));
10897   effect( KILL cr );
10898   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10899             "MOVSS  [ESP],$src\n\t"
10900             "FLD_S  [ESP]\n\t"
10901             "FLDCW  trunc mode\n\t"
10902             "FISTp  [ESP + #0]\n\t"
10903             "FLDCW  std/24-bit mode\n\t"
10904             "POP    EAX\n\t"
10905             "POP    EDX\n\t"
10906             "CMP    EDX,0x80000000\n\t"
10907             "JNE,s  fast\n\t"
10908             "TEST   EAX,EAX\n\t"
10909             "JNE,s  fast\n\t"
10910             "SUB    ESP,4\t# Convert float to long\n\t"
10911             "MOVSS  [ESP],$src\n\t"
10912             "FLD_S  [ESP]\n\t"
10913             "ADD    ESP,4\n\t"
10914             "CALL   d2l_wrapper\n"
10915       "fast:" %}
10916   ins_encode %{
10917     Label fast;
10918     __ subptr(rsp, 8);
10919     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10920     __ fld_s(Address(rsp, 0));
10921     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10922     __ fistp_d(Address(rsp, 0));
10923     // Restore the rounding mode, mask the exception
10924     if (Compile::current()->in_24_bit_fp_mode()) {
10925       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10926     } else {
10927       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10928     }
10929     // Load the converted long, adjust CPU stack
10930     __ pop(rax);
10931     __ pop(rdx);
10932     __ cmpl(rdx, 0x80000000);
10933     __ jccb(Assembler::notEqual, fast);
10934     __ testl(rax, rax);
10935     __ jccb(Assembler::notEqual, fast);
10936     __ subptr(rsp, 4);
10937     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10938     __ fld_s(Address(rsp, 0));
10939     __ addptr(rsp, 4);
10940     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10941     __ bind(fast);
10942   %}
10943   ins_pipe( pipe_slow );
10944 %}
10945 
10946 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10947   predicate( UseSSE<=1 );
10948   match(Set dst (ConvI2D src));
10949   format %{ "FILD   $src\n\t"
10950             "FSTP   $dst" %}
10951   opcode(0xDB, 0x0);  /* DB /0 */
10952   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10953   ins_pipe( fpu_reg_mem );
10954 %}
10955 
10956 instruct convI2D_reg(regD dst, rRegI src) %{
10957   predicate( UseSSE>=2 && !UseXmmI2D );
10958   match(Set dst (ConvI2D src));
10959   format %{ "CVTSI2SD $dst,$src" %}
10960   ins_encode %{
10961     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10962   %}
10963   ins_pipe( pipe_slow );
10964 %}
10965 
10966 instruct convI2D_mem(regD dst, memory mem) %{
10967   predicate( UseSSE>=2 );
10968   match(Set dst (ConvI2D (LoadI mem)));
10969   format %{ "CVTSI2SD $dst,$mem" %}
10970   ins_encode %{
10971     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10972   %}
10973   ins_pipe( pipe_slow );
10974 %}
10975 
10976 instruct convXI2D_reg(regD dst, rRegI src)
10977 %{
10978   predicate( UseSSE>=2 && UseXmmI2D );
10979   match(Set dst (ConvI2D src));
10980 
10981   format %{ "MOVD  $dst,$src\n\t"
10982             "CVTDQ2PD $dst,$dst\t# i2d" %}
10983   ins_encode %{
10984     __ movdl($dst$$XMMRegister, $src$$Register);
10985     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10986   %}
10987   ins_pipe(pipe_slow); // XXX
10988 %}
10989 
10990 instruct convI2DPR_mem(regDPR dst, memory mem) %{
10991   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
10992   match(Set dst (ConvI2D (LoadI mem)));
10993   format %{ "FILD   $mem\n\t"
10994             "FSTP   $dst" %}
10995   opcode(0xDB);      /* DB /0 */
10996   ins_encode( OpcP, RMopc_Mem(0x00,mem),
10997               Pop_Reg_DPR(dst));
10998   ins_pipe( fpu_reg_mem );
10999 %}
11000 
11001 // Convert a byte to a float; no rounding step needed.
11002 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11003   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11004   match(Set dst (ConvI2F src));
11005   format %{ "FILD   $src\n\t"
11006             "FSTP   $dst" %}
11007 
11008   opcode(0xDB, 0x0);  /* DB /0 */
11009   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11010   ins_pipe( fpu_reg_mem );
11011 %}
11012 
11013 // In 24-bit mode, force exponent rounding by storing back out
11014 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11015   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11016   match(Set dst (ConvI2F src));
11017   ins_cost(200);
11018   format %{ "FILD   $src\n\t"
11019             "FSTP_S $dst" %}
11020   opcode(0xDB, 0x0);  /* DB /0 */
11021   ins_encode( Push_Mem_I(src),
11022               Pop_Mem_FPR(dst));
11023   ins_pipe( fpu_mem_mem );
11024 %}
11025 
11026 // In 24-bit mode, force exponent rounding by storing back out
11027 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11028   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11029   match(Set dst (ConvI2F (LoadI mem)));
11030   ins_cost(200);
11031   format %{ "FILD   $mem\n\t"
11032             "FSTP_S $dst" %}
11033   opcode(0xDB);  /* DB /0 */
11034   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11035               Pop_Mem_FPR(dst));
11036   ins_pipe( fpu_mem_mem );
11037 %}
11038 
11039 // This instruction does not round to 24-bits
11040 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11041   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11042   match(Set dst (ConvI2F src));
11043   format %{ "FILD   $src\n\t"
11044             "FSTP   $dst" %}
11045   opcode(0xDB, 0x0);  /* DB /0 */
11046   ins_encode( Push_Mem_I(src),
11047               Pop_Reg_FPR(dst));
11048   ins_pipe( fpu_reg_mem );
11049 %}
11050 
11051 // This instruction does not round to 24-bits
11052 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11053   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11054   match(Set dst (ConvI2F (LoadI mem)));
11055   format %{ "FILD   $mem\n\t"
11056             "FSTP   $dst" %}
11057   opcode(0xDB);      /* DB /0 */
11058   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11059               Pop_Reg_FPR(dst));
11060   ins_pipe( fpu_reg_mem );
11061 %}
11062 
11063 // Convert an int to a float in xmm; no rounding step needed.
11064 instruct convI2F_reg(regF dst, rRegI src) %{
  predicate( UseSSE==1 || (UseSSE>=2 && !UseXmmI2F) );
11066   match(Set dst (ConvI2F src));
11067   format %{ "CVTSI2SS $dst, $src" %}
11068   ins_encode %{
11069     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11070   %}
11071   ins_pipe( pipe_slow );
11072 %}
11073 
instruct convXI2F_reg(regF dst, rRegI src)
11075 %{
11076   predicate( UseSSE>=2 && UseXmmI2F );
11077   match(Set dst (ConvI2F src));
11078 
11079   format %{ "MOVD  $dst,$src\n\t"
11080             "CVTDQ2PS $dst,$dst\t# i2f" %}
11081   ins_encode %{
11082     __ movdl($dst$$XMMRegister, $src$$Register);
11083     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11084   %}
11085   ins_pipe(pipe_slow); // XXX
11086 %}
11087 
11088 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11089   match(Set dst (ConvI2L src));
11090   effect(KILL cr);
11091   ins_cost(375);
11092   format %{ "MOV    $dst.lo,$src\n\t"
11093             "MOV    $dst.hi,$src\n\t"
11094             "SAR    $dst.hi,31" %}
11095   ins_encode(convert_int_long(dst,src));
11096   ins_pipe( ialu_reg_reg_long );
11097 %}
11098 
11099 // Zero-extend convert int to long
11100 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11101   match(Set dst (AndL (ConvI2L src) mask) );
11102   effect( KILL flags );
11103   ins_cost(250);
11104   format %{ "MOV    $dst.lo,$src\n\t"
11105             "XOR    $dst.hi,$dst.hi" %}
11106   opcode(0x33); // XOR
11107   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11108   ins_pipe( ialu_reg_reg_long );
11109 %}
11110 
11111 // Zero-extend long
11112 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11113   match(Set dst (AndL src mask) );
11114   effect( KILL flags );
11115   ins_cost(250);
11116   format %{ "MOV    $dst.lo,$src.lo\n\t"
11117             "XOR    $dst.hi,$dst.hi\n\t" %}
11118   opcode(0x33); // XOR
11119   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11120   ins_pipe( ialu_reg_reg_long );
11121 %}
11122 
11123 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11124   predicate (UseSSE<=1);
11125   match(Set dst (ConvL2D src));
11126   effect( KILL cr );
11127   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11128             "PUSH   $src.lo\n\t"
11129             "FILD   ST,[ESP + #0]\n\t"
11130             "ADD    ESP,8\n\t"
11131             "FSTP_D $dst\t# D-round" %}
11132   opcode(0xDF, 0x5);  /* DF /5 */
11133   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11134   ins_pipe( pipe_slow );
11135 %}
11136 
11137 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11138   predicate (UseSSE>=2);
11139   match(Set dst (ConvL2D src));
11140   effect( KILL cr );
11141   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11142             "PUSH   $src.lo\n\t"
11143             "FILD_D [ESP]\n\t"
11144             "FSTP_D [ESP]\n\t"
11145             "MOVSD  $dst,[ESP]\n\t"
11146             "ADD    ESP,8" %}
11147   opcode(0xDF, 0x5);  /* DF /5 */
11148   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11149   ins_pipe( pipe_slow );
11150 %}
11151 
11152 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11153   predicate (UseSSE>=1);
11154   match(Set dst (ConvL2F src));
11155   effect( KILL cr );
11156   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11157             "PUSH   $src.lo\n\t"
11158             "FILD_D [ESP]\n\t"
11159             "FSTP_S [ESP]\n\t"
11160             "MOVSS  $dst,[ESP]\n\t"
11161             "ADD    ESP,8" %}
11162   opcode(0xDF, 0x5);  /* DF /5 */
11163   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11164   ins_pipe( pipe_slow );
11165 %}
11166 
11167 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11168   match(Set dst (ConvL2F src));
11169   effect( KILL cr );
11170   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11171             "PUSH   $src.lo\n\t"
11172             "FILD   ST,[ESP + #0]\n\t"
11173             "ADD    ESP,8\n\t"
11174             "FSTP_S $dst\t# F-round" %}
11175   opcode(0xDF, 0x5);  /* DF /5 */
11176   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11177   ins_pipe( pipe_slow );
11178 %}
11179 
11180 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11181   match(Set dst (ConvL2I src));
11182   effect( DEF dst, USE src );
11183   format %{ "MOV    $dst,$src.lo" %}
11184   ins_encode(enc_CopyL_Lo(dst,src));
11185   ins_pipe( ialu_reg_reg );
11186 %}
11187 
11188 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11189   match(Set dst (MoveF2I src));
11190   effect( DEF dst, USE src );
11191   ins_cost(100);
11192   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11193   ins_encode %{
11194     __ movl($dst$$Register, Address(rsp, $src$$disp));
11195   %}
11196   ins_pipe( ialu_reg_mem );
11197 %}
11198 
11199 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11200   predicate(UseSSE==0);
11201   match(Set dst (MoveF2I src));
11202   effect( DEF dst, USE src );
11203 
11204   ins_cost(125);
11205   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11206   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11207   ins_pipe( fpu_mem_reg );
11208 %}
11209 
11210 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11211   predicate(UseSSE>=1);
11212   match(Set dst (MoveF2I src));
11213   effect( DEF dst, USE src );
11214 
11215   ins_cost(95);
11216   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11217   ins_encode %{
11218     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11219   %}
11220   ins_pipe( pipe_slow );
11221 %}
11222 
11223 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11224   predicate(UseSSE>=2);
11225   match(Set dst (MoveF2I src));
11226   effect( DEF dst, USE src );
11227   ins_cost(85);
11228   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11229   ins_encode %{
11230     __ movdl($dst$$Register, $src$$XMMRegister);
11231   %}
11232   ins_pipe( pipe_slow );
11233 %}
11234 
11235 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11236   match(Set dst (MoveI2F src));
11237   effect( DEF dst, USE src );
11238 
11239   ins_cost(100);
11240   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11241   ins_encode %{
11242     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11243   %}
11244   ins_pipe( ialu_mem_reg );
11245 %}
11246 
11247 
11248 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11249   predicate(UseSSE==0);
11250   match(Set dst (MoveI2F src));
11251   effect(DEF dst, USE src);
11252 
11253   ins_cost(125);
11254   format %{ "FLD_S  $src\n\t"
11255             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11256   opcode(0xD9);               /* D9 /0, FLD m32real */
11257   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11258               Pop_Reg_FPR(dst) );
11259   ins_pipe( fpu_reg_mem );
11260 %}
11261 
11262 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11263   predicate(UseSSE>=1);
11264   match(Set dst (MoveI2F src));
11265   effect( DEF dst, USE src );
11266 
11267   ins_cost(95);
11268   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11269   ins_encode %{
11270     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11271   %}
11272   ins_pipe( pipe_slow );
11273 %}
11274 
11275 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11276   predicate(UseSSE>=2);
11277   match(Set dst (MoveI2F src));
11278   effect( DEF dst, USE src );
11279 
11280   ins_cost(85);
11281   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11282   ins_encode %{
11283     __ movdl($dst$$XMMRegister, $src$$Register);
11284   %}
11285   ins_pipe( pipe_slow );
11286 %}
11287 
11288 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11289   match(Set dst (MoveD2L src));
11290   effect(DEF dst, USE src);
11291 
11292   ins_cost(250);
11293   format %{ "MOV    $dst.lo,$src\n\t"
11294             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11295   opcode(0x8B, 0x8B);
11296   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11297   ins_pipe( ialu_mem_long_reg );
11298 %}
11299 
11300 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11301   predicate(UseSSE<=1);
11302   match(Set dst (MoveD2L src));
11303   effect(DEF dst, USE src);
11304 
11305   ins_cost(125);
11306   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11307   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11308   ins_pipe( fpu_mem_reg );
11309 %}
11310 
11311 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11312   predicate(UseSSE>=2);
11313   match(Set dst (MoveD2L src));
11314   effect(DEF dst, USE src);
11315   ins_cost(95);
11316   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11317   ins_encode %{
11318     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11319   %}
11320   ins_pipe( pipe_slow );
11321 %}
11322 
11323 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11324   predicate(UseSSE>=2);
11325   match(Set dst (MoveD2L src));
11326   effect(DEF dst, USE src, TEMP tmp);
11327   ins_cost(85);
11328   format %{ "MOVD   $dst.lo,$src\n\t"
11329             "PSHUFLW $tmp,$src,0x4E\n\t"
11330             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11331   ins_encode %{
11332     __ movdl($dst$$Register, $src$$XMMRegister);
11333     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11334     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11335   %}
11336   ins_pipe( pipe_slow );
11337 %}
11338 
11339 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11340   match(Set dst (MoveL2D src));
11341   effect(DEF dst, USE src);
11342 
11343   ins_cost(200);
11344   format %{ "MOV    $dst,$src.lo\n\t"
11345             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11346   opcode(0x89, 0x89);
11347   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11348   ins_pipe( ialu_mem_long_reg );
11349 %}
11350 
11351 
11352 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11353   predicate(UseSSE<=1);
11354   match(Set dst (MoveL2D src));
11355   effect(DEF dst, USE src);
11356   ins_cost(125);
11357 
11358   format %{ "FLD_D  $src\n\t"
11359             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11360   opcode(0xDD);               /* DD /0, FLD m64real */
11361   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11362               Pop_Reg_DPR(dst) );
11363   ins_pipe( fpu_reg_mem );
11364 %}
11365 
11366 
11367 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11368   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11369   match(Set dst (MoveL2D src));
11370   effect(DEF dst, USE src);
11371 
11372   ins_cost(95);
11373   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11374   ins_encode %{
11375     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11376   %}
11377   ins_pipe( pipe_slow );
11378 %}
11379 
11380 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11381   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11382   match(Set dst (MoveL2D src));
11383   effect(DEF dst, USE src);
11384 
11385   ins_cost(95);
11386   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11387   ins_encode %{
11388     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11389   %}
11390   ins_pipe( pipe_slow );
11391 %}
11392 
11393 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11394   predicate(UseSSE>=2);
11395   match(Set dst (MoveL2D src));
11396   effect(TEMP dst, USE src, TEMP tmp);
11397   ins_cost(85);
11398   format %{ "MOVD   $dst,$src.lo\n\t"
11399             "MOVD   $tmp,$src.hi\n\t"
11400             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11401   ins_encode %{
11402     __ movdl($dst$$XMMRegister, $src$$Register);
11403     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11404     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11405   %}
11406   ins_pipe( pipe_slow );
11407 %}
11408 
11409 
11410 // =======================================================================
11411 // fast clearing of an array
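// Two variants are provided: the default form scales the count and stores
// doublewords with REP STOS, while the form guarded by UseFastStosb converts
// the count to bytes and uses REP STOSB, which is typically fast on CPUs with
// enhanced REP MOVSB/STOSB (ERMSB) support.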
11412 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11413   predicate(!UseFastStosb);
11414   match(Set dummy (ClearArray cnt base));
11415   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11416   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11417             "SHL    ECX,1\t# Convert doublewords to words\n\t"
11418             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11419   ins_encode %{
11420     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11421   %}
11422   ins_pipe( pipe_slow );
11423 %}
11424 
11425 instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11426   predicate(UseFastStosb);
11427   match(Set dummy (ClearArray cnt base));
11428   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11429   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11430             "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11431             "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
11432   ins_encode %{
11433     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11434   %}
11435   ins_pipe( pipe_slow );
11436 %}
11437 
11438 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11439                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11440   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11441   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11442 
11443   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11444   ins_encode %{
11445     __ string_compare($str1$$Register, $str2$$Register,
11446                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11447                       $tmp1$$XMMRegister);
11448   %}
11449   ins_pipe( pipe_slow );
11450 %}
11451 
11452 // fast string equals
11453 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11454                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11455   match(Set result (StrEquals (Binary str1 str2) cnt));
11456   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11457 
11458   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11459   ins_encode %{
11460     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11461                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11462                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11463   %}
11464   ins_pipe( pipe_slow );
11465 %}
11466 
11467 // fast search of substring with known size.
11468 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11469                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11470   predicate(UseSSE42Intrinsics);
11471   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11472   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11473 
11474   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11475   ins_encode %{
11476     int icnt2 = (int)$int_cnt2$$constant;
11477     if (icnt2 >= 8) {
11478       // IndexOf for constant substrings with size >= 8 elements
11479       // which don't need to be loaded through stack.
11480       __ string_indexofC8($str1$$Register, $str2$$Register,
11481                           $cnt1$$Register, $cnt2$$Register,
11482                           icnt2, $result$$Register,
11483                           $vec$$XMMRegister, $tmp$$Register);
11484     } else {
11485       // Small strings are loaded through stack if they cross page boundary.
11486       __ string_indexof($str1$$Register, $str2$$Register,
11487                         $cnt1$$Register, $cnt2$$Register,
11488                         icnt2, $result$$Register,
11489                         $vec$$XMMRegister, $tmp$$Register);
11490     }
11491   %}
11492   ins_pipe( pipe_slow );
11493 %}
11494 
11495 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11496                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11497   predicate(UseSSE42Intrinsics);
11498   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11499   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11500 
11501   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11502   ins_encode %{
11503     __ string_indexof($str1$$Register, $str2$$Register,
11504                       $cnt1$$Register, $cnt2$$Register,
11505                       (-1), $result$$Register,
11506                       $vec$$XMMRegister, $tmp$$Register);
11507   %}
11508   ins_pipe( pipe_slow );
11509 %}
11510 
11511 // fast array equals
11512 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11513                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11514 %{
11515   match(Set result (AryEq ary1 ary2));
11516   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11517   //ins_cost(300);
11518 
11519   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11520   ins_encode %{
11521     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11522                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11523                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11524   %}
11525   ins_pipe( pipe_slow );
11526 %}
11527 
11528 // encode char[] to byte[] in ISO_8859_1
11529 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11530                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11531                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11532   match(Set result (EncodeISOArray src (Binary dst len)));
11533   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11534 
11535   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11536   ins_encode %{
11537     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11538                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11539                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11540   %}
11541   ins_pipe( pipe_slow );
11542 %}
11543 
11544 
11545 //----------Control Flow Instructions------------------------------------------
11546 // Signed compare Instructions
11547 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11548   match(Set cr (CmpI op1 op2));
11549   effect( DEF cr, USE op1, USE op2 );
11550   format %{ "CMP    $op1,$op2" %}
11551   opcode(0x3B);  /* Opcode 3B /r */
11552   ins_encode( OpcP, RegReg( op1, op2) );
11553   ins_pipe( ialu_cr_reg_reg );
11554 %}
11555 
11556 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11557   match(Set cr (CmpI op1 op2));
11558   effect( DEF cr, USE op1 );
11559   format %{ "CMP    $op1,$op2" %}
11560   opcode(0x81,0x07);  /* Opcode 81 /7 */
11561   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11562   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11563   ins_pipe( ialu_cr_reg_imm );
11564 %}
11565 
11566 // Cisc-spilled version of cmpI_eReg
11567 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11568   match(Set cr (CmpI op1 (LoadI op2)));
11569 
11570   format %{ "CMP    $op1,$op2" %}
11571   ins_cost(500);
11572   opcode(0x3B);  /* Opcode 3B /r */
11573   ins_encode( OpcP, RegMem( op1, op2) );
11574   ins_pipe( ialu_cr_reg_mem );
11575 %}
11576 
11577 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11578   match(Set cr (CmpI src zero));
11579   effect( DEF cr, USE src );
11580 
11581   format %{ "TEST   $src,$src" %}
11582   opcode(0x85);
11583   ins_encode( OpcP, RegReg( src, src ) );
11584   ins_pipe( ialu_cr_reg_imm );
11585 %}
11586 
11587 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11588   match(Set cr (CmpI (AndI src con) zero));
11589 
11590   format %{ "TEST   $src,$con" %}
11591   opcode(0xF7,0x00);
11592   ins_encode( OpcP, RegOpc(src), Con32(con) );
11593   ins_pipe( ialu_cr_reg_imm );
11594 %}
11595 
11596 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11597   match(Set cr (CmpI (AndI src mem) zero));
11598 
11599   format %{ "TEST   $src,$mem" %}
11600   opcode(0x85);
11601   ins_encode( OpcP, RegMem( src, mem ) );
11602   ins_pipe( ialu_cr_reg_mem );
11603 %}
11604 
11605 // Unsigned compare Instructions; really, same as signed except they
11606 // produce an eFlagsRegU instead of eFlagsReg.
11607 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11608   match(Set cr (CmpU op1 op2));
11609 
11610   format %{ "CMPu   $op1,$op2" %}
11611   opcode(0x3B);  /* Opcode 3B /r */
11612   ins_encode( OpcP, RegReg( op1, op2) );
11613   ins_pipe( ialu_cr_reg_reg );
11614 %}
11615 
11616 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11617   match(Set cr (CmpU op1 op2));
11618 
11619   format %{ "CMPu   $op1,$op2" %}
11620   opcode(0x81,0x07);  /* Opcode 81 /7 */
11621   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11622   ins_pipe( ialu_cr_reg_imm );
11623 %}
11624 
// Cisc-spilled version of cmpU_eReg
11626 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11627   match(Set cr (CmpU op1 (LoadI op2)));
11628 
11629   format %{ "CMPu   $op1,$op2" %}
11630   ins_cost(500);
11631   opcode(0x3B);  /* Opcode 3B /r */
11632   ins_encode( OpcP, RegMem( op1, op2) );
11633   ins_pipe( ialu_cr_reg_mem );
11634 %}
11635 
11636 // // Cisc-spilled version of cmpU_eReg
11637 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11638 //  match(Set cr (CmpU (LoadI op1) op2));
11639 //
11640 //  format %{ "CMPu   $op1,$op2" %}
11641 //  ins_cost(500);
11642 //  opcode(0x39);  /* Opcode 39 /r */
11643 //  ins_encode( OpcP, RegMem( op1, op2) );
11644 //%}
11645 
11646 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11647   match(Set cr (CmpU src zero));
11648 
11649   format %{ "TESTu  $src,$src" %}
11650   opcode(0x85);
11651   ins_encode( OpcP, RegReg( src, src ) );
11652   ins_pipe( ialu_cr_reg_imm );
11653 %}
11654 
11655 // Unsigned pointer compare Instructions
11656 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11657   match(Set cr (CmpP op1 op2));
11658 
11659   format %{ "CMPu   $op1,$op2" %}
11660   opcode(0x3B);  /* Opcode 3B /r */
11661   ins_encode( OpcP, RegReg( op1, op2) );
11662   ins_pipe( ialu_cr_reg_reg );
11663 %}
11664 
11665 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11666   match(Set cr (CmpP op1 op2));
11667 
11668   format %{ "CMPu   $op1,$op2" %}
11669   opcode(0x81,0x07);  /* Opcode 81 /7 */
11670   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11671   ins_pipe( ialu_cr_reg_imm );
11672 %}
11673 
// Cisc-spilled version of cmpP_eReg
11675 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11676   match(Set cr (CmpP op1 (LoadP op2)));
11677 
11678   format %{ "CMPu   $op1,$op2" %}
11679   ins_cost(500);
11680   opcode(0x3B);  /* Opcode 3B /r */
11681   ins_encode( OpcP, RegMem( op1, op2) );
11682   ins_pipe( ialu_cr_reg_mem );
11683 %}
11684 
11685 // // Cisc-spilled version of cmpP_eReg
11686 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11687 //  match(Set cr (CmpP (LoadP op1) op2));
11688 //
11689 //  format %{ "CMPu   $op1,$op2" %}
11690 //  ins_cost(500);
11691 //  opcode(0x39);  /* Opcode 39 /r */
11692 //  ins_encode( OpcP, RegMem( op1, op2) );
11693 //%}
11694 
11695 // Compare raw pointer (used in out-of-heap check).
11696 // Only works because non-oop pointers must be raw pointers
11697 // and raw pointers have no anti-dependencies.
11698 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11699   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11700   match(Set cr (CmpP op1 (LoadP op2)));
11701 
11702   format %{ "CMPu   $op1,$op2" %}
11703   opcode(0x3B);  /* Opcode 3B /r */
11704   ins_encode( OpcP, RegMem( op1, op2) );
11705   ins_pipe( ialu_cr_reg_mem );
11706 %}
11707 
11708 //
11709 // This will generate a signed flags result. This should be ok
11710 // since any compare to a zero should be eq/neq.
11711 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11712   match(Set cr (CmpP src zero));
11713 
11714   format %{ "TEST   $src,$src" %}
11715   opcode(0x85);
11716   ins_encode( OpcP, RegReg( src, src ) );
11717   ins_pipe( ialu_cr_reg_imm );
11718 %}
11719 
11720 // Cisc-spilled version of testP_reg
11721 // This will generate a signed flags result. This should be ok
11722 // since any compare to a zero should be eq/neq.
11723 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11724   match(Set cr (CmpP (LoadP op) zero));
11725 
11726   format %{ "TEST   $op,0xFFFFFFFF" %}
11727   ins_cost(500);
11728   opcode(0xF7);               /* Opcode F7 /0 */
11729   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11730   ins_pipe( ialu_cr_reg_imm );
11731 %}
11732 
11733 // Yanked all unsigned pointer compare operations.
11734 // Pointer compares are done with CmpP which is already unsigned.
11735 
11736 //----------Max and Min--------------------------------------------------------
11737 // Min Instructions
11738 ////
11739 //   *** Min and Max using the conditional move are slower than the
11740 //   *** branch version on a Pentium III.
11741 // // Conditional move for min
11742 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11743 //  effect( USE_DEF op2, USE op1, USE cr );
11744 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11745 //  opcode(0x4C,0x0F);
11746 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11747 //  ins_pipe( pipe_cmov_reg );
11748 //%}
11749 //
11750 //// Min Register with Register (P6 version)
11751 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11752 //  predicate(VM_Version::supports_cmov() );
11753 //  match(Set op2 (MinI op1 op2));
11754 //  ins_cost(200);
11755 //  expand %{
11756 //    eFlagsReg cr;
11757 //    compI_eReg(cr,op1,op2);
11758 //    cmovI_reg_lt(op2,op1,cr);
11759 //  %}
11760 //%}
11761 
11762 // Min Register with Register (generic version)
11763 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11764   match(Set dst (MinI dst src));
11765   effect(KILL flags);
11766   ins_cost(300);
11767 
11768   format %{ "MIN    $dst,$src" %}
11769   opcode(0xCC);
11770   ins_encode( min_enc(dst,src) );
11771   ins_pipe( pipe_slow );
11772 %}
11773 
11774 // Max Register with Register
11775 //   *** Min and Max using the conditional move are slower than the
11776 //   *** branch version on a Pentium III.
11777 // // Conditional move for max
11778 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11779 //  effect( USE_DEF op2, USE op1, USE cr );
11780 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11781 //  opcode(0x4F,0x0F);
11782 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11783 //  ins_pipe( pipe_cmov_reg );
11784 //%}
11785 //
11786 // // Max Register with Register (P6 version)
11787 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11788 //  predicate(VM_Version::supports_cmov() );
11789 //  match(Set op2 (MaxI op1 op2));
11790 //  ins_cost(200);
11791 //  expand %{
11792 //    eFlagsReg cr;
11793 //    compI_eReg(cr,op1,op2);
11794 //    cmovI_reg_gt(op2,op1,cr);
11795 //  %}
11796 //%}
11797 
11798 // Max Register with Register (generic version)
11799 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11800   match(Set dst (MaxI dst src));
11801   effect(KILL flags);
11802   ins_cost(300);
11803 
11804   format %{ "MAX    $dst,$src" %}
11805   opcode(0xCC);
11806   ins_encode( max_enc(dst,src) );
11807   ins_pipe( pipe_slow );
11808 %}
11809 
11810 // ============================================================================
11811 // Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into the integer range since
// counted loops have a limit check on overflow.
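// Example (illustration only, not part of the match rules): with init=0,
// limit=10 and stride=3 the loop body runs at 0,3,6,9, so the exact final
// iterator value is
//   0 + 3 * ((10 - 0 + 3 - 1) / 3) = 3 * 4 = 12,
// which the encoding below computes with a 64-bit intermediate (EAX:EDX)
// to avoid overflow in the subtraction and the add of stride-1.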
11814 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
11815   match(Set limit (LoopLimit (Binary init limit) stride));
11816   effect(TEMP limit_hi, TEMP tmp, KILL flags);
11817   ins_cost(300);
11818 
  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride * (($limit - $init + $stride - 1) / $stride), kills $limit_hi" %}
11820   ins_encode %{
11821     int strd = (int)$stride$$constant;
11822     assert(strd != 1 && strd != -1, "sanity");
11823     int m1 = (strd > 0) ? 1 : -1;
11824     // Convert limit to long (EAX:EDX)
11825     __ cdql();
11826     // Convert init to long (init:tmp)
11827     __ movl($tmp$$Register, $init$$Register);
11828     __ sarl($tmp$$Register, 31);
11829     // $limit - $init
11830     __ subl($limit$$Register, $init$$Register);
11831     __ sbbl($limit_hi$$Register, $tmp$$Register);
11832     // + ($stride - 1)
11833     if (strd > 0) {
11834       __ addl($limit$$Register, (strd - 1));
11835       __ adcl($limit_hi$$Register, 0);
11836       __ movl($tmp$$Register, strd);
11837     } else {
11838       __ addl($limit$$Register, (strd + 1));
11839       __ adcl($limit_hi$$Register, -1);
11840       __ lneg($limit_hi$$Register, $limit$$Register);
11841       __ movl($tmp$$Register, -strd);
11842     }
    // signed division: (EAX:EDX) / pos_stride
11844     __ idivl($tmp$$Register);
11845     if (strd < 0) {
11846       // restore sign
11847       __ negl($tmp$$Register);
11848     }
11849     // (EAX) * stride
11850     __ mull($tmp$$Register);
11851     // + init (ignore upper bits)
11852     __ addl($limit$$Register, $init$$Register);
11853   %}
11854   ins_pipe( pipe_slow );
11855 %}
11856 
11857 // ============================================================================
11858 // Branch Instructions
11859 // Jump Table
11860 instruct jumpXtnd(rRegI switch_val) %{
11861   match(Jump switch_val);
11862   ins_cost(350);
11863   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
11864   ins_encode %{
11865     // Jump to Address(table_base + switch_reg)
11866     Address index(noreg, $switch_val$$Register, Address::times_1);
11867     __ jump(ArrayAddress($constantaddress, index));
11868   %}
11869   ins_pipe(pipe_jmp);
11870 %}
11871 
11872 // Jump Direct - Label defines a relative address from JMP+1
11873 instruct jmpDir(label labl) %{
11874   match(Goto);
11875   effect(USE labl);
11876 
11877   ins_cost(300);
11878   format %{ "JMP    $labl" %}
11879   size(5);
11880   ins_encode %{
11881     Label* L = $labl$$label;
11882     __ jmp(*L, false); // Always long jump
11883   %}
11884   ins_pipe( pipe_jmp );
11885 %}
11886 
11887 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11888 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
11889   match(If cop cr);
11890   effect(USE labl);
11891 
11892   ins_cost(300);
11893   format %{ "J$cop    $labl" %}
11894   size(6);
11895   ins_encode %{
11896     Label* L = $labl$$label;
11897     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11898   %}
11899   ins_pipe( pipe_jcc );
11900 %}
11901 
11902 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11903 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
11904   match(CountedLoopEnd cop cr);
11905   effect(USE labl);
11906 
11907   ins_cost(300);
11908   format %{ "J$cop    $labl\t# Loop end" %}
11909   size(6);
11910   ins_encode %{
11911     Label* L = $labl$$label;
11912     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11913   %}
11914   ins_pipe( pipe_jcc );
11915 %}
11916 
11917 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11918 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
11919   match(CountedLoopEnd cop cmp);
11920   effect(USE labl);
11921 
11922   ins_cost(300);
11923   format %{ "J$cop,u  $labl\t# Loop end" %}
11924   size(6);
11925   ins_encode %{
11926     Label* L = $labl$$label;
11927     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11928   %}
11929   ins_pipe( pipe_jcc );
11930 %}
11931 
11932 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
11933   match(CountedLoopEnd cop cmp);
11934   effect(USE labl);
11935 
11936   ins_cost(200);
11937   format %{ "J$cop,u  $labl\t# Loop end" %}
11938   size(6);
11939   ins_encode %{
11940     Label* L = $labl$$label;
11941     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11942   %}
11943   ins_pipe( pipe_jcc );
11944 %}
11945 
11946 // Jump Direct Conditional - using unsigned comparison
11947 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
11948   match(If cop cmp);
11949   effect(USE labl);
11950 
11951   ins_cost(300);
11952   format %{ "J$cop,u  $labl" %}
11953   size(6);
11954   ins_encode %{
11955     Label* L = $labl$$label;
11956     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11957   %}
11958   ins_pipe(pipe_jcc);
11959 %}
11960 
11961 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
11962   match(If cop cmp);
11963   effect(USE labl);
11964 
11965   ins_cost(200);
11966   format %{ "J$cop,u  $labl" %}
11967   size(6);
11968   ins_encode %{
11969     Label* L = $labl$$label;
11970     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11971   %}
11972   ins_pipe(pipe_jcc);
11973 %}
11974 
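// jmpConUCF2 handles the unordered case of a carry-flag-only float compare:
// an unordered compare sets the parity flag, so for a not-equal branch we also
// jump when PF is set, while for an equal branch we must first skip over the
// jump when PF is set (see the format template below).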
11975 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
11976   match(If cop cmp);
11977   effect(USE labl);
11978 
11979   ins_cost(200);
11980   format %{ $$template
11981     if ($cop$$cmpcode == Assembler::notEqual) {
11982       $$emit$$"JP,u   $labl\n\t"
11983       $$emit$$"J$cop,u   $labl"
11984     } else {
11985       $$emit$$"JP,u   done\n\t"
11986       $$emit$$"J$cop,u   $labl\n\t"
11987       $$emit$$"done:"
11988     }
11989   %}
11990   ins_encode %{
11991     Label* l = $labl$$label;
11992     if ($cop$$cmpcode == Assembler::notEqual) {
11993       __ jcc(Assembler::parity, *l, false);
11994       __ jcc(Assembler::notEqual, *l, false);
11995     } else if ($cop$$cmpcode == Assembler::equal) {
11996       Label done;
11997       __ jccb(Assembler::parity, done);
11998       __ jcc(Assembler::equal, *l, false);
11999       __ bind(done);
12000     } else {
12001        ShouldNotReachHere();
12002     }
12003   %}
12004   ins_pipe(pipe_jcc);
12005 %}
12006 
12007 // ============================================================================
// The second, slow half of a subtype check.  Scan the subklass's secondary
// superklass array for an instance of the superklass.  Set a hidden internal
// cache on a hit (the cache is checked with exposed code in
// gen_subtype_check()).  Return NZ for a miss or zero for a hit.  The
// encoding ALSO sets flags.
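// Roughly, using the field names from the format strings below (a sketch of
// the scan only, not the exact VM code):
//   Klass** s = sub->secondary_supers;  int n = s->length;
//   for (int i = 0; i < n; i++) {
//     if (s[i] == super) { sub->secondary_super_cache = super; return 0; }  // hit
//   }
//   return non-zero;                                                        // miss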
12012 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12013   match(Set result (PartialSubtypeCheck sub super));
12014   effect( KILL rcx, KILL cr );
12015 
12016   ins_cost(1100);  // slightly larger than the next version
12017   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12018             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12019             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12020             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12021             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12022             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12023             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12024      "miss:\t" %}
12025 
12026   opcode(0x1); // Force a XOR of EDI
12027   ins_encode( enc_PartialSubtypeCheck() );
12028   ins_pipe( pipe_slow );
12029 %}
12030 
12031 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12032   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12033   effect( KILL rcx, KILL result );
12034 
12035   ins_cost(1000);
12036   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12037             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12038             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12039             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12040             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12041             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12042      "miss:\t" %}
12043 
12044   opcode(0x0);  // No need to XOR EDI
12045   ins_encode( enc_PartialSubtypeCheck() );
12046   ins_pipe( pipe_slow );
12047 %}
12048 
12049 // ============================================================================
12050 // Branch Instructions -- short offset versions
12051 //
12052 // These instructions are used to replace jumps of a long offset (the default
12053 // match) with jumps of a shorter offset.  These instructions are all tagged
12054 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12055 // match rules in general matching.  Instead, the ADLC generates a conversion
12056 // method in the MachNode which can be used to do in-place replacement of the
// long variant with the shorter variant.  The compiler determines whether the
// short form can be used with the is_short_branch_offset() predicate in the
// machine-specific code section of the file.
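// For reference, the size() values declared below match the x86 encodings:
// JMP rel32 is 5 bytes (E9 cd) and Jcc rel32 is 6 bytes (0F 8x cd), while the
// short forms JMP rel8 (EB cb) and Jcc rel8 (7x cb) are 2 bytes each, which
// is why the short variants only apply when the offset fits in 8 bits.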
12060 
12061 // Jump Direct - Label defines a relative address from JMP+1
12062 instruct jmpDir_short(label labl) %{
12063   match(Goto);
12064   effect(USE labl);
12065 
12066   ins_cost(300);
12067   format %{ "JMP,s  $labl" %}
12068   size(2);
12069   ins_encode %{
12070     Label* L = $labl$$label;
12071     __ jmpb(*L);
12072   %}
12073   ins_pipe( pipe_jmp );
12074   ins_short_branch(1);
12075 %}
12076 
12077 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12078 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12079   match(If cop cr);
12080   effect(USE labl);
12081 
12082   ins_cost(300);
12083   format %{ "J$cop,s  $labl" %}
12084   size(2);
12085   ins_encode %{
12086     Label* L = $labl$$label;
12087     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12088   %}
12089   ins_pipe( pipe_jcc );
12090   ins_short_branch(1);
12091 %}
12092 
12093 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12094 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12095   match(CountedLoopEnd cop cr);
12096   effect(USE labl);
12097 
12098   ins_cost(300);
12099   format %{ "J$cop,s  $labl\t# Loop end" %}
12100   size(2);
12101   ins_encode %{
12102     Label* L = $labl$$label;
12103     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12104   %}
12105   ins_pipe( pipe_jcc );
12106   ins_short_branch(1);
12107 %}
12108 
12109 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12110 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12111   match(CountedLoopEnd cop cmp);
12112   effect(USE labl);
12113 
12114   ins_cost(300);
12115   format %{ "J$cop,us $labl\t# Loop end" %}
12116   size(2);
12117   ins_encode %{
12118     Label* L = $labl$$label;
12119     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12120   %}
12121   ins_pipe( pipe_jcc );
12122   ins_short_branch(1);
12123 %}
12124 
12125 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12126   match(CountedLoopEnd cop cmp);
12127   effect(USE labl);
12128 
12129   ins_cost(300);
12130   format %{ "J$cop,us $labl\t# Loop end" %}
12131   size(2);
12132   ins_encode %{
12133     Label* L = $labl$$label;
12134     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12135   %}
12136   ins_pipe( pipe_jcc );
12137   ins_short_branch(1);
12138 %}
12139 
12140 // Jump Direct Conditional - using unsigned comparison
12141 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12142   match(If cop cmp);
12143   effect(USE labl);
12144 
12145   ins_cost(300);
12146   format %{ "J$cop,us $labl" %}
12147   size(2);
12148   ins_encode %{
12149     Label* L = $labl$$label;
12150     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12151   %}
12152   ins_pipe( pipe_jcc );
12153   ins_short_branch(1);
12154 %}
12155 
12156 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12157   match(If cop cmp);
12158   effect(USE labl);
12159 
12160   ins_cost(300);
12161   format %{ "J$cop,us $labl" %}
12162   size(2);
12163   ins_encode %{
12164     Label* L = $labl$$label;
12165     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12166   %}
12167   ins_pipe( pipe_jcc );
12168   ins_short_branch(1);
12169 %}
12170 
12171 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12172   match(If cop cmp);
12173   effect(USE labl);
12174 
12175   ins_cost(300);
12176   format %{ $$template
12177     if ($cop$$cmpcode == Assembler::notEqual) {
12178       $$emit$$"JP,u,s   $labl\n\t"
12179       $$emit$$"J$cop,u,s   $labl"
12180     } else {
12181       $$emit$$"JP,u,s   done\n\t"
12182       $$emit$$"J$cop,u,s  $labl\n\t"
12183       $$emit$$"done:"
12184     }
12185   %}
12186   size(4);
12187   ins_encode %{
12188     Label* l = $labl$$label;
12189     if ($cop$$cmpcode == Assembler::notEqual) {
12190       __ jccb(Assembler::parity, *l);
12191       __ jccb(Assembler::notEqual, *l);
12192     } else if ($cop$$cmpcode == Assembler::equal) {
12193       Label done;
12194       __ jccb(Assembler::parity, done);
12195       __ jccb(Assembler::equal, *l);
12196       __ bind(done);
12197     } else {
12198        ShouldNotReachHere();
12199     }
12200   %}
12201   ins_pipe(pipe_jcc);
12202   ins_short_branch(1);
12203 %}
12204 
12205 // ============================================================================
12206 // Long Compare
12207 //
12208 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12209 // is tricky.  The flavor of compare used depends on whether we are testing
12210 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands (the swapped GE test is exactly LE); negating that yields the
// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12214 // NE test is negated from that.
12215 
12216 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12217 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12218 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12219 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12220 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12221 // foo match ends up with the wrong leaf.  One fix is to not match both
12222 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12223 // both forms beat the trinary form of long-compare and both are very useful
12224 // on Intel which has so few registers.
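// As a sketch of the tricks used below (hi:lo is the two-register long):
//   x <  0   <=>  the sign bit of x.hi is set      (TEST hi,hi)
//   x == 0   <=>  (x.hi | x.lo) == 0               (OR the halves together)
//   x <  y   :    CMP x.lo,y.lo; MOV tmp,x.hi; SBB tmp,y.hi
//                 i.e. a 64-bit subtract whose final SBB leaves the signed
//                 LT/GE answer in the flags without keeping the difference.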
12225 
12226 // Manifest a CmpL result in an integer register.  Very painful.
12227 // This is the test to avoid.
12228 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12229   match(Set dst (CmpL3 src1 src2));
12230   effect( KILL flags );
12231   ins_cost(1000);
12232   format %{ "XOR    $dst,$dst\n\t"
12233             "CMP    $src1.hi,$src2.hi\n\t"
12234             "JLT,s  m_one\n\t"
12235             "JGT,s  p_one\n\t"
12236             "CMP    $src1.lo,$src2.lo\n\t"
12237             "JB,s   m_one\n\t"
12238             "JEQ,s  done\n"
12239     "p_one:\tINC    $dst\n\t"
12240             "JMP,s  done\n"
12241     "m_one:\tDEC    $dst\n"
12242      "done:" %}
12243   ins_encode %{
12244     Label p_one, m_one, done;
12245     __ xorptr($dst$$Register, $dst$$Register);
12246     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12247     __ jccb(Assembler::less,    m_one);
12248     __ jccb(Assembler::greater, p_one);
12249     __ cmpl($src1$$Register, $src2$$Register);
12250     __ jccb(Assembler::below,   m_one);
12251     __ jccb(Assembler::equal,   done);
12252     __ bind(p_one);
12253     __ incrementl($dst$$Register);
12254     __ jmpb(done);
12255     __ bind(m_one);
12256     __ decrementl($dst$$Register);
12257     __ bind(done);
12258   %}
12259   ins_pipe( pipe_slow );
12260 %}
12261 
12262 //======
12263 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12264 // compares.  Can be used for LE or GT compares by reversing arguments.
12265 // NOT GOOD FOR EQ/NE tests.
12266 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12267   match( Set flags (CmpL src zero ));
12268   ins_cost(100);
12269   format %{ "TEST   $src.hi,$src.hi" %}
12270   opcode(0x85);
12271   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12272   ins_pipe( ialu_cr_reg_reg );
12273 %}
12274 
12275 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12276 // compares.  Can be used for LE or GT compares by reversing arguments.
12277 // NOT GOOD FOR EQ/NE tests.
12278 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12279   match( Set flags (CmpL src1 src2 ));
12280   effect( TEMP tmp );
12281   ins_cost(300);
12282   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12283             "MOV    $tmp,$src1.hi\n\t"
12284             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12285   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12286   ins_pipe( ialu_cr_reg_reg );
12287 %}
12288 
// Long compares reg < zero/reg OR reg >= zero/reg.
12290 // Just a wrapper for a normal branch, plus the predicate test.
12291 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12292   match(If cmp flags);
12293   effect(USE labl);
12294   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12295   expand %{
12296     jmpCon(cmp,flags,labl);    // JLT or JGE...
12297   %}
12298 %}
12299 
12300 // Compare 2 longs and CMOVE longs.
12301 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12302   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12303   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12304   ins_cost(400);
12305   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12306             "CMOV$cmp $dst.hi,$src.hi" %}
12307   opcode(0x0F,0x40);
12308   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12309   ins_pipe( pipe_cmov_reg_long );
12310 %}
12311 
12312 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12313   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12314   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12315   ins_cost(500);
12316   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12317             "CMOV$cmp $dst.hi,$src.hi" %}
12318   opcode(0x0F,0x40);
12319   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12320   ins_pipe( pipe_cmov_reg_long );
12321 %}
12322 
12323 // Compare 2 longs and CMOVE ints.
12324 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12325   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12326   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12327   ins_cost(200);
12328   format %{ "CMOV$cmp $dst,$src" %}
12329   opcode(0x0F,0x40);
12330   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12331   ins_pipe( pipe_cmov_reg );
12332 %}
12333 
12334 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12335   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12336   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12337   ins_cost(250);
12338   format %{ "CMOV$cmp $dst,$src" %}
12339   opcode(0x0F,0x40);
12340   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12341   ins_pipe( pipe_cmov_mem );
12342 %}
12343 
12344 // Compare 2 longs and CMOVE ints.
12345 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12346   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12347   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12348   ins_cost(200);
12349   format %{ "CMOV$cmp $dst,$src" %}
12350   opcode(0x0F,0x40);
12351   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12352   ins_pipe( pipe_cmov_reg );
12353 %}
12354 
12355 // Compare 2 longs and CMOVE doubles
12356 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12358   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12359   ins_cost(200);
12360   expand %{
12361     fcmovDPR_regS(cmp,flags,dst,src);
12362   %}
12363 %}
12364 
12365 // Compare 2 longs and CMOVE doubles
12366 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12368   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12369   ins_cost(200);
12370   expand %{
12371     fcmovD_regS(cmp,flags,dst,src);
12372   %}
12373 %}
12374 
12375 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12377   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12378   ins_cost(200);
12379   expand %{
12380     fcmovFPR_regS(cmp,flags,dst,src);
12381   %}
12382 %}
12383 
12384 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12386   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12387   ins_cost(200);
12388   expand %{
12389     fcmovF_regS(cmp,flags,dst,src);
12390   %}
12391 %}
12392 
12393 //======
12394 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12395 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12396   match( Set flags (CmpL src zero ));
12397   effect(TEMP tmp);
12398   ins_cost(200);
12399   format %{ "MOV    $tmp,$src.lo\n\t"
12400             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12401   ins_encode( long_cmp_flags0( src, tmp ) );
12402   ins_pipe( ialu_reg_reg_long );
12403 %}
12404 
12405 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12406 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12407   match( Set flags (CmpL src1 src2 ));
12408   ins_cost(200+300);
12409   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12410             "JNE,s  skip\n\t"
12411             "CMP    $src1.hi,$src2.hi\n\t"
12412      "skip:\t" %}
12413   ins_encode( long_cmp_flags1( src1, src2 ) );
12414   ins_pipe( ialu_cr_reg_reg );
12415 %}
12416 
12417 // Long compare reg == zero/reg OR reg != zero/reg
12418 // Just a wrapper for a normal branch, plus the predicate test.
12419 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12420   match(If cmp flags);
12421   effect(USE labl);
12422   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12423   expand %{
12424     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12425   %}
12426 %}
12427 
12428 // Compare 2 longs and CMOVE longs.
12429 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12430   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12431   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12432   ins_cost(400);
12433   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12434             "CMOV$cmp $dst.hi,$src.hi" %}
12435   opcode(0x0F,0x40);
12436   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12437   ins_pipe( pipe_cmov_reg_long );
12438 %}
12439 
12440 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12441   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12442   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12443   ins_cost(500);
12444   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12445             "CMOV$cmp $dst.hi,$src.hi" %}
12446   opcode(0x0F,0x40);
12447   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12448   ins_pipe( pipe_cmov_reg_long );
12449 %}
12450 
12451 // Compare 2 longs and CMOVE ints.
12452 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12453   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12454   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12455   ins_cost(200);
12456   format %{ "CMOV$cmp $dst,$src" %}
12457   opcode(0x0F,0x40);
12458   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12459   ins_pipe( pipe_cmov_reg );
12460 %}
12461 
12462 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12463   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12464   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12465   ins_cost(250);
12466   format %{ "CMOV$cmp $dst,$src" %}
12467   opcode(0x0F,0x40);
12468   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12469   ins_pipe( pipe_cmov_mem );
12470 %}
12471 
12472 // Compare 2 longs and CMOVE ints.
12473 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12474   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12475   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12476   ins_cost(200);
12477   format %{ "CMOV$cmp $dst,$src" %}
12478   opcode(0x0F,0x40);
12479   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12480   ins_pipe( pipe_cmov_reg );
12481 %}
12482 
12483 // Compare 2 longs and CMOVE doubles
12484 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
12485   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12486   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12487   ins_cost(200);
12488   expand %{
12489     fcmovDPR_regS(cmp,flags,dst,src);
12490   %}
12491 %}
12492 
12493 // Compare 2 longs and CMOVE doubles
12494 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
12495   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12496   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12497   ins_cost(200);
12498   expand %{
12499     fcmovD_regS(cmp,flags,dst,src);
12500   %}
12501 %}
12502 
12503 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
12504   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12505   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12506   ins_cost(200);
12507   expand %{
12508     fcmovFPR_regS(cmp,flags,dst,src);
12509   %}
12510 %}
12511 
12512 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
12513   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12514   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12515   ins_cost(200);
12516   expand %{
12517     fcmovF_regS(cmp,flags,dst,src);
12518   %}
12519 %}
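// Note on the UseSSE guards in the four FP CMOVE variants above (mirrored
// by the LE/GT family further down): with UseSSE==0 both floats and
// doubles live on the x87 stack (regFPR/regDPR); UseSSE>=1 moves floats
// into XMM registers (regF); UseSSE>=2 moves doubles into XMM as well
// (regD).  Each variant therefore expands to the fcmov helper that matches
// the register file actually holding the value.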
12520 
12521 //======
12522 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12523 // Same as cmpL_reg_flags_LEGT except must negate src
12524 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12525   match( Set flags (CmpL src zero ));
12526   effect( TEMP tmp );
12527   ins_cost(300);
12528   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12529             "CMP    $tmp,$src.lo\n\t"
12530             "SBB    $tmp,$src.hi" %}
12531   ins_encode( long_cmp_flags3(src, tmp) );
12532   ins_pipe( ialu_reg_reg_long );
12533 %}
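// Sketch of the sequence above (illustrative only): with $tmp zeroed, the
// CMP/SBB pair effectively compares 0 against $src as a 64-bit value; the
// numeric result is thrown away and only the flags survive.  Because the
// operands are reversed relative to the ideal "CmpL src zero", a consumer
// asking for "$src <= 0" must test "0 >= $src" (and "$src > 0" becomes
// "0 < $src"), i.e. the swapped condition code supplied by the
// cmpOp_commute operand used in the rules below.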
12534 
12535 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12536 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12537 // requires a commuted test to get the same result.
12538 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12539   match( Set flags (CmpL src1 src2 ));
12540   effect( TEMP tmp );
12541   ins_cost(300);
12542   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12543             "MOV    $tmp,$src2.hi\n\t"
12544             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12545   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12546   ins_pipe( ialu_cr_reg_reg );
12547 %}
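// Sketch (illustrative): the encoding computes $src2 - $src1 (CMP on the
// low words, SBB on the high words), so the resulting flags describe
// "$src2 versus $src1" rather than "$src1 versus $src2".  An ideal test of
// "src1 <= src2" is therefore answered by checking "src2 >= src1" on these
// flags, which again relies on the commuted condition code.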
12548 
12549 // Long compare reg <= zero/reg OR reg > zero/reg.
12550 // Just a wrapper for a normal branch, plus the predicate test
12551 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12552   match(If cmp flags);
12553   effect(USE labl);
12554   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12555   ins_cost(300);
12556   expand %{
12557     jmpCon(cmp,flags,labl);    // JGT or JLE...
12558   %}
12559 %}
12560 
12561 // Compare 2 longs and CMOVE longs.
12562 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12563   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12564   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12565   ins_cost(400);
12566   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12567             "CMOV$cmp $dst.hi,$src.hi" %}
12568   opcode(0x0F,0x40);
12569   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12570   ins_pipe( pipe_cmov_reg_long );
12571 %}
12572 
12573 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12574   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12575   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12576   ins_cost(500);
12577   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12578             "CMOV$cmp $dst.hi,$src.hi+4" %}
12579   opcode(0x0F,0x40);
12580   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12581   ins_pipe( pipe_cmov_reg_long );
12582 %}
12583 
12584 // Compare 2 longs and CMOVE ints.
12585 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12586   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12587   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12588   ins_cost(200);
12589   format %{ "CMOV$cmp $dst,$src" %}
12590   opcode(0x0F,0x40);
12591   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12592   ins_pipe( pipe_cmov_reg );
12593 %}
12594 
12595 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12596   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12597   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12598   ins_cost(250);
12599   format %{ "CMOV$cmp $dst,$src" %}
12600   opcode(0x0F,0x40);
12601   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12602   ins_pipe( pipe_cmov_mem );
12603 %}
12604 
12605 // Compare 2 longs and CMOVE ptrs.
12606 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12607   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12608   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12609   ins_cost(200);
12610   format %{ "CMOV$cmp $dst,$src" %}
12611   opcode(0x0F,0x40);
12612   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12613   ins_pipe( pipe_cmov_reg );
12614 %}
12615 
12616 // Compare 2 longs and CMOVE doubles
12617 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
12618   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12619   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12620   ins_cost(200);
12621   expand %{
12622     fcmovDPR_regS(cmp,flags,dst,src);
12623   %}
12624 %}
12625 
12626 // Compare 2 longs and CMOVE doubles
12627 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
12628   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12629   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12630   ins_cost(200);
12631   expand %{
12632     fcmovD_regS(cmp,flags,dst,src);
12633   %}
12634 %}
12635 
12636 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
12637   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12638   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12639   ins_cost(200);
12640   expand %{
12641     fcmovFPR_regS(cmp,flags,dst,src);
12642   %}
12643 %}
12644 
12645 
12646 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
12647   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12648   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12649   ins_cost(200);
12650   expand %{
12651     fcmovF_regS(cmp,flags,dst,src);
12652   %}
12653 %}
12654 
12655 
12656 // ============================================================================
12657 // Procedure Call/Return Instructions
12658 // Call Java Static Instruction
12659 // Note: If this code changes, the corresponding ret_addr_offset() and
12660 //       compute_padding() functions will have to be adjusted.
12661 instruct CallStaticJavaDirect(method meth) %{
12662   match(CallStaticJava);
12663   effect(USE meth);
12664 
12665   ins_cost(300);
12666   format %{ "CALL,static " %}
12667   opcode(0xE8); /* E8 cd */
12668   ins_encode( pre_call_resets,
12669               Java_Static_Call( meth ),
12670               call_epilog,
12671               post_call_FPU );
12672   ins_pipe( pipe_slow );
12673   ins_alignment(4);
12674 %}
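// The ins_alignment(4) above (and on the dynamic call below) pads the call
// so that its 4-byte displacement starts on a word boundary and can be
// patched atomically when the call target is resolved; this is the reason
// the note above asks for ret_addr_offset() and compute_padding() to be
// kept in sync with the encoding.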
12675 
12676 // Call Java Dynamic Instruction
12677 // Note: If this code changes, the corresponding ret_addr_offset() and
12678 //       compute_padding() functions will have to be adjusted.
12679 instruct CallDynamicJavaDirect(method meth) %{
12680   match(CallDynamicJava);
12681   effect(USE meth);
12682 
12683   ins_cost(300);
12684   format %{ "MOV    EAX,(oop)-1\n\t"
12685             "CALL,dynamic" %}
12686   opcode(0xE8); /* E8 cd */
12687   ins_encode( pre_call_resets,
12688               Java_Dynamic_Call( meth ),
12689               call_epilog,
12690               post_call_FPU );
12691   ins_pipe( pipe_slow );
12692   ins_alignment(4);
12693 %}
12694 
12695 // Call Runtime Instruction
12696 instruct CallRuntimeDirect(method meth) %{
12697   match(CallRuntime);
12698   effect(USE meth);
12699 
12700   ins_cost(300);
12701   format %{ "CALL,runtime " %}
12702   opcode(0xE8); /* E8 cd */
12703   // Use FFREEs to clear entries in float stack
12704   ins_encode( pre_call_resets,
12705               FFree_Float_Stack_All,
12706               Java_To_Runtime( meth ),
12707               post_call_FPU );
12708   ins_pipe( pipe_slow );
12709 %}
12710 
12711 // Call runtime without safepoint
12712 instruct CallLeafDirect(method meth) %{
12713   match(CallLeaf);
12714   effect(USE meth);
12715 
12716   ins_cost(300);
12717   format %{ "CALL_LEAF,runtime " %}
12718   opcode(0xE8); /* E8 cd */
12719   ins_encode( pre_call_resets,
12720               FFree_Float_Stack_All,
12721               Java_To_Runtime( meth ),
12722               Verify_FPU_For_Leaf, post_call_FPU );
12723   ins_pipe( pipe_slow );
12724 %}
12725 
12726 instruct CallLeafNoFPDirect(method meth) %{
12727   match(CallLeafNoFP);
12728   effect(USE meth);
12729 
12730   ins_cost(300);
12731   format %{ "CALL_LEAF_NOFP,runtime " %}
12732   opcode(0xE8); /* E8 cd */
12733   ins_encode(Java_To_Runtime(meth));
12734   ins_pipe( pipe_slow );
12735 %}
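// Unlike the CallRuntime/CallLeaf rules above, the NoFP variant omits
// pre_call_resets and the FFREE cleanup of the x87 stack: a leaf call that
// is known not to touch floating point has no FPU state that needs to be
// emptied around the call.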
12736 
12737 
12738 // Return Instruction
12739 // Remove the return address & jump to it.
12740 instruct Ret() %{
12741   match(Return);
12742   format %{ "RET" %}
12743   opcode(0xC3);
12744   ins_encode(OpcP);
12745   ins_pipe( pipe_jmp );
12746 %}
12747 
12748 // Tail Call; Jump from runtime stub to Java code.
12749 // Also known as an 'interprocedural jump'.
12750 // Target of jump will eventually return to caller.
12751 // TailJump below removes the return address.
12752 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12753   match(TailCall jump_target method_oop );
12754   ins_cost(300);
12755   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12756   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12757   ins_encode( OpcP, RegOpc(jump_target) );
12758   ins_pipe( pipe_jmp );
12759 %}
12760 
12761 
12762 // Tail Jump; remove the return address; jump to target.
12763 // TailCall above leaves the return address around.
12764 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12765   match( TailJump jump_target ex_oop );
12766   ins_cost(300);
12767   format %{ "POP    EDX\t# pop return address into dummy\n\t"
12768             "JMP    $jump_target " %}
12769   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12770   ins_encode( enc_pop_rdx,
12771               OpcP, RegOpc(jump_target) );
12772   ins_pipe( pipe_jmp );
12773 %}
12774 
12775 // Create exception oop: created by stack-crawling runtime code.
12776 // Created exception is now available to this handler, and is setup
12777 // just prior to jumping to this handler.  No code emitted.
12778 instruct CreateException( eAXRegP ex_oop )
12779 %{
12780   match(Set ex_oop (CreateEx));
12781 
12782   size(0);
12783   // use the following format syntax
12784   format %{ "# exception oop is in EAX; no code emitted" %}
12785   ins_encode();
12786   ins_pipe( empty );
12787 %}
12788 
12789 
12790 // Rethrow exception:
12791 // The exception oop will come in the first argument position.
12792 // Then JUMP (not call) to the rethrow stub code.
12793 instruct RethrowException()
12794 %{
12795   match(Rethrow);
12796 
12797   // use the following format syntax
12798   format %{ "JMP    rethrow_stub" %}
12799   ins_encode(enc_rethrow);
12800   ins_pipe( pipe_jmp );
12801 %}
12802 
12803 // inlined locking and unlocking
12804 
12805 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
12806   predicate(Compile::current()->use_rtm());
12807   match(Set cr (FastLock object box));
12808   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
12809   ins_cost(300);
12810   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
12811   ins_encode %{
12812     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12813                  $scr$$Register, $cx1$$Register, $cx2$$Register,
12814                  _counters, _rtm_counters, _stack_rtm_counters,
12815                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12816                  true, ra_->C->profile_rtm());
12817   %}
12818   ins_pipe(pipe_slow);
12819 %}
12820 
12821 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
12822   predicate(!Compile::current()->use_rtm());
12823   match(Set cr (FastLock object box));
12824   effect(TEMP tmp, TEMP scr, USE_KILL box);
12825   ins_cost(300);
12826   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
12827   ins_encode %{
12828     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12829                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
12830   %}
12831   ins_pipe(pipe_slow);
12832 %}
12833 
12834 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
12835   match(Set cr (FastUnlock object box));
12836   effect(TEMP tmp, USE_KILL box);
12837   ins_cost(300);
12838   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
12839   ins_encode %{
12840     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
12841   %}
12842   ins_pipe(pipe_slow);
12843 %}
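// The fast_lock/fast_unlock macro-assembler routines used above emit the
// inlined common case of monitor enter/exit (roughly, a CAS on the
// object's mark word through the on-stack BasicLock in $box) and leave the
// flags set so the surrounding graph can branch to the slow-path runtime
// call when the fast path fails.  The RTM variant additionally threads the
// RTM counters and per-method profile data through so transactional
// locking can be attempted first.  (Summary only; the details live in the
// macro assembler.)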
12844 
12845 
12846 
12847 // ============================================================================
12848 // Safepoint Instruction
12849 instruct safePoint_poll(eFlagsReg cr) %{
12850   match(SafePoint);
12851   effect(KILL cr);
12852 
12853   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
12854   // On SPARC that might be acceptable as we can generate the address with
12855   // just a sethi, saving an or.  By polling at offset 0 we can end up
12856 // putting additional pressure on cache index 0 in the D$.  Because of
12857   // alignment (just like the situation at hand) the lower indices tend
12858   // to see more traffic.  It'd be better to change the polling address
12859   // to offset 0 of the last $line in the polling page.
12860 
12861   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
12862   ins_cost(125);
12863   size(6);
12864   ins_encode( Safepoint_Poll() );
12865   ins_pipe( ialu_reg_mem );
12866 %}
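// The 6-byte sequence above is a TEST against the VM's polling page (per
// the "TSTL #polladdr" format); the value read is irrelevant.  When a
// safepoint is requested the VM protects the polling page, the access
// faults, and the signal handler stops this thread at the safepoint.
// (Summary for context; the actual bytes come from Safepoint_Poll().)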
12867 
12868 
12869 // ============================================================================
12870 // This name is KNOWN by the ADLC and cannot be changed.
12871 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
12872 // for this guy.
12873 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
12874   match(Set dst (ThreadLocal));
12875   effect(DEF dst, KILL cr);
12876 
12877   format %{ "MOV    $dst, Thread::current()" %}
12878   ins_encode %{
12879     Register dstReg = as_Register($dst$$reg);
12880     __ get_thread(dstReg);
12881   %}
12882   ins_pipe( ialu_reg_fat );
12883 %}
12884 
12885 
12886 
12887 //----------PEEPHOLE RULES-----------------------------------------------------
12888 // These must follow all instruction definitions as they use the names
12889 // defined in the instructions definitions.
12890 //
12891 // peepmatch ( root_instr_name [preceding_instruction]* );
12892 //
12893 // peepconstraint %{
12894 // (instruction_number.operand_name relational_op instruction_number.operand_name
12895 //  [, ...] );
12896 // // instruction numbers are zero-based using left to right order in peepmatch
12897 //
12898 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12899 // // provide an instruction_number.operand_name for each operand that appears
12900 // // in the replacement instruction's match rule
12901 //
12902 // ---------VM FLAGS---------------------------------------------------------
12903 //
12904 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12905 //
12906 // Each peephole rule is given an identifying number starting with zero and
12907 // increasing by one in the order seen by the parser.  An individual peephole
12908 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12909 // on the command-line.
12910 //
12911 // ---------CURRENT LIMITATIONS----------------------------------------------
12912 //
12913 // Only match adjacent instructions in same basic block
12914 // Only equality constraints
12915 // Only constraints between operands, not (0.dest_reg == EAX_enc)
12916 // Only one replacement instruction
12917 //
12918 // ---------EXAMPLE----------------------------------------------------------
12919 //
12920 // // pertinent parts of existing instructions in architecture description
12921 // instruct movI(rRegI dst, rRegI src) %{
12922 //   match(Set dst (CopyI src));
12923 // %}
12924 //
12925 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
12926 //   match(Set dst (AddI dst src));
12927 //   effect(KILL cr);
12928 // %}
12929 //
12930 // // Change (inc mov) to lea
12931 // peephole %{
12932 //   // increment preceded by register-register move
12933 //   peepmatch ( incI_eReg movI );
12934 //   // require that the destination register of the increment
12935 //   // match the destination register of the move
12936 //   peepconstraint ( 0.dst == 1.dst );
12937 //   // construct a replacement instruction that sets
12938 //   // the destination to ( move's source register + one )
12939 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12940 // %}
12941 //
12942 // Implementation no longer uses movX instructions since
12943 // machine-independent system no longer uses CopyX nodes.
12944 //
12945 // peephole %{
12946 //   peepmatch ( incI_eReg movI );
12947 //   peepconstraint ( 0.dst == 1.dst );
12948 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12949 // %}
12950 //
12951 // peephole %{
12952 //   peepmatch ( decI_eReg movI );
12953 //   peepconstraint ( 0.dst == 1.dst );
12954 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12955 // %}
12956 //
12957 // peephole %{
12958 //   peepmatch ( addI_eReg_imm movI );
12959 //   peepconstraint ( 0.dst == 1.dst );
12960 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12961 // %}
12962 //
12963 // peephole %{
12964 //   peepmatch ( addP_eReg_imm movP );
12965 //   peepconstraint ( 0.dst == 1.dst );
12966 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
12967 // %}
12968 
12969 // // Change load of spilled value to only a spill
12970 // instruct storeI(memory mem, rRegI src) %{
12971 //   match(Set mem (StoreI mem src));
12972 // %}
12973 //
12974 // instruct loadI(rRegI dst, memory mem) %{
12975 //   match(Set dst (LoadI mem));
12976 // %}
12977 //
12978 peephole %{
12979   peepmatch ( loadI storeI );
12980   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
12981   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
12982 %}
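// Illustrative effect of the rule above (registers and offsets invented
// for the example): given the adjacent pair
//
//     MOV [ESP+#12], EAX    // storeI -- spill of EAX
//     MOV EAX, [ESP+#12]    // loadI  -- immediate reload of the same slot
//
// the reload is dropped and only the store remains, since EAX still holds
// the value that was just spilled.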
12983 
12984 //----------SMARTSPILL RULES---------------------------------------------------
12985 // These must follow all instruction definitions as they use the names
12986 // defined in the instructions definitions.