1 //
   2 // Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
   3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4 //
   5 // This code is free software; you can redistribute it and/or modify it
   6 // under the terms of the GNU General Public License version 2 only, as
   7 // published by the Free Software Foundation.
   8 //
   9 // This code is distributed in the hope that it will be useful, but WITHOUT
  10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12 // version 2 for more details (a copy is included in the LICENSE file that
  13 // accompanied this code).
  14 //
  15 // You should have received a copy of the GNU General Public License version
  16 // 2 along with this work; if not, write to the Free Software Foundation,
  17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18 //
  19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20 // or visit www.oracle.com if you need additional information or have any
  21 // questions.
  22 //
  23 //
  24 
  25 // X86 Architecture Description File
  26 
  27 //----------REGISTER DEFINITION BLOCK------------------------------------------
  28 // This information is used by the matcher and the register allocator to
  29 // describe individual registers and classes of registers within the target
// architecture.
  31 
  32 register %{
  33 //----------Architecture Description Register Definitions----------------------
  34 // General Registers
  35 // "reg_def"  name ( register save type, C convention save type,
  36 //                   ideal register type, encoding );
  37 // Register Save Types:
  38 //
  39 // NS  = No-Save:       The register allocator assumes that these registers
  40 //                      can be used without saving upon entry to the method, &
  41 //                      that they do not need to be saved at call sites.
  42 //
  43 // SOC = Save-On-Call:  The register allocator assumes that these registers
  44 //                      can be used without saving upon entry to the method,
  45 //                      but that they must be saved at call sites.
  46 //
  47 // SOE = Save-On-Entry: The register allocator assumes that these registers
  48 //                      must be saved before using them upon entry to the
  49 //                      method, but they do not need to be saved at call
  50 //                      sites.
  51 //
  52 // AS  = Always-Save:   The register allocator assumes that these registers
  53 //                      must be saved before using them upon entry to the
  54 //                      method, & that they must be saved at call sites.
  55 //
  56 // Ideal Register Type is used to determine how to save & restore a
  57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
  58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
  59 //
  60 // The encoding number is the actual bit-pattern placed into the opcodes.
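//
// For example, reading one of the definitions below against this template:
// in "reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());" the register save
// type is SOC, the C-convention save type is SOE, Op_RegI means the register
// is spilled with LoadI/StoreI, and 3 is the bit-pattern that encodes EBX in
// x86 opcodes.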
  61 
  62 // General Registers
// EBX, ESI, and EDI were previously set as save-on-entry for Java code, but
// SOE was turned off in Java code due to the frequent use of uncommon traps.
// Now that the allocator is better, ESI and EDI are SOE registers again.
  66 
  67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
  68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
  69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
  70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
  71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
  72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
  73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
  74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
  75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
  76 
// Float registers.  We treat TOS/FPR0 specially.  It is invisible to the
  78 // allocator, and only shows up in the encodings.
  79 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
  80 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// OK, so here's the trick: FPR1 is really st(0), except in the midst of
// emitting assembly for a machnode.  During emission the FPU stack is pushed,
// making FPR1 == st(1) temporarily.  However, at any safepoint the stack will
// not have this extra element, so FPR1 == st(0) from the oopMap viewpoint.
// This same numbering weirdness forces the instruction encoding to play games
// with the register encode to correct for the 0/1 issue.  See
// MachSpillCopyNode::implementation, where it does flt->flt moves, for an
// example.
  89 //
  90 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
  91 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
  92 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
  93 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
  94 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
  95 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
  96 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
  97 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
  98 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
  99 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
 100 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
 101 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
 102 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 103 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 104 //
 105 // Empty fill registers, which are never used, but supply alignment to xmm regs
 106 //
 107 reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
 108 reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
 109 reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
 110 reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
 111 reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
 112 reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
 113 reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
 114 reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
 115 
 116 // Specify priority of register selection within phases of register
 117 // allocation.  Highest priority is first.  A useful heuristic is to
 118 // give registers a low priority when they are required by machine
 119 // instructions, like EAX and EDX.  Registers which are used as
 120 // pairs must fall on an even boundary (witness the FPR#L's in this list).
 121 // For the Intel integer registers, the equivalent Long pairs are
 122 // EDX:EAX, EBX:ECX, and EDI:EBP.
 123 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
 124                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
 125                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
 126                     FPR6L, FPR6H, FPR7L, FPR7H,
 127                     FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
 128 
 129 
 130 //----------Architecture Description Register Classes--------------------------
 131 // Several register classes are automatically defined based upon information in
 132 // this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 137 //
 138 // Class for no registers (empty set).
 139 reg_class no_reg();
 140 
 141 // Class for all registers
 142 reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 143 // Class for all registers (excluding EBP)
 144 reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between the register classes
// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag
// PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
 148 reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
 149 
 150 // Class for general registers
 151 reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 152 // Class for general registers (excluding EBP).
 153 // This register class can be used for implicit null checks on win95.
 154 // It is also safe for use by tailjumps (we don't want to allocate in ebp).
 155 // Used also if the PreserveFramePointer flag is true.
 156 reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
 157 // Dynamic register class that selects between int_reg and int_reg_no_ebp.
 158 reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
 159 
 160 // Class of "X" registers
 161 reg_class int_x_reg(EBX, ECX, EDX, EAX);
 162 
 163 // Class of registers that can appear in an address with no offset.
 164 // EBP and ESP require an extra instruction byte for zero offset.
 165 // Used in fast-unlock
 166 reg_class p_reg(EDX, EDI, ESI, EBX);
 167 
 168 // Class for general registers excluding ECX
 169 reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
 170 // Class for general registers excluding ECX (and EBP)
 171 reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
 172 // Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
 173 reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
 174 
 175 // Class for general registers excluding EAX
 176 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
 177 
 178 // Class for general registers excluding EAX and EBX.
 179 reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
 180 // Class for general registers excluding EAX and EBX (and EBP)
 181 reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
 182 // Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
 183 reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
 184 
 185 // Class of EAX (for multiply and divide operations)
 186 reg_class eax_reg(EAX);
 187 
 188 // Class of EBX (for atomic add)
 189 reg_class ebx_reg(EBX);
 190 
 191 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 192 reg_class ecx_reg(ECX);
 193 
 194 // Class of EDX (for multiply and divide operations)
 195 reg_class edx_reg(EDX);
 196 
 197 // Class of EDI (for synchronization)
 198 reg_class edi_reg(EDI);
 199 
 200 // Class of ESI (for synchronization)
 201 reg_class esi_reg(ESI);
 202 
 203 // Singleton class for stack pointer
 204 reg_class sp_reg(ESP);
 205 
 206 // Singleton class for instruction pointer
 207 // reg_class ip_reg(EIP);
 208 
 209 // Class of integer register pairs
 210 reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
 211 // Class of integer register pairs (excluding EBP and EDI);
 212 reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
 213 // Dynamic register class that selects between long_reg and long_reg_no_ebp.
 214 reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
 215 
 216 // Class of integer register pairs that aligns with calling convention
 217 reg_class eadx_reg( EAX,EDX );
 218 reg_class ebcx_reg( ECX,EBX );
 219 
 220 // Not AX or DX, used in divides
 221 reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
// Not AX or DX (nor EBP), used in divides
 223 reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
 224 // Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
 225 reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 226 
// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
 230 reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
 231 
 232 reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
 233                       FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
 234                       FPR7L,FPR7H );
 235 
 236 reg_class fp_flt_reg0( FPR1L );
 237 reg_class fp_dbl_reg0( FPR1L,FPR1H );
 238 reg_class fp_dbl_reg1( FPR2L,FPR2H );
 239 reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
 240                           FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
 241 
 242 %}
 243 
 244 
 245 //----------SOURCE BLOCK-------------------------------------------------------
 246 // This is a block of C++ code which provides values, functions, and
 247 // definitions necessary in the rest of the architecture description
 248 source_hpp %{
 249 // Must be visible to the DFA in dfa_x86_32.cpp
 250 extern bool is_operand_hi32_zero(Node* n);
 251 %}
 252 
 253 source %{
 254 #define   RELOC_IMM32    Assembler::imm_operand
 255 #define   RELOC_DISP32   Assembler::disp32_operand
 256 
 257 #define __ _masm.
 258 
 259 // How to find the high register of a Long pair, given the low register
 260 #define   HIGH_FROM_LOW(x) ((x)+2)
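
// For the long pairs noted in the allocation comments above (EDX:EAX,
// EBX:ECX, EDI:EBP), the encodings in the reg_def list are EAX=0/EDX=2,
// ECX=1/EBX=3 and EBP=5/EDI=7, so adding 2 to the low half's encoding names
// the high half for each pair, e.g. HIGH_FROM_LOW(0 /*EAX*/) == 2 /*EDX*/.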
 261 
 262 // These masks are used to provide 128-bit aligned bitmasks to the XMM
 263 // instructions, to allow sign-masking or sign-bit flipping.  They allow
 264 // fast versions of NegF/NegD and AbsF/AbsD.
 265 
// Note: 'double' and 'long long' have 32-bit alignment on x86.
 267 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
  // Use the expression (adr)&(~0xF) to provide a 128-bit aligned address
  // for the 128-bit operands of SSE instructions.
 270   jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
  // Store the value into the 128-bit operand.
 272   operand[0] = lo;
 273   operand[1] = hi;
 274   return operand;
 275 }
 276 
// Buffer for 128-bit masks used by SSE instructions.
 278 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
 279 
 280 // Static initialization during VM startup.
 281 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
 282 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
 283 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
 284 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
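
// Worked example of the alignment math above: fp_signmask_pool spans
// (4+1)*2 jlongs = 80 bytes.  The four start addresses passed to
// double_quadword() are 16, 32, 48 and 64 bytes past the pool base; masking
// with ~0xF moves each back by at most 15 bytes, so all four masks stay
// inside the pool and land in four distinct 16-byte aligned slots.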
 285 
 286 // Offset hacking within calls.
 287 static int pre_call_resets_size() {
 288   int size = 0;
 289   Compile* C = Compile::current();
 290   if (C->in_24_bit_fp_mode()) {
 291     size += 6; // fldcw
 292   }
 293   if (C->max_vector_size() > 16) {
 294     if(UseAVX <= 2) {
 295       size += 3; // vzeroupper
 296     }
 297   }
 298   return size;
 299 }
 300 
// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
 304 int MachCallStaticJavaNode::ret_addr_offset() {
 305   return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
 306 }
 307 
 308 int MachCallDynamicJavaNode::ret_addr_offset() {
 309   return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
 310 }
 311 
 312 static int sizeof_FFree_Float_Stack_All = -1;
 313 
 314 int MachCallRuntimeNode::ret_addr_offset() {
 315   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
 316   return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
 317 }
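
// Byte accounting behind the offsets above (a sketch; the exact bytes come
// from the call encodings emitted elsewhere in this file):
//   static call  : E8 rel32                                        = 5 bytes
//   dynamic call : 5-byte MOV that loads the cached inline-cache oop (the
//                  "MOV instruction" counted in compute_padding below),
//                  followed by the 5-byte call                     = 10 bytes
//   runtime call : the measured sizeof_FFree_Float_Stack_All bytes, then the
//                  5-byte call
// plus the FLDCW (6 bytes) and/or VZEROUPPER (3 bytes) resets counted by
// pre_call_resets_size().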
 318 
 319 // Indicate if the safepoint node needs the polling page as an input.
 320 // Since x86 does have absolute addressing, it doesn't.
 321 bool SafePointNode::needs_polling_address_input() {
 322   return false;
 323 }
 324 
 325 //
 326 // Compute padding required for nodes which need alignment
 327 //
 328 
 329 // The address of the call instruction needs to be 4-byte aligned to
 330 // ensure that it does not span a cache line so that it can be patched.
 331 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
 332   current_offset += pre_call_resets_size();  // skip fldcw, if any
 333   current_offset += 1;      // skip call opcode byte
 334   return round_to(current_offset, alignment_required()) - current_offset;
 335 }
 336 
 337 // The address of the call instruction needs to be 4-byte aligned to
 338 // ensure that it does not span a cache line so that it can be patched.
 339 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
 340   current_offset += pre_call_resets_size();  // skip fldcw, if any
 341   current_offset += 5;      // skip MOV instruction
 342   current_offset += 1;      // skip call opcode byte
 343   return round_to(current_offset, alignment_required()) - current_offset;
 344 }
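
// Worked example: suppose the call node would start at current_offset == 10
// and a 6-byte FLDCW reset precedes it.  For the static call, 10 + 6 + 1 = 17
// and round_to(17, 4) = 20, so 3 padding bytes are inserted; the 4-byte
// displacement that gets patched then starts at a 4-byte aligned offset and
// cannot span a cache line.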
 345 
 346 // EMIT_RM()
 347 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
 348   unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
 349   cbuf.insts()->emit_int8(c);
 350 }
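
// The byte laid down above is the standard x86 ModRM/SIB layout:
// bits 7..6 = mod, bits 5..3 = reg (or opcode extension), bits 2..0 = r/m.
// For example, emit_rm(cbuf, 0x3, dst, src) in encode_Copy() below produces
// the register-direct form (mod == 11) used with the 0x8B MOV opcode.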
 351 
 352 // EMIT_CC()
 353 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
 354   unsigned char c = (unsigned char)( f1 | f2 );
 355   cbuf.insts()->emit_int8(c);
 356 }
 357 
 358 // EMIT_OPCODE()
 359 void emit_opcode(CodeBuffer &cbuf, int code) {
 360   cbuf.insts()->emit_int8((unsigned char) code);
 361 }
 362 
 363 // EMIT_OPCODE() w/ relocation information
 364 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
 365   cbuf.relocate(cbuf.insts_mark() + offset, reloc);
 366   emit_opcode(cbuf, code);
 367 }
 368 
 369 // EMIT_D8()
 370 void emit_d8(CodeBuffer &cbuf, int d8) {
 371   cbuf.insts()->emit_int8((unsigned char) d8);
 372 }
 373 
 374 // EMIT_D16()
 375 void emit_d16(CodeBuffer &cbuf, int d16) {
 376   cbuf.insts()->emit_int16(d16);
 377 }
 378 
 379 // EMIT_D32()
 380 void emit_d32(CodeBuffer &cbuf, int d32) {
 381   cbuf.insts()->emit_int32(d32);
 382 }
 383 
 384 // emit 32 bit value and construct relocation entry from relocInfo::relocType
 385 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
 386         int format) {
 387   cbuf.relocate(cbuf.insts_mark(), reloc, format);
 388   cbuf.insts()->emit_int32(d32);
 389 }
 390 
 391 // emit 32 bit value and construct relocation entry from RelocationHolder
 392 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
 393         int format) {
 394 #ifdef ASSERT
 395   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
 396     assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
 397   }
 398 #endif
 399   cbuf.relocate(cbuf.insts_mark(), rspec, format);
 400   cbuf.insts()->emit_int32(d32);
 401 }
 402 
 403 // Access stack slot for load or store
 404 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
 405   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
 406   if( -128 <= disp && disp <= 127 ) {
 407     emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
 408     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 409     emit_d8 (cbuf, disp);     // Displacement  // R/M byte
 410   } else {
 411     emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
 412     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
 413     emit_d32(cbuf, disp);     // Displacement  // R/M byte
 414   }
 415 }
 416 
 417    // rRegI ereg, memory mem) %{    // emit_reg_mem
 418 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
 419   // There is no index & no scale, use form without SIB byte
 420   if ((index == 0x4) &&
 421       (scale == 0) && (base != ESP_enc)) {
 422     // If no displacement, mode is 0x0; unless base is [EBP]
 423     if ( (displace == 0) && (base != EBP_enc) ) {
 424       emit_rm(cbuf, 0x0, reg_encoding, base);
 425     }
 426     else {                    // If 8-bit displacement, mode 0x1
 427       if ((displace >= -128) && (displace <= 127)
 428           && (disp_reloc == relocInfo::none) ) {
 429         emit_rm(cbuf, 0x1, reg_encoding, base);
 430         emit_d8(cbuf, displace);
 431       }
 432       else {                  // If 32-bit displacement
 433         if (base == -1) { // Special flag for absolute address
 434           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
 435           // (manual lies; no SIB needed here)
 436           if ( disp_reloc != relocInfo::none ) {
 437             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 438           } else {
 439             emit_d32      (cbuf, displace);
 440           }
 441         }
 442         else {                // Normal base + offset
 443           emit_rm(cbuf, 0x2, reg_encoding, base);
 444           if ( disp_reloc != relocInfo::none ) {
 445             emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 446           } else {
 447             emit_d32      (cbuf, displace);
 448           }
 449         }
 450       }
 451     }
 452   }
 453   else {                      // Else, encode with the SIB byte
 454     // If no displacement, mode is 0x0; unless base is [EBP]
 455     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
 456       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
 457       emit_rm(cbuf, scale, index, base);
 458     }
 459     else {                    // If 8-bit displacement, mode 0x1
 460       if ((displace >= -128) && (displace <= 127)
 461           && (disp_reloc == relocInfo::none) ) {
 462         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
 463         emit_rm(cbuf, scale, index, base);
 464         emit_d8(cbuf, displace);
 465       }
 466       else {                  // If 32-bit displacement
 467         if (base == 0x04 ) {
 468           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 469           emit_rm(cbuf, scale, index, 0x04);
 470         } else {
 471           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
 472           emit_rm(cbuf, scale, index, base);
 473         }
 474         if ( disp_reloc != relocInfo::none ) {
 475           emit_d32_reloc(cbuf, displace, disp_reloc, 1);
 476         } else {
 477           emit_d32      (cbuf, displace);
 478         }
 479       }
 480     }
 481   }
 482 }
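
// Summary of the addressing-mode cases handled above (standard 32-bit
// ModRM/SIB rules):
//   mod=00 : no displacement, except r/m=101 (EBP) which means [disp32] absolute
//   mod=01 : an 8-bit displacement follows
//   mod=10 : a 32-bit displacement follows
//   r/m=100 (ESP) : a SIB byte follows, so ESP-based forms always carry a SIB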
 483 
 484 
 485 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
 486   if( dst_encoding == src_encoding ) {
 487     // reg-reg copy, use an empty encoding
 488   } else {
 489     emit_opcode( cbuf, 0x8B );
 490     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
 491   }
 492 }
 493 
 494 void emit_cmpfp_fixup(MacroAssembler& _masm) {
 495   Label exit;
 496   __ jccb(Assembler::noParity, exit);
 497   __ pushf();
 498   //
 499   // comiss/ucomiss instructions set ZF,PF,CF flags and
 500   // zero OF,AF,SF for NaN values.
 501   // Fixup flags by zeroing ZF,PF so that compare of NaN
 502   // values returns 'less than' result (CF is set).
 503   // Leave the rest of flags unchanged.
 504   //
 505   //    7 6 5 4 3 2 1 0
 506   //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
 507   //    0 0 1 0 1 0 1 1   (0x2B)
 508   //
 509   __ andl(Address(rsp, 0), 0xffffff2b);
 510   __ popf();
 511   __ bind(exit);
 512 }
 513 
 514 void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
 515   Label done;
 516   __ movl(dst, -1);
 517   __ jcc(Assembler::parity, done);
 518   __ jcc(Assembler::below, done);
 519   __ setb(Assembler::notEqual, dst);
 520   __ movzbl(dst, dst);
 521   __ bind(done);
 522 }
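
// Net effect of emit_cmpfp3: after a comiss/ucomiss compare, dst becomes
//   -1 if the compare was unordered (NaN) or 'below',
//    0 if the operands compared equal, and
//   +1 otherwise,
// giving the three-way -1/0/+1 result with NaN collapsing to -1.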
 523 
 524 
 525 //=============================================================================
 526 const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
 527 
 528 int Compile::ConstantTable::calculate_table_base_offset() const {
 529   return 0;  // absolute addressing, no offset
 530 }
 531 
 532 bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
 533 void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
 534   ShouldNotReachHere();
 535 }
 536 
 537 void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
 538   // Empty encoding
 539 }
 540 
 541 uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
 542   return 0;
 543 }
 544 
 545 #ifndef PRODUCT
 546 void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 547   st->print("# MachConstantBaseNode (empty encoding)");
 548 }
 549 #endif
 550 
 551 
 552 //=============================================================================
 553 #ifndef PRODUCT
 554 void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
 555   Compile* C = ra_->C;
 556 
 557   int framesize = C->frame_size_in_bytes();
 558   int bangsize = C->bang_size_in_bytes();
 559   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
 560   // Remove wordSize for return addr which is already pushed.
 561   framesize -= wordSize;
 562 
 563   if (C->need_stack_bang(bangsize)) {
 564     framesize -= wordSize;
 565     st->print("# stack bang (%d bytes)", bangsize);
 566     st->print("\n\t");
 567     st->print("PUSH   EBP\t# Save EBP");
 568     if (PreserveFramePointer) {
 569       st->print("\n\t");
 570       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 571     }
 572     if (framesize) {
 573       st->print("\n\t");
 574       st->print("SUB    ESP, #%d\t# Create frame",framesize);
 575     }
 576   } else {
 577     st->print("SUB    ESP, #%d\t# Create frame",framesize);
 578     st->print("\n\t");
 579     framesize -= wordSize;
 580     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
 581     if (PreserveFramePointer) {
 582       st->print("\n\t");
 583       st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
 584       if (framesize > 0) {
 585         st->print("\n\t");
 586         st->print("ADD    EBP, #%d", framesize);
 587       }
 588     }
 589   }
 590 
 591   if (VerifyStackAtCalls) {
 592     st->print("\n\t");
 593     framesize -= wordSize;
 594     st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
 595   }
 596 
 597   if( C->in_24_bit_fp_mode() ) {
 598     st->print("\n\t");
 599     st->print("FLDCW  \t# load 24 bit fpu control word");
 600   }
 601   if (UseSSE >= 2 && VerifyFPU) {
 602     st->print("\n\t");
 603     st->print("# verify FPU stack (must be clean on entry)");
 604   }
 605 
 606 #ifdef ASSERT
 607   if (VerifyStackAtCalls) {
 608     st->print("\n\t");
 609     st->print("# stack alignment check");
 610   }
 611 #endif
 612   st->cr();
 613 }
 614 #endif
 615 
 616 
 617 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 618   Compile* C = ra_->C;
 619   MacroAssembler _masm(&cbuf);
 620 
 621   int framesize = C->frame_size_in_bytes();
 622   int bangsize = C->bang_size_in_bytes();
 623 
 624   __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
 625 
 626   C->set_frame_complete(cbuf.insts_size());
 627 
 628   if (C->has_mach_constant_base_node()) {
 629     // NOTE: We set the table base offset here because users might be
 630     // emitted before MachConstantBaseNode.
 631     Compile::ConstantTable& constant_table = C->constant_table();
 632     constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
 633   }
 634 }
 635 
 636 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
 637   return MachNode::size(ra_); // too many variables; just compute it the hard way
 638 }
 639 
 640 int MachPrologNode::reloc() const {
 641   return 0; // a large enough number
 642 }
 643 
 644 //=============================================================================
 645 #ifndef PRODUCT
 646 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
 647   Compile *C = ra_->C;
 648   int framesize = C->frame_size_in_bytes();
 649   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
 651   framesize -= 2*wordSize;
 652 
 653   if (C->max_vector_size() > 16) {
 654     st->print("VZEROUPPER");
 655     st->cr(); st->print("\t");
 656   }
 657   if (C->in_24_bit_fp_mode()) {
 658     st->print("FLDCW  standard control word");
 659     st->cr(); st->print("\t");
 660   }
 661   if (framesize) {
 662     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
 663     st->cr(); st->print("\t");
 664   }
 665   st->print_cr("POPL   EBP"); st->print("\t");
 666   if (do_polling() && C->is_method_compilation()) {
 667     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
 668     st->cr(); st->print("\t");
 669   }
 670 }
 671 #endif
 672 
 673 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
 674   Compile *C = ra_->C;
 675 
 676   if (C->max_vector_size() > 16) {
 677     // Clear upper bits of YMM registers when current compiled code uses
 678     // wide vectors to avoid AVX <-> SSE transition penalty during call.
 679     MacroAssembler masm(&cbuf);
 680     masm.vzeroupper();
 681   }
 682   // If method set FPU control word, restore to standard control word
 683   if (C->in_24_bit_fp_mode()) {
 684     MacroAssembler masm(&cbuf);
 685     masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
 686   }
 687 
 688   int framesize = C->frame_size_in_bytes();
 689   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
 691   framesize -= 2*wordSize;
 692 
 693   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
 694 
 695   if (framesize >= 128) {
 696     emit_opcode(cbuf, 0x81); // add  SP, #framesize
 697     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 698     emit_d32(cbuf, framesize);
 699   } else if (framesize) {
 700     emit_opcode(cbuf, 0x83); // add  SP, #framesize
 701     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
 702     emit_d8(cbuf, framesize);
 703   }
 704 
 705   emit_opcode(cbuf, 0x58 | EBP_enc);
 706 
 707   if (do_polling() && C->is_method_compilation()) {
 708     cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
 709     emit_opcode(cbuf,0x85);
 710     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
 711     emit_d32(cbuf, (intptr_t)os::get_polling_page());
 712   }
 713 }
 714 
 715 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
 716   Compile *C = ra_->C;
 717   // If method set FPU control word, restore to standard control word
 718   int size = C->in_24_bit_fp_mode() ? 6 : 0;
 719   if (C->max_vector_size() > 16) size += 3; // vzeroupper
 720   if (do_polling() && C->is_method_compilation()) size += 6;
 721 
 722   int framesize = C->frame_size_in_bytes();
 723   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp.
 725   framesize -= 2*wordSize;
 726 
 727   size++; // popl rbp,
 728 
 729   if (framesize >= 128) {
 730     size += 6;
 731   } else {
 732     size += framesize ? 3 : 0;
 733   }
 734   return size;
 735 }
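
// The counts above mirror what emit() produces: FLDCW from an absolute
// address is 6 bytes, VZEROUPPER is 3, the safepoint poll
// (TEST EAX,[polling page]) is opcode + ModRM + disp32 = 6, POP EBP is 1,
// and ADD ESP,#imm is 6 bytes with a 32-bit immediate or 3 with an 8-bit one.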
 736 
 737 int MachEpilogNode::reloc() const {
 738   return 0; // a large enough number
 739 }
 740 
 741 const Pipeline * MachEpilogNode::pipeline() const {
 742   return MachNode::pipeline_class();
 743 }
 744 
 745 int MachEpilogNode::safepoint_offset() const { return 0; }
 746 
 747 //=============================================================================
 748 
 749 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
 750 static enum RC rc_class( OptoReg::Name reg ) {
 751 
 752   if( !OptoReg::is_valid(reg)  ) return rc_bad;
 753   if (OptoReg::is_stack(reg)) return rc_stack;
 754 
 755   VMReg r = OptoReg::as_VMReg(reg);
 756   if (r->is_Register()) return rc_int;
 757   if (r->is_FloatRegister()) {
 758     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
 759     return rc_float;
 760   }
 761   assert(r->is_XMMRegister(), "must be");
 762   return rc_xmm;
 763 }
 764 
 765 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
 766                         int opcode, const char *op_str, int size, outputStream* st ) {
 767   if( cbuf ) {
 768     emit_opcode  (*cbuf, opcode );
 769     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
 770 #ifndef PRODUCT
 771   } else if( !do_size ) {
 772     if( size != 0 ) st->print("\n\t");
 773     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
 774       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
 775       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
 776     } else { // FLD, FST, PUSH, POP
 777       st->print("%s [ESP + #%d]",op_str,offset);
 778     }
 779 #endif
 780   }
 781   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 782   return size+3+offset_size;
 783 }
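
// Size accounting: each form above is opcode + ModRM + SIB (ESP-based
// addressing always needs a SIB byte) plus a 0-, 1- or 4-byte displacement,
// hence the size + 3 + offset_size result.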
 784 
 785 // Helper for XMM registers.  Extra opcode bits, limited syntax.
 786 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
 787                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
 788   int in_size_in_bits = Assembler::EVEX_32bit;
 789   int evex_encoding = 0;
 790   if (reg_lo+1 == reg_hi) {
 791     in_size_in_bits = Assembler::EVEX_64bit;
 792     evex_encoding = Assembler::VEX_W;
 793   }
 794   if (cbuf) {
 795     MacroAssembler _masm(cbuf);
 796     if (reg_lo+1 == reg_hi) { // double move?
 797       if (is_load) {
 798         __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 799       } else {
 800         __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 801       }
 802     } else {
 803       if (is_load) {
 804         __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
 805       } else {
 806         __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
 807       }
 808     }
 809 #ifndef PRODUCT
 810   } else if (!do_size) {
 811     if (size != 0) st->print("\n\t");
 812     if (reg_lo+1 == reg_hi) { // double move?
 813       if (is_load) st->print("%s %s,[ESP + #%d]",
 814                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
 815                               Matcher::regName[reg_lo], offset);
 816       else         st->print("MOVSD  [ESP + #%d],%s",
 817                               offset, Matcher::regName[reg_lo]);
 818     } else {
 819       if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
 820                               Matcher::regName[reg_lo], offset);
 821       else         st->print("MOVSS  [ESP + #%d],%s",
 822                               offset, Matcher::regName[reg_lo]);
 823     }
 824 #endif
 825   }
 826   bool is_single_byte = false;
 827   if ((UseAVX > 2) && (offset != 0)) {
 828     is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
 829   }
 830   int offset_size = 0;
 831   if (UseAVX > 2 ) {
 832     offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
 833   } else {
 834     offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
 835   }
 836   size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
 837   // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
 838   return size+5+offset_size;
 839 }
 840 
 841 
 842 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 843                             int src_hi, int dst_hi, int size, outputStream* st ) {
 844   if (cbuf) {
 845     MacroAssembler _masm(cbuf);
 846     if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 847       __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 848                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 849     } else {
 850       __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 851                 as_XMMRegister(Matcher::_regEncode[src_lo]));
 852     }
 853 #ifndef PRODUCT
 854   } else if (!do_size) {
 855     if (size != 0) st->print("\n\t");
 856     if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
 857       if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
 858         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 859       } else {
 860         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 861       }
 862     } else {
 863       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
 864         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 865       } else {
 866         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
 867       }
 868     }
 869 #endif
 870   }
 871   // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
 872   // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
 873   int sz = (UseAVX > 2) ? 6 : 4;
 874   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
 875       UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
 876   return size + sz;
 877 }
 878 
 879 static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 880                             int src_hi, int dst_hi, int size, outputStream* st ) {
 881   // 32-bit
 882   if (cbuf) {
 883     MacroAssembler _masm(cbuf);
 884     __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
 885              as_Register(Matcher::_regEncode[src_lo]));
 886 #ifndef PRODUCT
 887   } else if (!do_size) {
 888     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 889 #endif
 890   }
  return (UseAVX > 2) ? 6 : 4;
 892 }
 893 
 894 
 895 static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 896                                  int src_hi, int dst_hi, int size, outputStream* st ) {
 897   // 32-bit
 898   if (cbuf) {
 899     MacroAssembler _masm(cbuf);
 900     __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
 901              as_XMMRegister(Matcher::_regEncode[src_lo]));
 902 #ifndef PRODUCT
 903   } else if (!do_size) {
 904     st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
 905 #endif
 906   }
  return (UseAVX > 2) ? 6 : 4;
 908 }
 909 
 910 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
 911   if( cbuf ) {
 912     emit_opcode(*cbuf, 0x8B );
 913     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
 914 #ifndef PRODUCT
 915   } else if( !do_size ) {
 916     if( size != 0 ) st->print("\n\t");
 917     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
 918 #endif
 919   }
 920   return size+2;
 921 }
 922 
 923 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
 924                                  int offset, int size, outputStream* st ) {
 925   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
 926     if( cbuf ) {
 927       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
 928       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
 929 #ifndef PRODUCT
 930     } else if( !do_size ) {
 931       if( size != 0 ) st->print("\n\t");
 932       st->print("FLD    %s",Matcher::regName[src_lo]);
 933 #endif
 934     }
 935     size += 2;
 936   }
 937 
 938   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
 939   const char *op_str;
 940   int op;
 941   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
 942     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
 943     op = 0xDD;
 944   } else {                   // 32-bit store
 945     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
 946     op = 0xD9;
 947     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
 948   }
 949 
 950   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 951 }
 952 
 953 // Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
 954 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
 955                           int src_hi, int dst_hi, uint ireg, outputStream* st);
 956 
 957 static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
 958                             int stack_offset, int reg, uint ireg, outputStream* st);
 959 
 960 static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
 961                                      int dst_offset, uint ireg, outputStream* st) {
 962   int calc_size = 0;
 963   int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 964   int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 965   switch (ireg) {
 966   case Op_VecS:
 967     calc_size = 3+src_offset_size + 3+dst_offset_size;
 968     break;
 969   case Op_VecD:
 970     calc_size = 3+src_offset_size + 3+dst_offset_size;
 971     src_offset += 4;
 972     dst_offset += 4;
 973     src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
 974     dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
 975     calc_size += 3+src_offset_size + 3+dst_offset_size;
 976     break;
 977   case Op_VecX:
 978   case Op_VecY:
 979   case Op_VecZ:
 980     calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
 981     break;
 982   default:
 983     ShouldNotReachHere();
 984   }
 985   if (cbuf) {
 986     MacroAssembler _masm(cbuf);
 987     int offset = __ offset();
 988     switch (ireg) {
 989     case Op_VecS:
 990       __ pushl(Address(rsp, src_offset));
 991       __ popl (Address(rsp, dst_offset));
 992       break;
 993     case Op_VecD:
 994       __ pushl(Address(rsp, src_offset));
 995       __ popl (Address(rsp, dst_offset));
 996       __ pushl(Address(rsp, src_offset+4));
 997       __ popl (Address(rsp, dst_offset+4));
 998       break;
 999     case Op_VecX:
1000       __ movdqu(Address(rsp, -16), xmm0);
1001       __ movdqu(xmm0, Address(rsp, src_offset));
1002       __ movdqu(Address(rsp, dst_offset), xmm0);
1003       __ movdqu(xmm0, Address(rsp, -16));
1004       break;
1005     case Op_VecY:
1006       __ vmovdqu(Address(rsp, -32), xmm0);
1007       __ vmovdqu(xmm0, Address(rsp, src_offset));
1008       __ vmovdqu(Address(rsp, dst_offset), xmm0);
      __ vmovdqu(xmm0, Address(rsp, -32));
      break;
    case Op_VecZ:
1011       __ evmovdqul(Address(rsp, -64), xmm0, 2);
1012       __ evmovdqul(xmm0, Address(rsp, src_offset), 2);
1013       __ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
1014       __ evmovdqul(xmm0, Address(rsp, -64), 2);
1015       break;
1016     default:
1017       ShouldNotReachHere();
1018     }
1019     int size = __ offset() - offset;
    assert(size == calc_size, "incorrect size calculation");
1021     return size;
1022 #ifndef PRODUCT
1023   } else if (!do_size) {
1024     switch (ireg) {
1025     case Op_VecS:
1026       st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1027                 "popl    [rsp + #%d]",
1028                 src_offset, dst_offset);
1029       break;
1030     case Op_VecD:
1031       st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
                "popl    [rsp + #%d]\n\t"
1033                 "pushl   [rsp + #%d]\n\t"
                "popl    [rsp + #%d]",
1035                 src_offset, dst_offset, src_offset+4, dst_offset+4);
1036       break;
1037      case Op_VecX:
1038       st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1039                 "movdqu  xmm0, [rsp + #%d]\n\t"
1040                 "movdqu  [rsp + #%d], xmm0\n\t"
1041                 "movdqu  xmm0, [rsp - #16]",
1042                 src_offset, dst_offset);
1043       break;
1044     case Op_VecY:
1045       st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1046                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1047                 "vmovdqu [rsp + #%d], xmm0\n\t"
1048                 "vmovdqu xmm0, [rsp - #32]",
                src_offset, dst_offset);
      break;
    case Op_VecZ:
1051       st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1052                 "vmovdqu xmm0, [rsp + #%d]\n\t"
1053                 "vmovdqu [rsp + #%d], xmm0\n\t"
1054                 "vmovdqu xmm0, [rsp - #64]",
1055                 src_offset, dst_offset);
1056       break;
1057     default:
1058       ShouldNotReachHere();
1059     }
1060 #endif
1061   }
1062   return calc_size;
1063 }
1064 
1065 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1066   // Get registers to move
1067   OptoReg::Name src_second = ra_->get_reg_second(in(1));
1068   OptoReg::Name src_first = ra_->get_reg_first(in(1));
1069   OptoReg::Name dst_second = ra_->get_reg_second(this );
1070   OptoReg::Name dst_first = ra_->get_reg_first(this );
1071 
1072   enum RC src_second_rc = rc_class(src_second);
1073   enum RC src_first_rc = rc_class(src_first);
1074   enum RC dst_second_rc = rc_class(dst_second);
1075   enum RC dst_first_rc = rc_class(dst_first);
1076 
1077   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1078 
1079   // Generate spill code!
1080   int size = 0;
1081 
1082   if( src_first == dst_first && src_second == dst_second )
1083     return size;            // Self copy, no move
1084 
1085   if (bottom_type()->isa_vect() != NULL) {
1086     uint ireg = ideal_reg();
1087     assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1088     assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1089     assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1090     if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1091       // mem -> mem
1092       int src_offset = ra_->reg2offset(src_first);
1093       int dst_offset = ra_->reg2offset(dst_first);
1094       return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1095     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1096       return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1097     } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1098       int stack_offset = ra_->reg2offset(dst_first);
1099       return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1100     } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1101       int stack_offset = ra_->reg2offset(src_first);
1102       return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1103     } else {
1104       ShouldNotReachHere();
1105     }
1106   }
1107 
1108   // --------------------------------------
1109   // Check for mem-mem move.  push/pop to move.
1110   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1111     if( src_second == dst_first ) { // overlapping stack copy ranges
1112       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1113       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1114       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1115       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1116     }
1117     // move low bits
1118     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1119     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1120     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1121       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1122       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1123     }
1124     return size;
1125   }
1126 
1127   // --------------------------------------
1128   // Check for integer reg-reg copy
1129   if( src_first_rc == rc_int && dst_first_rc == rc_int )
1130     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1131 
1132   // Check for integer store
1133   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1134     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1135 
1136   // Check for integer load
1137   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1138     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1139 
1140   // Check for integer reg-xmm reg copy
1141   if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1142     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1143             "no 64 bit integer-float reg moves" );
1144     return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1145   }
1146   // --------------------------------------
1147   // Check for float reg-reg copy
1148   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1149     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1150             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1151     if( cbuf ) {
1152 
1153       // Note the mucking with the register encode to compensate for the 0/1
1154       // indexing issue mentioned in a comment in the reg_def sections
1155       // for FPR registers many lines above here.
1156 
1157       if( src_first != FPR1L_num ) {
1158         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1159         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1160         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1161         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1162      } else {
1163         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1164         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1165      }
1166 #ifndef PRODUCT
1167     } else if( !do_size ) {
1168       if( size != 0 ) st->print("\n\t");
1169       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1170       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1171 #endif
1172     }
1173     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1174   }
1175 
1176   // Check for float store
1177   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1178     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1179   }
1180 
1181   // Check for float load
1182   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1183     int offset = ra_->reg2offset(src_first);
1184     const char *op_str;
1185     int op;
1186     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1187       op_str = "FLD_D";
1188       op = 0xDD;
1189     } else {                   // 32-bit load
1190       op_str = "FLD_S";
1191       op = 0xD9;
1192       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1193     }
1194     if( cbuf ) {
1195       emit_opcode  (*cbuf, op );
1196       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1197       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1198       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1199 #ifndef PRODUCT
1200     } else if( !do_size ) {
1201       if( size != 0 ) st->print("\n\t");
1202       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1203 #endif
1204     }
1205     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1206     return size + 3+offset_size+2;
1207   }
1208 
1209   // Check for xmm reg-reg copy
1210   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1211     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1212             (src_first+1 == src_second && dst_first+1 == dst_second),
1213             "no non-adjacent float-moves" );
1214     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1215   }
1216 
1217   // Check for xmm reg-integer reg copy
1218   if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1219     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1220             "no 64 bit float-integer reg moves" );
1221     return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1222   }
1223 
1224   // Check for xmm store
1225   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1226     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1227   }
1228 
1229   // Check for float xmm load
1230   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1231     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1232   }
1233 
1234   // Copy from float reg to xmm reg
1235   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1236     // copy to the top of stack from floating point reg
1237     // and use LEA to preserve flags
1238     if( cbuf ) {
1239       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1240       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1241       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1242       emit_d8(*cbuf,0xF8);
1243 #ifndef PRODUCT
1244     } else if( !do_size ) {
1245       if( size != 0 ) st->print("\n\t");
1246       st->print("LEA    ESP,[ESP-8]");
1247 #endif
1248     }
1249     size += 4;
1250 
1251     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1252 
1253     // Copy from the temp memory to the xmm reg.
1254     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1255 
1256     if( cbuf ) {
1257       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1258       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1259       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1260       emit_d8(*cbuf,0x08);
1261 #ifndef PRODUCT
1262     } else if( !do_size ) {
1263       if( size != 0 ) st->print("\n\t");
1264       st->print("LEA    ESP,[ESP+8]");
1265 #endif
1266     }
1267     size += 4;
1268     return size;
1269   }
1270 
1271   assert( size > 0, "missed a case" );
1272 
1273   // --------------------------------------------------------------------
1274   // Check for second bits still needing moving.
1275   if( src_second == dst_second )
1276     return size;               // Self copy; no move
1277   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1278 
1279   // Check for second word int-int move
1280   if( src_second_rc == rc_int && dst_second_rc == rc_int )
1281     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1282 
1283   // Check for second word integer store
1284   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1285     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1286 
1287   // Check for second word integer load
1288   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1289     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1290 
1291 
1292   Unimplemented();
1293   return 0; // Mute compiler
1294 }
1295 
1296 #ifndef PRODUCT
1297 void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1298   implementation( NULL, ra_, false, st );
1299 }
1300 #endif
1301 
1302 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1303   implementation( &cbuf, ra_, false, NULL );
1304 }
1305 
1306 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1307   return implementation( NULL, ra_, true, NULL );
1308 }
1309 
1310 
1311 //=============================================================================
1312 #ifndef PRODUCT
1313 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1314   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1315   int reg = ra_->get_reg_first(this);
1316   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1317 }
1318 #endif
1319 
1320 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1321   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1322   int reg = ra_->get_encode(this);
1323   if( offset >= 128 ) {
1324     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1325     emit_rm(cbuf, 0x2, reg, 0x04);
1326     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1327     emit_d32(cbuf, offset);
1328   }
1329   else {
1330     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1331     emit_rm(cbuf, 0x1, reg, 0x04);
1332     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1333     emit_d8(cbuf, offset);
1334   }
1335 }
1336 
1337 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1338   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1339   if( offset >= 128 ) {
1340     return 7;
1341   }
1342   else {
1343     return 4;
1344   }
1345 }
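     // A rough byte-count sketch (assuming the usual ModRM/SIB encodings):
     //   8D /r + SIB + disp8  -> 4 bytes  (LEA reg,[ESP+disp8],  offset < 128)
     //   8D /r + SIB + disp32 -> 7 bytes  (LEA reg,[ESP+disp32], offset >= 128)
     // which is where the 4 and 7 returned above come from.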
1346 
1347 //=============================================================================
1348 #ifndef PRODUCT
1349 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1350   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1351   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1352   st->print_cr("\tNOP");
1353   st->print_cr("\tNOP");
1354   if( !OptoBreakpoint )
1355     st->print_cr("\tNOP");
1356 }
1357 #endif
1358 
1359 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1360   MacroAssembler masm(&cbuf);
1361 #ifdef ASSERT
1362   uint insts_size = cbuf.insts_size();
1363 #endif
1364   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1365   masm.jump_cc(Assembler::notEqual,
1366                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1367   /* WARNING: these NOPs are critical so that the verified entry point is
1368      properly aligned for patching by NativeJump::patch_verified_entry() */
1369   int nops_cnt = 2;
1370   if( !OptoBreakpoint ) // Leave space for int3
1371      nops_cnt += 1;
1372   masm.nop(nops_cnt);
1373 
1374   assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1375 }
1376 
1377 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1378   return OptoBreakpoint ? 11 : 12;
1379 }
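     // Rough size accounting for the UEP above (assuming the usual encodings):
     //   CMP EAX,[ECX+4]          3 bytes
     //   JNE rel32 to miss stub   6 bytes
     //   2 or 3 NOPs              2-3 bytes
     // giving the 11 bytes (OptoBreakpoint leaves room for an int3) or 12 bytes
     // returned above.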
1380 
1381 
1382 //=============================================================================
1383 
1384 int Matcher::regnum_to_fpu_offset(int regnum) {
1385   return regnum - 32; // The FP registers are in the second chunk
1386 }
1387 
1388 // Historically an UltraSparc-specific query; returning true just means we have fast l2f conversion
1389 const bool Matcher::convL2FSupported(void) {
1390   return true;
1391 }
1392 
1393 // Is this branch offset short enough that a short branch can be used?
1394 //
1395 // NOTE: If the platform does not provide any short branch variants, then
1396 //       this method should return false for offset 0.
1397 bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1398   // The passed offset is relative to address of the branch.
1399   // On x86 a branch displacement is calculated relative to the address
1400   // of the next instruction.
1401   offset -= br_size;
1402 
1403   // the short version of jmpConUCF2 contains multiple branches,
1404   // making the reach slightly less
1405   if (rule == jmpConUCF2_rule)
1406     return (-126 <= offset && offset <= 125);
1407   return (-128 <= offset && offset <= 127);
1408 }
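     // Worked example (hypothetical numbers): a 2-byte short branch whose target
     // lies 100 bytes past the branch address arrives here with offset == 100;
     // after subtracting br_size the encoded displacement is 98, which fits in
     // [-128, 127], so the short form is selected.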
1409 
1410 const bool Matcher::isSimpleConstant64(jlong value) {
1411   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
1412   return false;
1413 }
1414 
1415 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1416 const bool Matcher::init_array_count_is_in_bytes = false;
1417 
1418 // Threshold size for cleararray.
1419 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1420 
1421 // Needs 2 CMOV's for longs.
1422 const int Matcher::long_cmove_cost() { return 1; }
1423 
1424 // No CMOVF/CMOVD with SSE/SSE2
1425 const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1426 
1427 // Does the CPU require late expand (see block.cpp for description of late expand)?
1428 const bool Matcher::require_postalloc_expand = false;
1429 
1430 // Should the Matcher clone shifts on addressing modes, expecting them to
1431 // be subsumed into complex addressing expressions or compute them into
1432 // registers?  True for Intel but false for most RISCs
1433 const bool Matcher::clone_shift_expressions = true;
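     // For example, a pattern like (AddP base (LShiftI idx 2)) can be folded into
     // a single scaled-index address such as [EBX+ECX*4] instead of first
     // computing the shifted index into a register (register names illustrative).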
1434 
1435 // Do we need to mask the count passed to shift instructions or does
1436 // the cpu only look at the lower 5/6 bits anyway?
1437 const bool Matcher::need_masked_shift_count = false;
1438 
1439 bool Matcher::narrow_oop_use_complex_address() {
1440   ShouldNotCallThis();
1441   return true;
1442 }
1443 
1444 bool Matcher::narrow_klass_use_complex_address() {
1445   ShouldNotCallThis();
1446   return true;
1447 }
1448 
1449 
1450 // Is it better to copy float constants, or load them directly from memory?
1451 // Intel can load a float constant from a direct address, requiring no
1452 // extra registers.  Most RISCs will have to materialize an address into a
1453 // register first, so they would do better to copy the constant from stack.
1454 const bool Matcher::rematerialize_float_constants = true;
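     // E.g. a use of a float constant can simply be re-done as an FLD_S/MOVSS
     // from the constant's absolute address, so the constant never needs to be
     // spilled and reloaded (instruction choice here is illustrative).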
1455 
1456 // If CPU can load and store mis-aligned doubles directly then no fixup is
1457 // needed.  Else we split the double into 2 integer pieces and move it
1458 // piece-by-piece.  Only happens when passing doubles into C code as the
1459 // Java calling convention forces doubles to be aligned.
1460 const bool Matcher::misaligned_doubles_ok = true;
1461 
1462 
1463 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1464   // Get the memory operand from the node
1465   uint numopnds = node->num_opnds();        // Virtual call for number of operands
1466   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1467   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1468   uint opcnt     = 1;                 // First operand
1469   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1470   while( idx >= skipped+num_edges ) {
1471     skipped += num_edges;
1472     opcnt++;                          // Bump operand count
1473     assert( opcnt < numopnds, "Accessing non-existent operand" );
1474     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1475   }
1476 
1477   MachOper *memory = node->_opnds[opcnt];
1478   MachOper *new_memory = NULL;
1479   switch (memory->opcode()) {
1480   case DIRECT:
1481   case INDOFFSET32X:
1482     // No transformation necessary.
1483     return;
1484   case INDIRECT:
1485     new_memory = new indirect_win95_safeOper( );
1486     break;
1487   case INDOFFSET8:
1488     new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1489     break;
1490   case INDOFFSET32:
1491     new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1492     break;
1493   case INDINDEXOFFSET:
1494     new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1495     break;
1496   case INDINDEXSCALE:
1497     new_memory = new indIndexScale_win95_safeOper(memory->scale());
1498     break;
1499   case INDINDEXSCALEOFFSET:
1500     new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1501     break;
1502   case LOAD_LONG_INDIRECT:
1503   case LOAD_LONG_INDOFFSET32:
1504     // Does not use EBP as an address register; uses { EDX, EBX, EDI, ESI }
1505     return;
1506   default:
1507     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1508     return;
1509   }
1510   node->_opnds[opcnt] = new_memory;
1511 }
1512 
1513 // Advertise here if the CPU requires explicit rounding operations
1514 // to implement the UseStrictFP mode.
1515 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1516 
1517 // Are floats converted to doubles when stored to the stack during deoptimization?
1518 // On x32 a float is stored with conversion only when the FPU is used for floats.
1519 bool Matcher::float_in_double() { return (UseSSE == 0); }
1520 
1521 // Do ints take an entire long register or just half?
1522 const bool Matcher::int_in_long = false;
1523 
1524 // Return whether or not this register is ever used as an argument.  This
1525 // function is used on startup to build the trampoline stubs in generateOptoStub.
1526 // Registers not mentioned will be killed by the VM call in the trampoline, and
1527 // arguments in those registers will not be available to the callee.
1528 bool Matcher::can_be_java_arg( int reg ) {
1529   if(  reg == ECX_num   || reg == EDX_num   ) return true;
1530   if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1531   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1532   return false;
1533 }
1534 
1535 bool Matcher::is_spillable_arg( int reg ) {
1536   return can_be_java_arg(reg);
1537 }
1538 
1539 bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1540   // Use the hardware integer DIV instruction when
1541   // it is faster than code which uses a multiply.
1542   // Only when the constant divisor fits into 32 bits
1543   // (min_jint is excluded because negating it does not
1544   // yield a valid positive 32 bit value).
1545   return VM_Version::has_fast_idiv() &&
1546          (divisor == (int)divisor && divisor != min_jint);
1547 }
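     // Illustrative cases: divisor == 10 fits in 32 bits, so a CPU reporting
     // fast idiv uses the hardware instruction; divisor == 0x100000000L or
     // min_jint falls back to the generic constant-division expansion.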
1548 
1549 // Register for DIVI projection of divmodI
1550 RegMask Matcher::divI_proj_mask() {
1551   return EAX_REG_mask();
1552 }
1553 
1554 // Register for MODI projection of divmodI
1555 RegMask Matcher::modI_proj_mask() {
1556   return EDX_REG_mask();
1557 }
1558 
1559 // Register for DIVL projection of divmodL
1560 RegMask Matcher::divL_proj_mask() {
1561   ShouldNotReachHere();
1562   return RegMask();
1563 }
1564 
1565 // Register for MODL projection of divmodL
1566 RegMask Matcher::modL_proj_mask() {
1567   ShouldNotReachHere();
1568   return RegMask();
1569 }
1570 
1571 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1572   return NO_REG_mask();
1573 }
1574 
1575 // Returns true if the high 32 bits of the value are known to be zero.
1576 bool is_operand_hi32_zero(Node* n) {
1577   int opc = n->Opcode();
1578   if (opc == Op_AndL) {
1579     Node* o2 = n->in(2);
1580     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1581       return true;
1582     }
1583   }
1584   if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1585     return true;
1586   }
1587   return false;
1588 }
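     // Examples: (AndL x (ConL 0xFF)) and (ConL 0x12345678) both report true,
     // while (ConL 0x100000000) does not (its high word is 1).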
1589 
1590 %}
1591 
1592 //----------ENCODING BLOCK-----------------------------------------------------
1593 // This block specifies the encoding classes used by the compiler to output
1594 // byte streams.  Encoding classes generate functions which are called by
1595 // Machine Instruction Nodes in order to generate the bit encoding of the
1596 // instruction.  Operands specify their base encoding interface with the
1597 // interface keyword.  There are currently four supported interfaces,
1598 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1599 // operand to generate a function which returns its register number when
1600 // queried.   CONST_INTER causes an operand to generate a function which
1601 // returns the value of the constant when queried.  MEMORY_INTER causes an
1602 // operand to generate four functions which return the Base Register, the
1603 // Index Register, the Scale Value, and the Offset Value of the operand when
1604 // queried.  COND_INTER causes an operand to generate six functions which
1605 // return the encoding code (i.e. the encoding bits for the instruction)
1606 // associated with each basic boolean condition for a conditional instruction.
1607 // Instructions specify two basic values for encoding.  They use the
1608 // ins_encode keyword to specify their encoding class (which must be one of
1609 // the class names specified in the encoding block), and they use the
1610 // opcode keyword to specify, in order, their primary, secondary, and
1611 // tertiary opcode.  Only the opcode sections which a particular instruction
1612 // needs for encoding need to be specified.
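     // As a hypothetical illustration (operand and rule names here are only
     // illustrative), an add-with-immediate instruct would typically combine
     // these pieces as:
     //
     //   instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
     //     match(Set dst (AddI dst src));
     //     opcode(0x81, 0x00);                            // primary, /0 extension
     //     ins_encode(OpcSErm(dst, src), Con8or32(src));  // enc_classes defined below
     //     ...
     //   %}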
1613 encode %{
1614   // Build emit functions for each basic byte or larger field in the Intel
1615   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1616   // code in the enc_class source block.  Emit functions will live in the
1617   // main source block for now.  In future, we can generalize this by
1618   // adding a syntax that specifies the sizes of fields in an order,
1619   // so that the adlc can build the emit functions automagically
1620 
1621   // Emit primary opcode
1622   enc_class OpcP %{
1623     emit_opcode(cbuf, $primary);
1624   %}
1625 
1626   // Emit secondary opcode
1627   enc_class OpcS %{
1628     emit_opcode(cbuf, $secondary);
1629   %}
1630 
1631   // Emit opcode directly
1632   enc_class Opcode(immI d8) %{
1633     emit_opcode(cbuf, $d8$$constant);
1634   %}
1635 
1636   enc_class SizePrefix %{
1637     emit_opcode(cbuf,0x66);
1638   %}
1639 
1640   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1641     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1642   %}
1643 
1644   enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1645     emit_opcode(cbuf,$opcode$$constant);
1646     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1647   %}
1648 
1649   enc_class mov_r32_imm0( rRegI dst ) %{
1650     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1651     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1652   %}
1653 
1654   enc_class cdq_enc %{
1655     // Full implementation of Java idiv and irem; checks for
1656     // special case as described in JVM spec., p.243 & p.271.
1657     //
1658     //         normal case                           special case
1659     //
1660     // input : rax: dividend                          min_int
1661     //         reg: divisor                           -1
1662     //
1663     // output: rax: quotient  (= rax idiv reg)         min_int
1664     //         rdx: remainder (= rax irem reg)         0
1665     //
1666     //  Code sequence:
1667     //
1668     //  81 F8 00 00 00 80    cmp         rax,80000000h
1669     //  0F 85 0B 00 00 00    jne         normal_case
1670     //  33 D2                xor         rdx,edx
1671     //  83 F9 FF             cmp         rcx,0FFh
1672     //  0F 84 03 00 00 00    je          done
1673     //                  normal_case:
1674     //  99                   cdq
1675     //  F7 F9                idiv        rax,ecx
1676     //                  done:
1677     //
1678     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1679     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1680     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1681     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1682     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1683     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1684     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1685     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1686     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1687     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1688     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1689     // normal_case:
1690     emit_opcode(cbuf,0x99);                                         // cdq
1691     // idiv (note: must be emitted by the user of this rule)
1692     // normal:
1693   %}
1694 
1695   // Dense encoding for older common ops
1696   enc_class Opc_plus(immI opcode, rRegI reg) %{
1697     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1698   %}
1699 
1700 
1701   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1702   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1703     // Check for 8-bit immediate, and set sign extend bit in opcode
1704     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1705       emit_opcode(cbuf, $primary | 0x02);
1706     }
1707     else {                          // If 32-bit immediate
1708       emit_opcode(cbuf, $primary);
1709     }
1710   %}
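       // For instance, with $primary == 0x81 (ALU group-1 op, 32-bit immediate),
       // OR-ing in 0x02 yields 0x83, the sign-extended 8-bit immediate form of
       // the same operation (opcode values here are for illustration).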
1711 
1712   enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1713     // Emit primary opcode and set sign-extend bit
1714     // Check for 8-bit immediate, and set sign extend bit in opcode
1715     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1716       emit_opcode(cbuf, $primary | 0x02);
         }
1717     else {                          // If 32-bit immediate
1718       emit_opcode(cbuf, $primary);
1719     }
1720     // Emit r/m byte with secondary opcode, after primary opcode.
1721     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1722   %}
1723 
1724   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1725     // Check for 8-bit immediate, and set sign extend bit in opcode
1726     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1727       $$$emit8$imm$$constant;
1728     }
1729     else {                          // If 32-bit immediate
1730       // Output immediate
1731       $$$emit32$imm$$constant;
1732     }
1733   %}
1734 
1735   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1736     // Emit primary opcode and set sign-extend bit
1737     // Check for 8-bit immediate, and set sign extend bit in opcode
1738     int con = (int)$imm$$constant; // Throw away top bits
1739     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1740     // Emit r/m byte with secondary opcode, after primary opcode.
1741     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1742     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1743     else                               emit_d32(cbuf,con);
1744   %}
1745 
1746   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1747     // Emit primary opcode and set sign-extend bit
1748     // Check for 8-bit immediate, and set sign extend bit in opcode
1749     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1750     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1751     // Emit r/m byte with tertiary opcode, after primary opcode.
1752     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1753     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1754     else                               emit_d32(cbuf,con);
1755   %}
1756 
1757   enc_class OpcSReg (rRegI dst) %{    // BSWAP
1758     emit_cc(cbuf, $secondary, $dst$$reg );
1759   %}
1760 
1761   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1762     int destlo = $dst$$reg;
1763     int desthi = HIGH_FROM_LOW(destlo);
1764     // bswap lo
1765     emit_opcode(cbuf, 0x0F);
1766     emit_cc(cbuf, 0xC8, destlo);
1767     // bswap hi
1768     emit_opcode(cbuf, 0x0F);
1769     emit_cc(cbuf, 0xC8, desthi);
1770     // xchg lo and hi
1771     emit_opcode(cbuf, 0x87);
1772     emit_rm(cbuf, 0x3, destlo, desthi);
1773   %}
1774 
1775   enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1776     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1777   %}
1778 
1779   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1780     $$$emit8$primary;
1781     emit_cc(cbuf, $secondary, $cop$$cmpcode);
1782   %}
1783 
1784   enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1785     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1786     emit_d8(cbuf, op >> 8 );
1787     emit_d8(cbuf, op & 255);
1788   %}
1789 
1790   // emulate a CMOV with a conditional branch around a MOV
1791   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1792     // Invert sense of branch from sense of CMOV
1793     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1794     emit_d8( cbuf, $brOffs$$constant );
1795   %}
1796 
1797   enc_class enc_PartialSubtypeCheck( ) %{
1798     Register Redi = as_Register(EDI_enc); // result register
1799     Register Reax = as_Register(EAX_enc); // super class
1800     Register Recx = as_Register(ECX_enc); // killed
1801     Register Resi = as_Register(ESI_enc); // sub class
1802     Label miss;
1803 
1804     MacroAssembler _masm(&cbuf);
1805     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1806                                      NULL, &miss,
1807                                      /*set_cond_codes:*/ true);
1808     if ($primary) {
1809       __ xorptr(Redi, Redi);
1810     }
1811     __ bind(miss);
1812   %}
1813 
1814   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1815     MacroAssembler masm(&cbuf);
1816     int start = masm.offset();
1817     if (UseSSE >= 2) {
1818       if (VerifyFPU) {
1819         masm.verify_FPU(0, "must be empty in SSE2+ mode");
1820       }
1821     } else {
1822       // External c_calling_convention expects the FPU stack to be 'clean'.
1823       // Compiled code leaves it dirty.  Do cleanup now.
1824       masm.empty_FPU_stack();
1825     }
1826     if (sizeof_FFree_Float_Stack_All == -1) {
1827       sizeof_FFree_Float_Stack_All = masm.offset() - start;
1828     } else {
1829       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1830     }
1831   %}
1832 
1833   enc_class Verify_FPU_For_Leaf %{
1834     if( VerifyFPU ) {
1835       MacroAssembler masm(&cbuf);
1836       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1837     }
1838   %}
1839 
1840   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1841     // This is the instruction starting address for relocation info.
1842     cbuf.set_insts_mark();
1843     $$$emit8$primary;
1844     // CALL directly to the runtime
1845     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1846                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1847 
1848     if (UseSSE >= 2) {
1849       MacroAssembler _masm(&cbuf);
1850       BasicType rt = tf()->return_type();
1851 
1852       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1853         // A C runtime call where the return value is unused.  In SSE2+
1854         // mode the result needs to be removed from the FPU stack.  It's
1855         // likely that this function call could be removed by the
1856         // optimizer if the C function is a pure function.
1857         __ ffree(0);
1858       } else if (rt == T_FLOAT) {
1859         __ lea(rsp, Address(rsp, -4));
1860         __ fstp_s(Address(rsp, 0));
1861         __ movflt(xmm0, Address(rsp, 0));
1862         __ lea(rsp, Address(rsp,  4));
1863       } else if (rt == T_DOUBLE) {
1864         __ lea(rsp, Address(rsp, -8));
1865         __ fstp_d(Address(rsp, 0));
1866         __ movdbl(xmm0, Address(rsp, 0));
1867         __ lea(rsp, Address(rsp,  8));
1868       }
1869     }
1870   %}
1871 
1872 
1873   enc_class pre_call_resets %{
1874     // If method sets FPU control word restore it here
1875     debug_only(int off0 = cbuf.insts_size());
1876     if (ra_->C->in_24_bit_fp_mode()) {
1877       MacroAssembler _masm(&cbuf);
1878       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1879     }
1880     if (ra_->C->max_vector_size() > 16) {
1881       // Clear upper bits of YMM registers when current compiled code uses
1882       // wide vectors to avoid AVX <-> SSE transition penalty during call.
1883       MacroAssembler _masm(&cbuf);
1884       __ vzeroupper();
1885     }
1886     debug_only(int off1 = cbuf.insts_size());
1887     assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1888   %}
1889 
1890   enc_class post_call_FPU %{
1891     // If method sets FPU control word do it here also
1892     if (Compile::current()->in_24_bit_fp_mode()) {
1893       MacroAssembler masm(&cbuf);
1894       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1895     }
1896   %}
1897 
1898   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1899     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1900     // who we intended to call.
1901     cbuf.set_insts_mark();
1902     $$$emit8$primary;
1903     if (!_method) {
1904       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1905                      runtime_call_Relocation::spec(), RELOC_IMM32 );
1906     } else if (_optimized_virtual) {
1907       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1908                      opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
1909     } else {
1910       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1911                      static_call_Relocation::spec(), RELOC_IMM32 );
1912     }
1913     if (_method) {  // Emit stub for static call.
1914       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1915       if (stub == NULL) {
1916         ciEnv::current()->record_failure("CodeCache is full");
1917         return;
1918       } 
1919     }
1920   %}
1921 
1922   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1923     MacroAssembler _masm(&cbuf);
1924     __ ic_call((address)$meth$$method);
1925   %}
1926 
1927   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1928     int disp = in_bytes(Method::from_compiled_offset());
1929     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1930 
1931     // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1932     cbuf.set_insts_mark();
1933     $$$emit8$primary;
1934     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1935     emit_d8(cbuf, disp);             // Displacement
1936 
1937   %}
1938 
1939 //   Following encoding is no longer used, but may be restored if calling
1940 //   convention changes significantly.
1941 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1942 //
1943 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1944 //     // int ic_reg     = Matcher::inline_cache_reg();
1945 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
1946 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1947 //     // int imo_encode = Matcher::_regEncode[imo_reg];
1948 //
1949 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1950 //     // // so we load it immediately before the call
1951 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1952 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1953 //
1954 //     // xor rbp,ebp
1955 //     emit_opcode(cbuf, 0x33);
1956 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1957 //
1958 //     // CALL to interpreter.
1959 //     cbuf.set_insts_mark();
1960 //     $$$emit8$primary;
1961 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1962 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1963 //   %}
1964 
1965   enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1966     $$$emit8$primary;
1967     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1968     $$$emit8$shift$$constant;
1969   %}
1970 
1971   enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1972     // Load immediate does not have a zero or sign extended version
1973     // for 8-bit immediates
1974     emit_opcode(cbuf, 0xB8 + $dst$$reg);
1975     $$$emit32$src$$constant;
1976   %}
1977 
1978   enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1979     // Load immediate does not have a zero or sign extended version
1980     // for 8-bit immediates
1981     emit_opcode(cbuf, $primary + $dst$$reg);
1982     $$$emit32$src$$constant;
1983   %}
1984 
1985   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1986     // Load immediate does not have a zero or sign extended version
1987     // for 8-bit immediates
1988     int dst_enc = $dst$$reg;
1989     int src_con = $src$$constant & 0x0FFFFFFFFL;
1990     if (src_con == 0) {
1991       // xor dst, dst
1992       emit_opcode(cbuf, 0x33);
1993       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1994     } else {
1995       emit_opcode(cbuf, $primary + dst_enc);
1996       emit_d32(cbuf, src_con);
1997     }
1998   %}
1999 
2000   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2001     // Load immediate does not have a zero or sign extended version
2002     // for 8-bit immediates
2003     int dst_enc = $dst$$reg + 2;
2004     int src_con = ((julong)($src$$constant)) >> 32;
2005     if (src_con == 0) {
2006       // xor dst, dst
2007       emit_opcode(cbuf, 0x33);
2008       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2009     } else {
2010       emit_opcode(cbuf, $primary + dst_enc);
2011       emit_d32(cbuf, src_con);
2012     }
2013   %}
2014 
2015 
2016   // Encode a reg-reg copy.  If it is useless, then empty encoding.
2017   enc_class enc_Copy( rRegI dst, rRegI src ) %{
2018     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2019   %}
2020 
2021   enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2022     encode_Copy( cbuf, $dst$$reg, $src$$reg );
2023   %}
2024 
2025   enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2026     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2027   %}
2028 
2029   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2030     $$$emit8$primary;
2031     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2032   %}
2033 
2034   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2035     $$$emit8$secondary;
2036     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2037   %}
2038 
2039   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2040     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2041   %}
2042 
2043   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2044     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2045   %}
2046 
2047   enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2048     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2049   %}
2050 
2051   enc_class Con32 (immI src) %{    // Con32(storeImmI)
2052     // Output immediate
2053     $$$emit32$src$$constant;
2054   %}
2055 
2056   enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2057     // Output Float immediate bits
2058     jfloat jf = $src$$constant;
2059     int    jf_as_bits = jint_cast( jf );
2060     emit_d32(cbuf, jf_as_bits);
2061   %}
2062 
2063   enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2064     // Output Float immediate bits
2065     jfloat jf = $src$$constant;
2066     int    jf_as_bits = jint_cast( jf );
2067     emit_d32(cbuf, jf_as_bits);
2068   %}
2069 
2070   enc_class Con16 (immI src) %{    // Con16(storeImmI)
2071     // Output immediate
2072     $$$emit16$src$$constant;
2073   %}
2074 
2075   enc_class Con_d32(immI src) %{
2076     emit_d32(cbuf,$src$$constant);
2077   %}
2078 
2079   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2080     // Output immediate memory reference
2081     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2082     emit_d32(cbuf, 0x00);
2083   %}
2084 
2085   enc_class lock_prefix( ) %{
2086     if( os::is_MP() )
2087       emit_opcode(cbuf,0xF0);         // [Lock]
2088   %}
2089 
2090   // Cmp-xchg long value.
2091   // Note: we need to swap rbx and rcx before and after the
2092   //       cmpxchg8 instruction because the instruction uses
2093   //       rcx as the high order word of the new value to store but
2094   //       our register encoding uses rbx.
2095   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2096 
2097     // XCHG  rbx,ecx
2098     emit_opcode(cbuf,0x87);
2099     emit_opcode(cbuf,0xD9);
2100     // [Lock]
2101     if( os::is_MP() )
2102       emit_opcode(cbuf,0xF0);
2103     // CMPXCHG8 [Eptr]
2104     emit_opcode(cbuf,0x0F);
2105     emit_opcode(cbuf,0xC7);
2106     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2107     // XCHG  rbx,ecx
2108     emit_opcode(cbuf,0x87);
2109     emit_opcode(cbuf,0xD9);
2110   %}
2111 
2112   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2113     // [Lock]
2114     if( os::is_MP() )
2115       emit_opcode(cbuf,0xF0);
2116 
2117     // CMPXCHG [Eptr]
2118     emit_opcode(cbuf,0x0F);
2119     emit_opcode(cbuf,0xB1);
2120     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2121   %}
2122 
2123   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2124     int res_encoding = $res$$reg;
2125 
2126     // MOV  res,0
2127     emit_opcode( cbuf, 0xB8 + res_encoding);
2128     emit_d32( cbuf, 0 );
2129     // JNE,s  fail
2130     emit_opcode(cbuf,0x75);
2131     emit_d8(cbuf, 5 );
2132     // MOV  res,1
2133     emit_opcode( cbuf, 0xB8 + res_encoding);
2134     emit_d32( cbuf, 1 );
2135     // fail:
2136   %}
2137 
2138   enc_class set_instruction_start( ) %{
2139     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2140   %}
2141 
2142   enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2143     int reg_encoding = $ereg$$reg;
2144     int base  = $mem$$base;
2145     int index = $mem$$index;
2146     int scale = $mem$$scale;
2147     int displace = $mem$$disp;
2148     relocInfo::relocType disp_reloc = $mem->disp_reloc();
2149     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2150   %}
2151 
2152   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2153     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2154     int base  = $mem$$base;
2155     int index = $mem$$index;
2156     int scale = $mem$$scale;
2157     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2158     assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2159     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2160   %}
2161 
2162   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2163     int r1, r2;
2164     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2165     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2166     emit_opcode(cbuf,0x0F);
2167     emit_opcode(cbuf,$tertiary);
2168     emit_rm(cbuf, 0x3, r1, r2);
2169     emit_d8(cbuf,$cnt$$constant);
2170     emit_d8(cbuf,$primary);
2171     emit_rm(cbuf, 0x3, $secondary, r1);
2172     emit_d8(cbuf,$cnt$$constant);
2173   %}
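       // With the left-shift opcodes (tertiary 0xA4 == SHLD) this is intended to
       // produce SHLD hi,lo,cnt followed by SHL lo,cnt; the right-shift users
       // (tertiary 0xAC == SHRD) shift lo down from hi and then SHR/SAR hi,cnt.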
2174 
2175   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2176     emit_opcode( cbuf, 0x8B ); // Move
2177     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2178     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2179       emit_d8(cbuf,$primary);
2180       emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2181       emit_d8(cbuf,$cnt$$constant-32);
2182     }
2183     emit_d8(cbuf,$primary);
2184     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2185     emit_d8(cbuf,31);
2186   %}
2187 
2188   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2189     int r1, r2;
2190     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2191     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2192 
2193     emit_opcode( cbuf, 0x8B ); // Move r1,r2
2194     emit_rm(cbuf, 0x3, r1, r2);
2195     if( $cnt$$constant > 32 ) { // Shift, if not by zero
2196       emit_opcode(cbuf,$primary);
2197       emit_rm(cbuf, 0x3, $secondary, r1);
2198       emit_d8(cbuf,$cnt$$constant-32);
2199     }
2200     emit_opcode(cbuf,0x33);  // XOR r2,r2
2201     emit_rm(cbuf, 0x3, r2, r2);
2202   %}
2203 
2204   // Clone of RegMem but accepts an extra parameter to access each
2205   // half of a double in memory; it never needs relocation info.
2206   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2207     emit_opcode(cbuf,$opcode$$constant);
2208     int reg_encoding = $rm_reg$$reg;
2209     int base     = $mem$$base;
2210     int index    = $mem$$index;
2211     int scale    = $mem$$scale;
2212     int displace = $mem$$disp + $disp_for_half$$constant;
2213     relocInfo::relocType disp_reloc = relocInfo::none;
2214     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2215   %}
2216 
2217   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2218   //
2219   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2220   // and it never needs relocation information.
2221   // Frequently used to move data between FPU's Stack Top and memory.
2222   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2223     int rm_byte_opcode = $rm_opcode$$constant;
2224     int base     = $mem$$base;
2225     int index    = $mem$$index;
2226     int scale    = $mem$$scale;
2227     int displace = $mem$$disp;
2228     assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2229     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2230   %}
2231 
2232   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2233     int rm_byte_opcode = $rm_opcode$$constant;
2234     int base     = $mem$$base;
2235     int index    = $mem$$index;
2236     int scale    = $mem$$scale;
2237     int displace = $mem$$disp;
2238     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2239     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2240   %}
2241 
2242   enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2243     int reg_encoding = $dst$$reg;
2244     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2245     int index        = 0x04;            // 0x04 indicates no index
2246     int scale        = 0x00;            // 0x00 indicates no scale
2247     int displace     = $src1$$constant; // 0x00 indicates no displacement
2248     relocInfo::relocType disp_reloc = relocInfo::none;
2249     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2250   %}
2251 
2252   enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2253     // Compare dst,src
2254     emit_opcode(cbuf,0x3B);
2255     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2256     // jmp dst < src around move
2257     emit_opcode(cbuf,0x7C);
2258     emit_d8(cbuf,2);
2259     // move dst,src
2260     emit_opcode(cbuf,0x8B);
2261     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2262   %}
2263 
2264   enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2265     // Compare dst,src
2266     emit_opcode(cbuf,0x3B);
2267     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2268     // jmp dst > src around move
2269     emit_opcode(cbuf,0x7F);
2270     emit_d8(cbuf,2);
2271     // move dst,src
2272     emit_opcode(cbuf,0x8B);
2273     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2274   %}
2275 
2276   enc_class enc_FPR_store(memory mem, regDPR src) %{
2277     // If src is FPR1, we can just FST to store it.
2278     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2279     int reg_encoding = 0x2; // Just store
2280     int base  = $mem$$base;
2281     int index = $mem$$index;
2282     int scale = $mem$$scale;
2283     int displace = $mem$$disp;
2284     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2285     if( $src$$reg != FPR1L_enc ) {
2286       reg_encoding = 0x3;  // Store & pop
2287       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2288       emit_d8( cbuf, 0xC0-1+$src$$reg );
2289     }
2290     cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2291     emit_opcode(cbuf,$primary);
2292     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2293   %}
2294 
2295   enc_class neg_reg(rRegI dst) %{
2296     // NEG $dst
2297     emit_opcode(cbuf,0xF7);
2298     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2299   %}
2300 
2301   enc_class setLT_reg(eCXRegI dst) %{
2302     // SETLT $dst
2303     emit_opcode(cbuf,0x0F);
2304     emit_opcode(cbuf,0x9C);
2305     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2306   %}
2307 
2308   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2309     int tmpReg = $tmp$$reg;
2310 
2311     // SUB $p,$q
2312     emit_opcode(cbuf,0x2B);
2313     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2314     // SBB $tmp,$tmp
2315     emit_opcode(cbuf,0x1B);
2316     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2317     // AND $tmp,$y
2318     emit_opcode(cbuf,0x23);
2319     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2320     // ADD $p,$tmp
2321     emit_opcode(cbuf,0x03);
2322     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2323   %}
2324 
2325   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2326     // TEST shift,32
2327     emit_opcode(cbuf,0xF7);
2328     emit_rm(cbuf, 0x3, 0, ECX_enc);
2329     emit_d32(cbuf,0x20);
2330     // JEQ,s small
2331     emit_opcode(cbuf, 0x74);
2332     emit_d8(cbuf, 0x04);
2333     // MOV    $dst.hi,$dst.lo
2334     emit_opcode( cbuf, 0x8B );
2335     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2336     // CLR    $dst.lo
2337     emit_opcode(cbuf, 0x33);
2338     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2339 // small:
2340     // SHLD   $dst.hi,$dst.lo,$shift
2341     emit_opcode(cbuf,0x0F);
2342     emit_opcode(cbuf,0xA5);
2343     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2344     // SHL    $dst.lo,$shift
2345     emit_opcode(cbuf,0xD3);
2346     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2347   %}
2348 
2349   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2350     // TEST shift,32
2351     emit_opcode(cbuf,0xF7);
2352     emit_rm(cbuf, 0x3, 0, ECX_enc);
2353     emit_d32(cbuf,0x20);
2354     // JEQ,s small
2355     emit_opcode(cbuf, 0x74);
2356     emit_d8(cbuf, 0x04);
2357     // MOV    $dst.lo,$dst.hi
2358     emit_opcode( cbuf, 0x8B );
2359     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2360     // CLR    $dst.hi
2361     emit_opcode(cbuf, 0x33);
2362     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2363 // small:
2364     // SHRD   $dst.lo,$dst.hi,$shift
2365     emit_opcode(cbuf,0x0F);
2366     emit_opcode(cbuf,0xAD);
2367     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2368     // SHR    $dst.hi,$shift
2369     emit_opcode(cbuf,0xD3);
2370     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2371   %}
2372 
2373   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2374     // TEST shift,32
2375     emit_opcode(cbuf,0xF7);
2376     emit_rm(cbuf, 0x3, 0, ECX_enc);
2377     emit_d32(cbuf,0x20);
2378     // JEQ,s small
2379     emit_opcode(cbuf, 0x74);
2380     emit_d8(cbuf, 0x05);
2381     // MOV    $dst.lo,$dst.hi
2382     emit_opcode( cbuf, 0x8B );
2383     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2384     // SAR    $dst.hi,31
2385     emit_opcode(cbuf, 0xC1);
2386     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2387     emit_d8(cbuf, 0x1F );
2388 // small:
2389     // SHRD   $dst.lo,$dst.hi,$shift
2390     emit_opcode(cbuf,0x0F);
2391     emit_opcode(cbuf,0xAD);
2392     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2393     // SAR    $dst.hi,$shift
2394     emit_opcode(cbuf,0xD3);
2395     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2396   %}
2397 
2398 
2399   // ----------------- Encodings for floating point unit -----------------
2400   // May leave result in FPU-TOS or FPU reg depending on opcodes
2401   enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2402     $$$emit8$primary;
2403     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2404   %}
2405 
2406   // Pop argument in FPR0 with FSTP ST(0)
2407   enc_class PopFPU() %{
2408     emit_opcode( cbuf, 0xDD );
2409     emit_d8( cbuf, 0xD8 );
2410   %}
2411 
2412   // !!!!! equivalent to Pop_Reg_F
2413   enc_class Pop_Reg_DPR( regDPR dst ) %{
2414     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2415     emit_d8( cbuf, 0xD8+$dst$$reg );
2416   %}
2417 
2418   enc_class Push_Reg_DPR( regDPR dst ) %{
2419     emit_opcode( cbuf, 0xD9 );
2420     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2421   %}
2422 
2423   enc_class strictfp_bias1( regDPR dst ) %{
2424     emit_opcode( cbuf, 0xDB );           // FLD m80real
2425     emit_opcode( cbuf, 0x2D );
2426     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2427     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2428     emit_opcode( cbuf, 0xC8+$dst$$reg );
2429   %}
2430 
2431   enc_class strictfp_bias2( regDPR dst ) %{
2432     emit_opcode( cbuf, 0xDB );           // FLD m80real
2433     emit_opcode( cbuf, 0x2D );
2434     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2435     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2436     emit_opcode( cbuf, 0xC8+$dst$$reg );
2437   %}
2438 
2439   // Special case for moving an integer register to a stack slot.
2440   enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2441     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2442   %}
2443 
2444   // Special case for moving a register to a stack slot.
2445   enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2446     // Opcode already emitted
2447     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2448     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2449     emit_d32(cbuf, $dst$$disp);   // Displacement
2450   %}
2451 
2452   // Push the integer in stackSlot 'src' onto FP-stack
2453   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2454     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2455   %}
2456 
2457   // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2458   enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2459     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2460   %}
2461 
2462   // Same as Pop_Mem_F except for opcode
2463   // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2464   enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2465     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2466   %}
2467 
2468   enc_class Pop_Reg_FPR( regFPR dst ) %{
2469     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2470     emit_d8( cbuf, 0xD8+$dst$$reg );
2471   %}
2472 
2473   enc_class Push_Reg_FPR( regFPR dst ) %{
2474     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2475     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2476   %}
2477 
2478   // Push FPU's float to a stack-slot, and pop FPU-stack
2479   enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2480     int pop = 0x02;
2481     if ($src$$reg != FPR1L_enc) {
2482       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2483       emit_d8( cbuf, 0xC0-1+$src$$reg );
2484       pop = 0x03;
2485     }
2486     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2487   %}
2488 
2489   // Push FPU's double to a stack-slot, and pop FPU-stack
2490   enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2491     int pop = 0x02;
2492     if ($src$$reg != FPR1L_enc) {
2493       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2494       emit_d8( cbuf, 0xC0-1+$src$$reg );
2495       pop = 0x03;
2496     }
2497     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2498   %}
2499 
2500   // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2501   enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2502     int pop = 0xD0 - 1; // -1 since we skip FLD
2503     if ($src$$reg != FPR1L_enc) {
2504       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2505       emit_d8( cbuf, 0xC0-1+$src$$reg );
2506       pop = 0xD8;
2507     }
2508     emit_opcode( cbuf, 0xDD );
2509     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2510   %}
2511 
2512 
2513   enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2514     // load dst in FPR0
2515     emit_opcode( cbuf, 0xD9 );
2516     emit_d8( cbuf, 0xC0-1+$dst$$reg );
2517     if ($src$$reg != FPR1L_enc) {
2518       // fincstp
2519       emit_opcode (cbuf, 0xD9);
2520       emit_opcode (cbuf, 0xF7);
2521       // swap src with FPR1:
2522       // FXCH FPR1 with src
2523       emit_opcode(cbuf, 0xD9);
2524       emit_d8(cbuf, 0xC8-1+$src$$reg );
2525       // fdecstp
2526       emit_opcode (cbuf, 0xD9);
2527       emit_opcode (cbuf, 0xF6);
2528     }
2529   %}
2530 
2531   enc_class Push_ModD_encoding(regD src0, regD src1) %{
2532     MacroAssembler _masm(&cbuf);
2533     __ subptr(rsp, 8);
2534     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2535     __ fld_d(Address(rsp, 0));
2536     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2537     __ fld_d(Address(rsp, 0));
2538   %}
2539 
2540   enc_class Push_ModF_encoding(regF src0, regF src1) %{
2541     MacroAssembler _masm(&cbuf);
2542     __ subptr(rsp, 4);
2543     __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2544     __ fld_s(Address(rsp, 0));
2545     __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2546     __ fld_s(Address(rsp, 0));
2547   %}
2548 
2549   enc_class Push_ResultD(regD dst) %{
2550     MacroAssembler _masm(&cbuf);
2551     __ fstp_d(Address(rsp, 0));
2552     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2553     __ addptr(rsp, 8);
2554   %}
2555 
2556   enc_class Push_ResultF(regF dst, immI d8) %{
2557     MacroAssembler _masm(&cbuf);
2558     __ fstp_s(Address(rsp, 0));
2559     __ movflt($dst$$XMMRegister, Address(rsp, 0));
2560     __ addptr(rsp, $d8$$constant);
2561   %}
2562 
2563   enc_class Push_SrcD(regD src) %{
2564     MacroAssembler _masm(&cbuf);
2565     __ subptr(rsp, 8);
2566     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2567     __ fld_d(Address(rsp, 0));
2568   %}
2569 
2570   enc_class push_stack_temp_qword() %{
2571     MacroAssembler _masm(&cbuf);
2572     __ subptr(rsp, 8);
2573   %}
2574 
2575   enc_class pop_stack_temp_qword() %{
2576     MacroAssembler _masm(&cbuf);
2577     __ addptr(rsp, 8);
2578   %}
2579 
2580   enc_class push_xmm_to_fpr1(regD src) %{
2581     MacroAssembler _masm(&cbuf);
2582     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2583     __ fld_d(Address(rsp, 0));
2584   %}
2585 
2586   enc_class Push_Result_Mod_DPR( regDPR src) %{
2587     if ($src$$reg != FPR1L_enc) {
2588       // fincstp
2589       emit_opcode (cbuf, 0xD9);
2590       emit_opcode (cbuf, 0xF7);
2591       // FXCH FPR1 with src
2592       emit_opcode(cbuf, 0xD9);
2593       emit_d8(cbuf, 0xC8-1+$src$$reg );
2594       // fdecstp
2595       emit_opcode (cbuf, 0xD9);
2596       emit_opcode (cbuf, 0xF6);
2597     }
2598     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2599     // // FSTP   FPR$dst$$reg
2600     // emit_opcode( cbuf, 0xDD );
2601     // emit_d8( cbuf, 0xD8+$dst$$reg );
2602   %}
2603 
2604   enc_class fnstsw_sahf_skip_parity() %{
2605     // fnstsw ax
2606     emit_opcode( cbuf, 0xDF );
2607     emit_opcode( cbuf, 0xE0 );
2608     // sahf
2609     emit_opcode( cbuf, 0x9E );
2610     // jnp  ::skip
2611     emit_opcode( cbuf, 0x7B );
2612     emit_opcode( cbuf, 0x05 );
2613   %}
2614 
2615   enc_class emitModDPR() %{
2616     // fprem must be iterative
2617     // :: loop
2618     // fprem
2619     emit_opcode( cbuf, 0xD9 );
2620     emit_opcode( cbuf, 0xF8 );
2621     // wait
2622     emit_opcode( cbuf, 0x9b );
2623     // fnstsw ax
2624     emit_opcode( cbuf, 0xDF );
2625     emit_opcode( cbuf, 0xE0 );
2626     // sahf
2627     emit_opcode( cbuf, 0x9E );
2628     // jp  ::loop
2629     emit_opcode( cbuf, 0x0F );
2630     emit_opcode( cbuf, 0x8A );
2631     emit_opcode( cbuf, 0xF4 );
2632     emit_opcode( cbuf, 0xFF );
2633     emit_opcode( cbuf, 0xFF );
2634     emit_opcode( cbuf, 0xFF );
2635   %}
2636 
2637   enc_class fpu_flags() %{
2638     // fnstsw_ax
2639     emit_opcode( cbuf, 0xDF);
2640     emit_opcode( cbuf, 0xE0);
2641     // test ax,0x0400
2642     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2643     emit_opcode( cbuf, 0xA9 );
2644     emit_d16   ( cbuf, 0x0400 );
2645     // // // This sequence works, but stalls for 12-16 cycles on PPro
2646     // // test rax,0x0400
2647     // emit_opcode( cbuf, 0xA9 );
2648     // emit_d32   ( cbuf, 0x00000400 );
2649     //
2650     // jz exit (no unordered comparison)
2651     emit_opcode( cbuf, 0x74 );
2652     emit_d8    ( cbuf, 0x02 );
2653     // mov ah,1 - treat as LT case (set carry flag)
2654     emit_opcode( cbuf, 0xB4 );
2655     emit_d8    ( cbuf, 0x01 );
2656     // sahf
2657     emit_opcode( cbuf, 0x9E);
2658   %}
2659 
2660   enc_class cmpF_P6_fixup() %{
2661     // Fixup the integer flags in case comparison involved a NaN
2662     //
2663     // JNP exit (no unordered comparison, P-flag is set by NaN)
2664     emit_opcode( cbuf, 0x7B );
2665     emit_d8    ( cbuf, 0x03 );
2666     // MOV AH,1 - treat as LT case (set carry flag)
2667     emit_opcode( cbuf, 0xB4 );
2668     emit_d8    ( cbuf, 0x01 );
2669     // SAHF
2670     emit_opcode( cbuf, 0x9E);
2671     // NOP     // target for branch to avoid branch to branch
2672     emit_opcode( cbuf, 0x90);
2673   %}
2674 
2675 //     fnstsw_ax();
2676 //     sahf();
2677 //     movl(dst, nan_result);
2678 //     jcc(Assembler::parity, exit);
2679 //     movl(dst, less_result);
2680 //     jcc(Assembler::below, exit);
2681 //     movl(dst, equal_result);
2682 //     jcc(Assembler::equal, exit);
2683 //     movl(dst, greater_result);
2684 
2685 // less_result     =  1;
2686 // greater_result  = -1;
2687 // equal_result    = 0;
2688 // nan_result      = -1;
2689 
2690   enc_class CmpF_Result(rRegI dst) %{
2691     // fnstsw_ax();
2692     emit_opcode( cbuf, 0xDF);
2693     emit_opcode( cbuf, 0xE0);
2694     // sahf
2695     emit_opcode( cbuf, 0x9E);
2696     // movl(dst, nan_result);
2697     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2698     emit_d32( cbuf, -1 );
2699     // jcc(Assembler::parity, exit);
2700     emit_opcode( cbuf, 0x7A );
2701     emit_d8    ( cbuf, 0x13 );
2702     // movl(dst, less_result);
2703     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2704     emit_d32( cbuf, -1 );
2705     // jcc(Assembler::below, exit);
2706     emit_opcode( cbuf, 0x72 );
2707     emit_d8    ( cbuf, 0x0C );
2708     // movl(dst, equal_result);
2709     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2710     emit_d32( cbuf, 0 );
2711     // jcc(Assembler::equal, exit);
2712     emit_opcode( cbuf, 0x74 );
2713     emit_d8    ( cbuf, 0x05 );
2714     // movl(dst, greater_result);
2715     emit_opcode( cbuf, 0xB8 + $dst$$reg);
2716     emit_d32( cbuf, 1 );
2717   %}
2718 
2719 
2720   // Compare the longs and set flags
2721   // BROKEN!  Do Not use as-is
2722   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2723     // CMP    $src1.hi,$src2.hi
2724     emit_opcode( cbuf, 0x3B );
2725     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2726     // JNE,s  done
2727     emit_opcode(cbuf,0x75);
2728     emit_d8(cbuf, 2 );
2729     // CMP    $src1.lo,$src2.lo
2730     emit_opcode( cbuf, 0x3B );
2731     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2732 // done:
2733   %}
2734 
2735   enc_class convert_int_long( regL dst, rRegI src ) %{
2736     // mov $dst.lo,$src
2737     int dst_encoding = $dst$$reg;
2738     int src_encoding = $src$$reg;
2739     encode_Copy( cbuf, dst_encoding  , src_encoding );
2740     // mov $dst.hi,$src
2741     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2742     // sar $dst.hi,31
2743     emit_opcode( cbuf, 0xC1 );
2744     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2745     emit_d8(cbuf, 0x1F );
2746   %}
2747 
2748   enc_class convert_long_double( eRegL src ) %{
2749     // push $src.hi
2750     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2751     // push $src.lo
2752     emit_opcode(cbuf, 0x50+$src$$reg  );
2753     // fild 64-bits at [SP]
2754     emit_opcode(cbuf,0xdf);
2755     emit_d8(cbuf, 0x6C);
2756     emit_d8(cbuf, 0x24);
2757     emit_d8(cbuf, 0x00);
2758     // pop stack
2759     emit_opcode(cbuf, 0x83); // add  SP, #8
2760     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2761     emit_d8(cbuf, 0x8);
2762   %}
2763 
2764   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2765     // IMUL   EDX:EAX,$src1
2766     emit_opcode( cbuf, 0xF7 );
2767     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2768     // SAR    EDX,$cnt-32
2769     int shift_count = ((int)$cnt$$constant) - 32;
2770     if (shift_count > 0) {
2771       emit_opcode(cbuf, 0xC1);
2772       emit_rm(cbuf, 0x3, 7, $dst$$reg );
2773       emit_d8(cbuf, shift_count);
2774     }
2775   %}
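
       // Illustrative sketch (not emitted code): IMUL forms the 64-bit product of
       // the two 32-bit inputs in EDX:EAX, so the high word already accounts for a
       // shift of 32; SAR then shifts out the remaining ($cnt - 32) bits, leaving
       //   dst = (int)(((long)a * (long)b) >> $cnt)      for 32 <= $cnt <= 63
       // where a and b stand for the 32-bit multiplicands.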
2776 
2777   // This version does not restore the stack with ADD SP,8
2778   enc_class convert_long_double2( eRegL src ) %{
2779     // push $src.hi
2780     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2781     // push $src.lo
2782     emit_opcode(cbuf, 0x50+$src$$reg  );
2783     // fild 64-bits at [SP]
2784     emit_opcode(cbuf,0xdf);
2785     emit_d8(cbuf, 0x6C);
2786     emit_d8(cbuf, 0x24);
2787     emit_d8(cbuf, 0x00);
2788   %}
2789 
2790   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2791     // Basic idea: long = (long)int * (long)int
2792     // IMUL EDX:EAX, src
2793     emit_opcode( cbuf, 0xF7 );
2794     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2795   %}
2796 
2797   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2798     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2799     // MUL EDX:EAX, src
2800     emit_opcode( cbuf, 0xF7 );
2801     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2802   %}
2803 
2804   enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2805     // Basic idea: lo(result) = lo(x_lo * y_lo)
2806     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2807     // MOV    $tmp,$src.lo
2808     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2809     // IMUL   $tmp,EDX
2810     emit_opcode( cbuf, 0x0F );
2811     emit_opcode( cbuf, 0xAF );
2812     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2813     // MOV    EDX,$src.hi
2814     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2815     // IMUL   EDX,EAX
2816     emit_opcode( cbuf, 0x0F );
2817     emit_opcode( cbuf, 0xAF );
2818     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2819     // ADD    $tmp,EDX
2820     emit_opcode( cbuf, 0x03 );
2821     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2822     // MUL   EDX:EAX,$src.lo
2823     emit_opcode( cbuf, 0xF7 );
2824     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2825     // ADD    EDX,$tmp
2826     emit_opcode( cbuf, 0x03 );
2827     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2828   %}
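
       // For reference (sketch only, not emitted code), with x = $dst and y = $src
       // split into 32-bit halves, the low 64 bits of the product are:
       //   uint64_t p  = (uint64_t)x_lo * y_lo;                        // widening MUL
       //   uint32_t hi = (uint32_t)(p >> 32) + x_hi*y_lo + x_lo*y_hi;  // IMULs + ADDs
       //   result     = ((uint64_t)hi << 32) | (uint32_t)p;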
2829 
2830   enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2831     // Basic idea: lo(result) = lo(src * y_lo)
2832     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2833     // IMUL   $tmp,EDX,$src
2834     emit_opcode( cbuf, 0x6B );
2835     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2836     emit_d8( cbuf, (int)$src$$constant );
2837     // MOV    EDX,$src
2838     emit_opcode(cbuf, 0xB8 + EDX_enc);
2839     emit_d32( cbuf, (int)$src$$constant );
2840     // MUL   EDX:EAX,EDX
2841     emit_opcode( cbuf, 0xF7 );
2842     emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2843     // ADD    EDX,$tmp
2844     emit_opcode( cbuf, 0x03 );
2845     emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2846   %}
2847 
2848   enc_class long_div( eRegL src1, eRegL src2 ) %{
2849     // PUSH src1.hi
2850     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2851     // PUSH src1.lo
2852     emit_opcode(cbuf,               0x50+$src1$$reg  );
2853     // PUSH src2.hi
2854     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2855     // PUSH src2.lo
2856     emit_opcode(cbuf,               0x50+$src2$$reg  );
2857     // CALL directly to the runtime
2858     cbuf.set_insts_mark();
2859     emit_opcode(cbuf,0xE8);       // Call into runtime
2860     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2861     // Restore stack
2862     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2863     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2864     emit_d8(cbuf, 4*4);
2865   %}
2866 
2867   enc_class long_mod( eRegL src1, eRegL src2 ) %{
2868     // PUSH src1.hi
2869     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2870     // PUSH src1.lo
2871     emit_opcode(cbuf,               0x50+$src1$$reg  );
2872     // PUSH src2.hi
2873     emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2874     // PUSH src2.lo
2875     emit_opcode(cbuf,               0x50+$src2$$reg  );
2876     // CALL directly to the runtime
2877     cbuf.set_insts_mark();
2878     emit_opcode(cbuf,0xE8);       // Call into runtime
2879     emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2880     // Restore stack
2881     emit_opcode(cbuf, 0x83); // add  SP, #framesize
2882     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2883     emit_d8(cbuf, 4*4);
2884   %}
2885 
2886   enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2887     // MOV   $tmp,$src.lo
2888     emit_opcode(cbuf, 0x8B);
2889     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2890     // OR    $tmp,$src.hi
2891     emit_opcode(cbuf, 0x0B);
2892     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2893   %}
2894 
2895   enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2896     // CMP    $src1.lo,$src2.lo
2897     emit_opcode( cbuf, 0x3B );
2898     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2899     // JNE,s  skip
2900     emit_cc(cbuf, 0x70, 0x5);
2901     emit_d8(cbuf,2);
2902     // CMP    $src1.hi,$src2.hi
2903     emit_opcode( cbuf, 0x3B );
2904     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2905   %}
2906 
2907   enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2908     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2909     emit_opcode( cbuf, 0x3B );
2910     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2911     // MOV    $tmp,$src1.hi
2912     emit_opcode( cbuf, 0x8B );
2913     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2914     // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2915     emit_opcode( cbuf, 0x1B );
2916     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2917   %}
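
       // Note: the CMP/SBB pair above performs the full 64-bit subtraction
       // $src1 - $src2 purely for its flags (the signed LT/GE style conditions);
       // $tmp is clobbered and the numeric difference is discarded.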
2918 
2919   enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2920     // XOR    $tmp,$tmp
2921     emit_opcode(cbuf,0x33);  // XOR
2922     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2923     // CMP    $tmp,$src.lo
2924     emit_opcode( cbuf, 0x3B );
2925     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2926     // SBB    $tmp,$src.hi
2927     emit_opcode( cbuf, 0x1B );
2928     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2929   %}
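
       // Note: this computes 0 - $src as a 64-bit subtraction purely for its flags,
       // i.e. a signed compare of zero against the long value; $tmp is clobbered.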
2930 
2931  // Sniff, sniff... smells like Gnu Superoptimizer
2932   enc_class neg_long( eRegL dst ) %{
2933     emit_opcode(cbuf,0xF7);    // NEG hi
2934     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2935     emit_opcode(cbuf,0xF7);    // NEG lo
2936     emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2937     emit_opcode(cbuf,0x83);    // SBB hi,0
2938     emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2939     emit_d8    (cbuf,0 );
2940   %}
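
       // Sketch (illustrative only) of the negation above on a 32-bit register pair:
       //   hi = -hi;        // NEG hi
       //   lo = -lo;        // NEG lo, sets CF if lo was non-zero
       //   hi = hi - CF;    // SBB hi,0 borrows the carry out of the low half
       // which together produce the two's complement of the 64-bit value.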
2941 
2942   enc_class enc_pop_rdx() %{
2943     emit_opcode(cbuf,0x5A);
2944   %}
2945 
2946   enc_class enc_rethrow() %{
2947     cbuf.set_insts_mark();
2948     emit_opcode(cbuf, 0xE9);        // jmp    entry
2949     emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2950                    runtime_call_Relocation::spec(), RELOC_IMM32 );
2951   %}
2952 
2953 
2954   // Convert a double to an int.  Java semantics require we do complex
2955   // manglelations in the corner cases.  So we set the rounding mode to
2956   // 'zero', store the darned double down as an int, and reset the
2957   // rounding mode to 'nearest'.  If the stored result is the out-of-range
2958   // sentinel, a call into the runtime patches up the correct value on the stack.
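       // For reference, the Java (JLS 5.1.3) d2i corner cases the slow path must
       // produce are: NaN -> 0, too-large -> Integer.MAX_VALUE and too-small ->
       // Integer.MIN_VALUE; the masked FISTP writes the "integer indefinite"
       // pattern 0x80000000 for all of them, which is why that value is used as
       // the trigger for the call to the d2i wrapper below.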
2959   enc_class DPR2I_encoding( regDPR src ) %{
2960     // Flip to round-to-zero mode.  We attempted to allow invalid-op
2961     // exceptions here, so that a NaN or other corner-case value will
2962     // throw an exception (but normal values get converted at full speed).
2963     // However, I2C adapters and other float-stack manglers leave pending
2964     // invalid-op exceptions hanging.  We would have to clear them before
2965     // enabling them and that is more expensive than just testing for the
2966     // invalid value Intel stores down in the corner cases.
2967     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2968     emit_opcode(cbuf,0x2D);
2969     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2970     // Allocate a word
2971     emit_opcode(cbuf,0x83);            // SUB ESP,4
2972     emit_opcode(cbuf,0xEC);
2973     emit_d8(cbuf,0x04);
2974     // Encoding assumes a double has been pushed into FPR0.
2975     // Store down the double as an int, popping the FPU stack
2976     emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2977     emit_opcode(cbuf,0x1C);
2978     emit_d8(cbuf,0x24);
2979     // Restore the rounding mode; mask the exception
2980     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2981     emit_opcode(cbuf,0x2D);
2982     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2983         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2984         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2985 
2986     // Load the converted int; adjust CPU stack
2987     emit_opcode(cbuf,0x58);       // POP EAX
2988     emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2989     emit_d32   (cbuf,0x80000000); //         0x80000000
2990     emit_opcode(cbuf,0x75);       // JNE around_slow_call
2991     emit_d8    (cbuf,0x07);       // Size of slow_call
2992     // Push src onto stack slow-path
2993     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
2994     emit_d8    (cbuf,0xC0-1+$src$$reg );
2995     // CALL directly to the runtime
2996     cbuf.set_insts_mark();
2997     emit_opcode(cbuf,0xE8);       // Call into runtime
2998     emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2999     // Carry on here...
3000   %}
3001 
3002   enc_class DPR2L_encoding( regDPR src ) %{
3003     emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3004     emit_opcode(cbuf,0x2D);
3005     emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3006     // Allocate a word
3007     emit_opcode(cbuf,0x83);            // SUB ESP,8
3008     emit_opcode(cbuf,0xEC);
3009     emit_d8(cbuf,0x08);
3010     // Encoding assumes a double has been pushed into FPR0.
3011     // Store down the double as a long, popping the FPU stack
3012     emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3013     emit_opcode(cbuf,0x3C);
3014     emit_d8(cbuf,0x24);
3015     // Restore the rounding mode; mask the exception
3016     emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3017     emit_opcode(cbuf,0x2D);
3018     emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3019         ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3020         : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3021 
3022     // Load the converted int; adjust CPU stack
3023     emit_opcode(cbuf,0x58);       // POP EAX
3024     emit_opcode(cbuf,0x5A);       // POP EDX
3025     emit_opcode(cbuf,0x81);       // CMP EDX,imm
3026     emit_d8    (cbuf,0xFA);       // rdx
3027     emit_d32   (cbuf,0x80000000); //         0x80000000
3028     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3029     emit_d8    (cbuf,0x07+4);     // Size of slow_call
3030     emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3031     emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3032     emit_opcode(cbuf,0x75);       // JNE around_slow_call
3033     emit_d8    (cbuf,0x07);       // Size of slow_call
3034     // Push src onto stack slow-path
3035     emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3036     emit_d8    (cbuf,0xC0-1+$src$$reg );
3037     // CALL directly to the runtime
3038     cbuf.set_insts_mark();
3039     emit_opcode(cbuf,0xE8);       // Call into runtime
3040     emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3041     // Carry on here...
3042   %}
3043 
3044   enc_class FMul_ST_reg( eRegFPR src1 ) %{
3045     // Operand was loaded from memory into fp ST (stack top)
3046     // FMUL   ST,$src  /* D8 C8+i */
3047     emit_opcode(cbuf, 0xD8);
3048     emit_opcode(cbuf, 0xC8 + $src1$$reg);
3049   %}
3050 
3051   enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3052     // FADD   ST,src2  /* D8 C0+i */
3053     emit_opcode(cbuf, 0xD8);
3054     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3055     // could use FADDP  src2,fpST  /* DE C0+i */
3056   %}
3057 
3058   enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3059     // FADDP  src2,ST  /* DE C0+i */
3060     emit_opcode(cbuf, 0xDE);
3061     emit_opcode(cbuf, 0xC0 + $src2$$reg);
3062   %}
3063 
3064   enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3065     // Operand has been loaded into fp ST (stack top)
3066       // FSUB   ST,$src1
3067       emit_opcode(cbuf, 0xD8);
3068       emit_opcode(cbuf, 0xE0 + $src1$$reg);
3069 
3070       // FDIV
3071       emit_opcode(cbuf, 0xD8);
3072       emit_opcode(cbuf, 0xF0 + $src2$$reg);
3073   %}
3074 
3075   enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3076     // Operand was loaded from memory into fp ST (stack top)
3077     // FADD   ST,$src  /* D8 C0+i */
3078     emit_opcode(cbuf, 0xD8);
3079     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3080 
3081     // FMUL  ST,src2  /* D8 C8+i */
3082     emit_opcode(cbuf, 0xD8);
3083     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3084   %}
3085 
3086 
3087   enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3088     // Operand was loaded from memory into fp ST (stack top)
3089     // FADD   ST,$src  /* D8 C0+i */
3090     emit_opcode(cbuf, 0xD8);
3091     emit_opcode(cbuf, 0xC0 + $src1$$reg);
3092 
3093     // FMULP  src2,ST  /* DE C8+i */
3094     emit_opcode(cbuf, 0xDE);
3095     emit_opcode(cbuf, 0xC8 + $src2$$reg);
3096   %}
3097 
3098   // Atomically load the volatile long
3099   enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3100     emit_opcode(cbuf,0xDF);
3101     int rm_byte_opcode = 0x05;
3102     int base     = $mem$$base;
3103     int index    = $mem$$index;
3104     int scale    = $mem$$scale;
3105     int displace = $mem$$disp;
3106     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3107     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3108     store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3109   %}
3110 
3111   // Volatile Store Long.  Must be atomic, so move it into
3112   // the FP TOS and then do a 64-bit FIST.  Has to probe the
3113   // target address before the store (for null-ptr checks)
3114   // so the memory operand is used twice in the encoding.
3115   enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3116     store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3117     cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3118     emit_opcode(cbuf,0xDF);
3119     int rm_byte_opcode = 0x07;
3120     int base     = $mem$$base;
3121     int index    = $mem$$index;
3122     int scale    = $mem$$scale;
3123     int displace = $mem$$disp;
3124     relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3125     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3126   %}
3127 
3128   // Safepoint Poll.  This polls the safepoint page, and causes an
3129   // exception if it is not readable. Unfortunately, it kills the condition code
3130   // in the process.
3131   // We currently use TESTL [spp],EDI.
3132   // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0.
3133 
3134   enc_class Safepoint_Poll() %{
3135     cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3136     emit_opcode(cbuf,0x85);
3137     emit_rm (cbuf, 0x0, 0x7, 0x5);
3138     emit_d32(cbuf, (intptr_t)os::get_polling_page());
3139   %}
3140 %}
3141 
3142 
3143 //----------FRAME--------------------------------------------------------------
3144 // Definition of frame structure and management information.
3145 //
3146 //  S T A C K   L A Y O U T    Allocators stack-slot number
3147 //                             |   (to get allocators register number
3148 //  G  Owned by    |        |  v    add OptoReg::stack0())
3149 //  r   CALLER     |        |
3150 //  o     |        +--------+      pad to even-align allocators stack-slot
3151 //  w     V        |  pad0  |        numbers; owned by CALLER
3152 //  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3153 //  h     ^        |   in   |  5
3154 //        |        |  args  |  4   Holes in incoming args owned by SELF
3155 //  |     |        |        |  3
3156 //  |     |        +--------+
3157 //  V     |        | old out|      Empty on Intel, window on Sparc
3158 //        |    old |preserve|      Must be even aligned.
3159 //        |     SP-+--------+----> Matcher::_old_SP, even aligned
3160 //        |        |   in   |  3   area for Intel ret address
3161 //     Owned by    |preserve|      Empty on Sparc.
3162 //       SELF      +--------+
3163 //        |        |  pad2  |  2   pad to align old SP
3164 //        |        +--------+  1
3165 //        |        | locks  |  0
3166 //        |        +--------+----> OptoReg::stack0(), even aligned
3167 //        |        |  pad1  | 11   pad to align new SP
3168 //        |        +--------+
3169 //        |        |        | 10
3170 //        |        | spills |  9   spills
3171 //        V        |        |  8   (pad0 slot for callee)
3172 //      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3173 //        ^        |  out   |  7
3174 //        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3175 //     Owned by    +--------+
3176 //      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3177 //        |    new |preserve|      Must be even-aligned.
3178 //        |     SP-+--------+----> Matcher::_new_SP, even aligned
3179 //        |        |        |
3180 //
3181 // Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3182 //         known from SELF's arguments and the Java calling convention.
3183 //         Region 6-7 is determined per call site.
3184 // Note 2: If the calling convention leaves holes in the incoming argument
3185 //         area, those holes are owned by SELF.  Holes in the outgoing area
3186 //         are owned by the CALLEE.  Holes should not be necessary in the
3187 //         incoming area, as the Java calling convention is completely under
3188 //         the control of the AD file.  Doubles can be sorted and packed to
3189 //         avoid holes.  Holes in the outgoing arguments may be necessary for
3190 //         varargs C calling conventions.
3191 // Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3192 //         even aligned with pad0 as needed.
3193 //         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3194 //         region 6-11 is even aligned; it may be padded out more so that
3195 //         the region from SP to FP meets the minimum stack alignment.
3196 
3197 frame %{
3198   // What direction does stack grow in (assumed to be same for C & Java)
3199   stack_direction(TOWARDS_LOW);
3200 
3201   // These three registers define part of the calling convention
3202   // between compiled code and the interpreter.
3203   inline_cache_reg(EAX);                // Inline Cache Register
3204   interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3205 
3206   // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3207   cisc_spilling_operand_name(indOffset32);
3208 
3209   // Number of stack slots consumed by locking an object
3210   sync_stack_slots(1);
3211 
3212   // Compiled code's Frame Pointer
3213   frame_pointer(ESP);
3214   // Interpreter stores its frame pointer in a register which is
3215   // stored to the stack by I2CAdaptors.
3216   // I2CAdaptors convert from interpreted Java to compiled Java.
3217   interpreter_frame_pointer(EBP);
3218 
3219   // Stack alignment requirement
3220   // Alignment size in bytes (128-bit -> 16 bytes)
3221   stack_alignment(StackAlignmentInBytes);
3222 
3223   // Number of stack slots between incoming argument block and the start of
3224   // a new frame.  The PROLOG must add this many slots to the stack.  The
3225   // EPILOG must remove this many slots.  Intel needs one slot for
3226   // return address and one for rbp, (must save rbp)
3227   in_preserve_stack_slots(2+VerifyStackAtCalls);
3228 
3229   // Number of outgoing stack slots killed above the out_preserve_stack_slots
3230   // for calls to C.  Supports the var-args backing area for register parms.
3231   varargs_C_out_slots_killed(0);
3232 
3233   // The after-PROLOG location of the return address.  Location of
3234   // return address specifies a type (REG or STACK) and a number
3235   // representing the register number (i.e. - use a register name) or
3236   // stack slot.
3237   // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3238   // Otherwise, it is above the locks and verification slot and alignment word
3239   return_addr(STACK - 1 +
3240               round_to((Compile::current()->in_preserve_stack_slots() +
3241                         Compile::current()->fixed_slots()),
3242                        stack_alignment_in_slots()));
3243 
3244   // Body of function which returns an integer array locating
3245   // arguments either in registers or in stack slots.  Passed an array
3246   // of ideal registers called "sig" and a "length" count.  Stack-slot
3247   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3248   // arguments for a CALLEE.  Incoming stack arguments are
3249   // automatically biased by the preserve_stack_slots field above.
3250   calling_convention %{
3251     // No difference between ingoing/outgoing; just pass false
3252     SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3253   %}
3254 
3255 
3256   // Body of function which returns an integer array locating
3257   // arguments either in registers or in stack slots.  Passed an array
3258   // of ideal registers called "sig" and a "length" count.  Stack-slot
3259   // offsets are based on outgoing arguments, i.e. a CALLER setting up
3260   // arguments for a CALLEE.  Incoming stack arguments are
3261   // automatically biased by the preserve_stack_slots field above.
3262   c_calling_convention %{
3263     // This is obviously always outgoing
3264     (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3265   %}
3266 
3267   // Location of C & interpreter return values
3268   c_return_value %{
3269     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3270     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3271     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3272 
3273     // in SSE2+ mode we want to keep the FPU stack clean so pretend
3274     // that C functions return float and double results in XMM0.
3275     if( ideal_reg == Op_RegD && UseSSE>=2 )
3276       return OptoRegPair(XMM0b_num,XMM0_num);
3277     if( ideal_reg == Op_RegF && UseSSE>=2 )
3278       return OptoRegPair(OptoReg::Bad,XMM0_num);
3279 
3280     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3281   %}
3282 
3283   // Location of return values
3284   return_value %{
3285     assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3286     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3287     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3288     if( ideal_reg == Op_RegD && UseSSE>=2 )
3289       return OptoRegPair(XMM0b_num,XMM0_num);
3290     if( ideal_reg == Op_RegF && UseSSE>=1 )
3291       return OptoRegPair(OptoReg::Bad,XMM0_num);
3292     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3293   %}
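
       // For orientation (derived from the tables above): an int returns in EAX,
       // a long in EDX:EAX, and a float/double on the x87 stack in FPR1 unless
       // SSE is in use (UseSSE >= 1 for float, >= 2 for double), in which case
       // XMM0 is used instead.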
3294 
3295 %}
3296 
3297 //----------ATTRIBUTES---------------------------------------------------------
3298 //----------Operand Attributes-------------------------------------------------
3299 op_attrib op_cost(0);        // Required cost attribute
3300 
3301 //----------Instruction Attributes---------------------------------------------
3302 ins_attrib ins_cost(100);       // Required cost attribute
3303 ins_attrib ins_size(8);         // Required size attribute (in bits)
3304 ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3305                                 // non-matching short branch variant of some
3306                                 // long branch?
3307 ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3308                                 // specifies the alignment that some part of the instruction (not
3309                                 // necessarily the start) requires.  If > 1, a compute_padding()
3310                                 // function must be provided for the instruction
3311 
3312 //----------OPERANDS-----------------------------------------------------------
3313 // Operand definitions must precede instruction definitions for correct parsing
3314 // in the ADLC because operands constitute user defined types which are used in
3315 // instruction definitions.
3316 
3317 //----------Simple Operands----------------------------------------------------
3318 // Immediate Operands
3319 // Integer Immediate
3320 operand immI() %{
3321   match(ConI);
3322 
3323   op_cost(10);
3324   format %{ %}
3325   interface(CONST_INTER);
3326 %}
3327 
3328 // Constant for test vs zero
3329 operand immI0() %{
3330   predicate(n->get_int() == 0);
3331   match(ConI);
3332 
3333   op_cost(0);
3334   format %{ %}
3335   interface(CONST_INTER);
3336 %}
3337 
3338 // Constant for increment
3339 operand immI1() %{
3340   predicate(n->get_int() == 1);
3341   match(ConI);
3342 
3343   op_cost(0);
3344   format %{ %}
3345   interface(CONST_INTER);
3346 %}
3347 
3348 // Constant for decrement
3349 operand immI_M1() %{
3350   predicate(n->get_int() == -1);
3351   match(ConI);
3352 
3353   op_cost(0);
3354   format %{ %}
3355   interface(CONST_INTER);
3356 %}
3357 
3358 // Valid scale values for addressing modes
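     // (the x86 SIB scale field holds log2 of the factor, so 0..3 selects a
     //  scale of 1, 2, 4 or 8)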
3359 operand immI2() %{
3360   predicate(0 <= n->get_int() && (n->get_int() <= 3));
3361   match(ConI);
3362 
3363   format %{ %}
3364   interface(CONST_INTER);
3365 %}
3366 
3367 operand immI8() %{
3368   predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3369   match(ConI);
3370 
3371   op_cost(5);
3372   format %{ %}
3373   interface(CONST_INTER);
3374 %}
3375 
3376 operand immI16() %{
3377   predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3378   match(ConI);
3379 
3380   op_cost(10);
3381   format %{ %}
3382   interface(CONST_INTER);
3383 %}
3384 
3385 // Int Immediate non-negative
3386 operand immU31()
3387 %{
3388   predicate(n->get_int() >= 0);
3389   match(ConI);
3390 
3391   op_cost(0);
3392   format %{ %}
3393   interface(CONST_INTER);
3394 %}
3395 
3396 // Constant for long shifts
3397 operand immI_32() %{
3398   predicate( n->get_int() == 32 );
3399   match(ConI);
3400 
3401   op_cost(0);
3402   format %{ %}
3403   interface(CONST_INTER);
3404 %}
3405 
3406 operand immI_1_31() %{
3407   predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3408   match(ConI);
3409 
3410   op_cost(0);
3411   format %{ %}
3412   interface(CONST_INTER);
3413 %}
3414 
3415 operand immI_32_63() %{
3416   predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3417   match(ConI);
3418   op_cost(0);
3419 
3420   format %{ %}
3421   interface(CONST_INTER);
3422 %}
3423 
3424 operand immI_1() %{
3425   predicate( n->get_int() == 1 );
3426   match(ConI);
3427 
3428   op_cost(0);
3429   format %{ %}
3430   interface(CONST_INTER);
3431 %}
3432 
3433 operand immI_2() %{
3434   predicate( n->get_int() == 2 );
3435   match(ConI);
3436 
3437   op_cost(0);
3438   format %{ %}
3439   interface(CONST_INTER);
3440 %}
3441 
3442 operand immI_3() %{
3443   predicate( n->get_int() == 3 );
3444   match(ConI);
3445 
3446   op_cost(0);
3447   format %{ %}
3448   interface(CONST_INTER);
3449 %}
3450 
3451 // Pointer Immediate
3452 operand immP() %{
3453   match(ConP);
3454 
3455   op_cost(10);
3456   format %{ %}
3457   interface(CONST_INTER);
3458 %}
3459 
3460 // NULL Pointer Immediate
3461 operand immP0() %{
3462   predicate( n->get_ptr() == 0 );
3463   match(ConP);
3464   op_cost(0);
3465 
3466   format %{ %}
3467   interface(CONST_INTER);
3468 %}
3469 
3470 // Long Immediate
3471 operand immL() %{
3472   match(ConL);
3473 
3474   op_cost(20);
3475   format %{ %}
3476   interface(CONST_INTER);
3477 %}
3478 
3479 // Long Immediate zero
3480 operand immL0() %{
3481   predicate( n->get_long() == 0L );
3482   match(ConL);
3483   op_cost(0);
3484 
3485   format %{ %}
3486   interface(CONST_INTER);
3487 %}
3488 
3489 // Long Immediate minus one
3490 operand immL_M1() %{
3491   predicate( n->get_long() == -1L );
3492   match(ConL);
3493   op_cost(0);
3494 
3495   format %{ %}
3496   interface(CONST_INTER);
3497 %}
3498 
3499 // Long immediate from 0 to 127.
3500 // Used for a shorter form of long mul by 10.
3501 operand immL_127() %{
3502   predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3503   match(ConL);
3504   op_cost(0);
3505 
3506   format %{ %}
3507   interface(CONST_INTER);
3508 %}
3509 
3510 // Long Immediate: low 32-bit mask
3511 operand immL_32bits() %{
3512   predicate(n->get_long() == 0xFFFFFFFFL);
3513   match(ConL);
3514   op_cost(0);
3515 
3516   format %{ %}
3517   interface(CONST_INTER);
3518 %}
3519 
3520 // Long Immediate: 32-bit signed value
3521 operand immL32() %{
3522   predicate(n->get_long() == (int)(n->get_long()));
3523   match(ConL);
3524   op_cost(20);
3525 
3526   format %{ %}
3527   interface(CONST_INTER);
3528 %}
3529 
3530 //Double Immediate zero
3531 operand immDPR0() %{
3532   // Do additional (and counter-intuitive) test against NaN to work around VC++
3533   // bug that generates code such that NaNs compare equal to 0.0
3534   predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3535   match(ConD);
3536 
3537   op_cost(5);
3538   format %{ %}
3539   interface(CONST_INTER);
3540 %}
3541 
3542 // Double Immediate one
3543 operand immDPR1() %{
3544   predicate( UseSSE<=1 && n->getd() == 1.0 );
3545   match(ConD);
3546 
3547   op_cost(5);
3548   format %{ %}
3549   interface(CONST_INTER);
3550 %}
3551 
3552 // Double Immediate
3553 operand immDPR() %{
3554   predicate(UseSSE<=1);
3555   match(ConD);
3556 
3557   op_cost(5);
3558   format %{ %}
3559   interface(CONST_INTER);
3560 %}
3561 
3562 operand immD() %{
3563   predicate(UseSSE>=2);
3564   match(ConD);
3565 
3566   op_cost(5);
3567   format %{ %}
3568   interface(CONST_INTER);
3569 %}
3570 
3571 // Double Immediate zero
3572 operand immD0() %{
3573   // Do additional (and counter-intuitive) test against NaN to work around VC++
3574   // bug that generates code such that NaNs compare equal to 0.0 AND do not
3575   // compare equal to -0.0.
3576   predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3577   match(ConD);
3578 
3579   format %{ %}
3580   interface(CONST_INTER);
3581 %}
3582 
3583 // Float Immediate zero
3584 operand immFPR0() %{
3585   predicate(UseSSE == 0 && n->getf() == 0.0F);
3586   match(ConF);
3587 
3588   op_cost(5);
3589   format %{ %}
3590   interface(CONST_INTER);
3591 %}
3592 
3593 // Float Immediate one
3594 operand immFPR1() %{
3595   predicate(UseSSE == 0 && n->getf() == 1.0F);
3596   match(ConF);
3597 
3598   op_cost(5);
3599   format %{ %}
3600   interface(CONST_INTER);
3601 %}
3602 
3603 // Float Immediate
3604 operand immFPR() %{
3605   predicate( UseSSE == 0 );
3606   match(ConF);
3607 
3608   op_cost(5);
3609   format %{ %}
3610   interface(CONST_INTER);
3611 %}
3612 
3613 // Float Immediate
3614 operand immF() %{
3615   predicate(UseSSE >= 1);
3616   match(ConF);
3617 
3618   op_cost(5);
3619   format %{ %}
3620   interface(CONST_INTER);
3621 %}
3622 
3623 // Float Immediate zero.  Zero and not -0.0
3624 operand immF0() %{
3625   predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3626   match(ConF);
3627 
3628   op_cost(5);
3629   format %{ %}
3630   interface(CONST_INTER);
3631 %}
3632 
3633 // Immediates for special shifts (sign extend)
3634 
3635 // Constants for increment
3636 operand immI_16() %{
3637   predicate( n->get_int() == 16 );
3638   match(ConI);
3639 
3640   format %{ %}
3641   interface(CONST_INTER);
3642 %}
3643 
3644 operand immI_24() %{
3645   predicate( n->get_int() == 24 );
3646   match(ConI);
3647 
3648   format %{ %}
3649   interface(CONST_INTER);
3650 %}
3651 
3652 // Constant for byte-wide masking
3653 operand immI_255() %{
3654   predicate( n->get_int() == 255 );
3655   match(ConI);
3656 
3657   format %{ %}
3658   interface(CONST_INTER);
3659 %}
3660 
3661 // Constant for short-wide masking
3662 operand immI_65535() %{
3663   predicate(n->get_int() == 65535);
3664   match(ConI);
3665 
3666   format %{ %}
3667   interface(CONST_INTER);
3668 %}
3669 
3670 // Register Operands
3671 // Integer Register
3672 operand rRegI() %{
3673   constraint(ALLOC_IN_RC(int_reg));
3674   match(RegI);
3675   match(xRegI);
3676   match(eAXRegI);
3677   match(eBXRegI);
3678   match(eCXRegI);
3679   match(eDXRegI);
3680   match(eDIRegI);
3681   match(eSIRegI);
3682 
3683   format %{ %}
3684   interface(REG_INTER);
3685 %}
3686 
3687 // Subset of Integer Register
3688 operand xRegI(rRegI reg) %{
3689   constraint(ALLOC_IN_RC(int_x_reg));
3690   match(reg);
3691   match(eAXRegI);
3692   match(eBXRegI);
3693   match(eCXRegI);
3694   match(eDXRegI);
3695 
3696   format %{ %}
3697   interface(REG_INTER);
3698 %}
3699 
3700 // Special Registers
3701 operand eAXRegI(xRegI reg) %{
3702   constraint(ALLOC_IN_RC(eax_reg));
3703   match(reg);
3704   match(rRegI);
3705 
3706   format %{ "EAX" %}
3707   interface(REG_INTER);
3708 %}
3709 
3710 // Special Registers
3711 operand eBXRegI(xRegI reg) %{
3712   constraint(ALLOC_IN_RC(ebx_reg));
3713   match(reg);
3714   match(rRegI);
3715 
3716   format %{ "EBX" %}
3717   interface(REG_INTER);
3718 %}
3719 
3720 operand eCXRegI(xRegI reg) %{
3721   constraint(ALLOC_IN_RC(ecx_reg));
3722   match(reg);
3723   match(rRegI);
3724 
3725   format %{ "ECX" %}
3726   interface(REG_INTER);
3727 %}
3728 
3729 operand eDXRegI(xRegI reg) %{
3730   constraint(ALLOC_IN_RC(edx_reg));
3731   match(reg);
3732   match(rRegI);
3733 
3734   format %{ "EDX" %}
3735   interface(REG_INTER);
3736 %}
3737 
3738 operand eDIRegI(xRegI reg) %{
3739   constraint(ALLOC_IN_RC(edi_reg));
3740   match(reg);
3741   match(rRegI);
3742 
3743   format %{ "EDI" %}
3744   interface(REG_INTER);
3745 %}
3746 
3747 operand naxRegI() %{
3748   constraint(ALLOC_IN_RC(nax_reg));
3749   match(RegI);
3750   match(eCXRegI);
3751   match(eDXRegI);
3752   match(eSIRegI);
3753   match(eDIRegI);
3754 
3755   format %{ %}
3756   interface(REG_INTER);
3757 %}
3758 
3759 operand nadxRegI() %{
3760   constraint(ALLOC_IN_RC(nadx_reg));
3761   match(RegI);
3762   match(eBXRegI);
3763   match(eCXRegI);
3764   match(eSIRegI);
3765   match(eDIRegI);
3766 
3767   format %{ %}
3768   interface(REG_INTER);
3769 %}
3770 
3771 operand ncxRegI() %{
3772   constraint(ALLOC_IN_RC(ncx_reg));
3773   match(RegI);
3774   match(eAXRegI);
3775   match(eDXRegI);
3776   match(eSIRegI);
3777   match(eDIRegI);
3778 
3779   format %{ %}
3780   interface(REG_INTER);
3781 %}
3782 
3783 // // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3784 // //
3785 operand eSIRegI(xRegI reg) %{
3786    constraint(ALLOC_IN_RC(esi_reg));
3787    match(reg);
3788    match(rRegI);
3789 
3790    format %{ "ESI" %}
3791    interface(REG_INTER);
3792 %}
3793 
3794 // Pointer Register
3795 operand anyRegP() %{
3796   constraint(ALLOC_IN_RC(any_reg));
3797   match(RegP);
3798   match(eAXRegP);
3799   match(eBXRegP);
3800   match(eCXRegP);
3801   match(eDIRegP);
3802   match(eRegP);
3803 
3804   format %{ %}
3805   interface(REG_INTER);
3806 %}
3807 
3808 operand eRegP() %{
3809   constraint(ALLOC_IN_RC(int_reg));
3810   match(RegP);
3811   match(eAXRegP);
3812   match(eBXRegP);
3813   match(eCXRegP);
3814   match(eDIRegP);
3815 
3816   format %{ %}
3817   interface(REG_INTER);
3818 %}
3819 
3820 // On Windows 95, EBP is not safe to use for implicit null tests.
3821 operand eRegP_no_EBP() %{
3822   constraint(ALLOC_IN_RC(int_reg_no_ebp));
3823   match(RegP);
3824   match(eAXRegP);
3825   match(eBXRegP);
3826   match(eCXRegP);
3827   match(eDIRegP);
3828 
3829   op_cost(100);
3830   format %{ %}
3831   interface(REG_INTER);
3832 %}
3833 
3834 operand naxRegP() %{
3835   constraint(ALLOC_IN_RC(nax_reg));
3836   match(RegP);
3837   match(eBXRegP);
3838   match(eDXRegP);
3839   match(eCXRegP);
3840   match(eSIRegP);
3841   match(eDIRegP);
3842 
3843   format %{ %}
3844   interface(REG_INTER);
3845 %}
3846 
3847 operand nabxRegP() %{
3848   constraint(ALLOC_IN_RC(nabx_reg));
3849   match(RegP);
3850   match(eCXRegP);
3851   match(eDXRegP);
3852   match(eSIRegP);
3853   match(eDIRegP);
3854 
3855   format %{ %}
3856   interface(REG_INTER);
3857 %}
3858 
3859 operand pRegP() %{
3860   constraint(ALLOC_IN_RC(p_reg));
3861   match(RegP);
3862   match(eBXRegP);
3863   match(eDXRegP);
3864   match(eSIRegP);
3865   match(eDIRegP);
3866 
3867   format %{ %}
3868   interface(REG_INTER);
3869 %}
3870 
3871 // Special Registers
3872 // Return a pointer value
3873 operand eAXRegP(eRegP reg) %{
3874   constraint(ALLOC_IN_RC(eax_reg));
3875   match(reg);
3876   format %{ "EAX" %}
3877   interface(REG_INTER);
3878 %}
3879 
3880 // Used in AtomicAdd
3881 operand eBXRegP(eRegP reg) %{
3882   constraint(ALLOC_IN_RC(ebx_reg));
3883   match(reg);
3884   format %{ "EBX" %}
3885   interface(REG_INTER);
3886 %}
3887 
3888 // Tail-call (interprocedural jump) to interpreter
3889 operand eCXRegP(eRegP reg) %{
3890   constraint(ALLOC_IN_RC(ecx_reg));
3891   match(reg);
3892   format %{ "ECX" %}
3893   interface(REG_INTER);
3894 %}
3895 
3896 operand eSIRegP(eRegP reg) %{
3897   constraint(ALLOC_IN_RC(esi_reg));
3898   match(reg);
3899   format %{ "ESI" %}
3900   interface(REG_INTER);
3901 %}
3902 
3903 // Used in rep stosw
3904 operand eDIRegP(eRegP reg) %{
3905   constraint(ALLOC_IN_RC(edi_reg));
3906   match(reg);
3907   format %{ "EDI" %}
3908   interface(REG_INTER);
3909 %}
3910 
3911 operand eRegL() %{
3912   constraint(ALLOC_IN_RC(long_reg));
3913   match(RegL);
3914   match(eADXRegL);
3915 
3916   format %{ %}
3917   interface(REG_INTER);
3918 %}
3919 
3920 operand eADXRegL( eRegL reg ) %{
3921   constraint(ALLOC_IN_RC(eadx_reg));
3922   match(reg);
3923 
3924   format %{ "EDX:EAX" %}
3925   interface(REG_INTER);
3926 %}
3927 
3928 operand eBCXRegL( eRegL reg ) %{
3929   constraint(ALLOC_IN_RC(ebcx_reg));
3930   match(reg);
3931 
3932   format %{ "EBX:ECX" %}
3933   interface(REG_INTER);
3934 %}
3935 
3936 // Special case for integer high multiply
3937 operand eADXRegL_low_only() %{
3938   constraint(ALLOC_IN_RC(eadx_reg));
3939   match(RegL);
3940 
3941   format %{ "EAX" %}
3942   interface(REG_INTER);
3943 %}
3944 
3945 // Flags register, used as output of compare instructions
3946 operand eFlagsReg() %{
3947   constraint(ALLOC_IN_RC(int_flags));
3948   match(RegFlags);
3949 
3950   format %{ "EFLAGS" %}
3951   interface(REG_INTER);
3952 %}
3953 
3954 // Flags register, used as output of FLOATING POINT compare instructions
3955 operand eFlagsRegU() %{
3956   constraint(ALLOC_IN_RC(int_flags));
3957   match(RegFlags);
3958 
3959   format %{ "EFLAGS_U" %}
3960   interface(REG_INTER);
3961 %}
3962 
3963 operand eFlagsRegUCF() %{
3964   constraint(ALLOC_IN_RC(int_flags));
3965   match(RegFlags);
3966   predicate(false);
3967 
3968   format %{ "EFLAGS_U_CF" %}
3969   interface(REG_INTER);
3970 %}
3971 
3972 // Condition Code Register used by long compare
3973 operand flagsReg_long_LTGE() %{
3974   constraint(ALLOC_IN_RC(int_flags));
3975   match(RegFlags);
3976   format %{ "FLAGS_LTGE" %}
3977   interface(REG_INTER);
3978 %}
3979 operand flagsReg_long_EQNE() %{
3980   constraint(ALLOC_IN_RC(int_flags));
3981   match(RegFlags);
3982   format %{ "FLAGS_EQNE" %}
3983   interface(REG_INTER);
3984 %}
3985 operand flagsReg_long_LEGT() %{
3986   constraint(ALLOC_IN_RC(int_flags));
3987   match(RegFlags);
3988   format %{ "FLAGS_LEGT" %}
3989   interface(REG_INTER);
3990 %}
3991 
3992 // Float register operands
3993 operand regDPR() %{
3994   predicate( UseSSE < 2 );
3995   constraint(ALLOC_IN_RC(fp_dbl_reg));
3996   match(RegD);
3997   match(regDPR1);
3998   match(regDPR2);
3999   format %{ %}
4000   interface(REG_INTER);
4001 %}
4002 
4003 operand regDPR1(regDPR reg) %{
4004   predicate( UseSSE < 2 );
4005   constraint(ALLOC_IN_RC(fp_dbl_reg0));
4006   match(reg);
4007   format %{ "FPR1" %}
4008   interface(REG_INTER);
4009 %}
4010 
4011 operand regDPR2(regDPR reg) %{
4012   predicate( UseSSE < 2 );
4013   constraint(ALLOC_IN_RC(fp_dbl_reg1));
4014   match(reg);
4015   format %{ "FPR2" %}
4016   interface(REG_INTER);
4017 %}
4018 
4019 operand regnotDPR1(regDPR reg) %{
4020   predicate( UseSSE < 2 );
4021   constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4022   match(reg);
4023   format %{ %}
4024   interface(REG_INTER);
4025 %}
4026 
4027 // Float register operands
4028 operand regFPR() %{
4029   predicate( UseSSE < 2 );
4030   constraint(ALLOC_IN_RC(fp_flt_reg));
4031   match(RegF);
4032   match(regFPR1);
4033   format %{ %}
4034   interface(REG_INTER);
4035 %}
4036 
4037 // Float register operands
4038 operand regFPR1(regFPR reg) %{
4039   predicate( UseSSE < 2 );
4040   constraint(ALLOC_IN_RC(fp_flt_reg0));
4041   match(reg);
4042   format %{ "FPR1" %}
4043   interface(REG_INTER);
4044 %}
4045 
4046 // XMM Float register operands
4047 operand regF() %{
4048   predicate( UseSSE>=1 );
4049   constraint(ALLOC_IN_RC(float_reg_legacy));
4050   match(RegF);
4051   format %{ %}
4052   interface(REG_INTER);
4053 %}
4054 
4055 // XMM Double register operands
4056 operand regD() %{
4057   predicate( UseSSE>=2 );
4058   constraint(ALLOC_IN_RC(double_reg_legacy));
4059   match(RegD);
4060   format %{ %}
4061   interface(REG_INTER);
4062 %}
4063 
4064 // Vectors: note that we use legacy registers to avoid extra (unneeded in the
4065 // 32-bit VM) runtime code generation via reg_class_dynamic.
4066 operand vecS() %{
4067   constraint(ALLOC_IN_RC(vectors_reg_legacy));
4068   match(VecS);
4069 
4070   format %{ %}
4071   interface(REG_INTER);
4072 %}
4073 
4074 operand vecD() %{
4075   constraint(ALLOC_IN_RC(vectord_reg_legacy));
4076   match(VecD);
4077 
4078   format %{ %}
4079   interface(REG_INTER);
4080 %}
4081 
4082 operand vecX() %{
4083   constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4084   match(VecX);
4085 
4086   format %{ %}
4087   interface(REG_INTER);
4088 %}
4089 
4090 operand vecY() %{
4091   constraint(ALLOC_IN_RC(vectory_reg_legacy));
4092   match(VecY);
4093 
4094   format %{ %}
4095   interface(REG_INTER);
4096 %}
4097 
4098 //----------Memory Operands----------------------------------------------------
4099 // Direct Memory Operand
4100 operand direct(immP addr) %{
4101   match(addr);
4102 
4103   format %{ "[$addr]" %}
4104   interface(MEMORY_INTER) %{
4105     base(0xFFFFFFFF);
4106     index(0x4);
4107     scale(0x0);
4108     disp($addr);
4109   %}
4110 %}
4111 
4112 // Indirect Memory Operand
4113 operand indirect(eRegP reg) %{
4114   constraint(ALLOC_IN_RC(int_reg));
4115   match(reg);
4116 
4117   format %{ "[$reg]" %}
4118   interface(MEMORY_INTER) %{
4119     base($reg);
4120     index(0x4);
4121     scale(0x0);
4122     disp(0x0);
4123   %}
4124 %}
4125 
4126 // Indirect Memory Plus Short Offset Operand
4127 operand indOffset8(eRegP reg, immI8 off) %{
4128   match(AddP reg off);
4129 
4130   format %{ "[$reg + $off]" %}
4131   interface(MEMORY_INTER) %{
4132     base($reg);
4133     index(0x4);
4134     scale(0x0);
4135     disp($off);
4136   %}
4137 %}
4138 
4139 // Indirect Memory Plus Long Offset Operand
4140 operand indOffset32(eRegP reg, immI off) %{
4141   match(AddP reg off);
4142 
4143   format %{ "[$reg + $off]" %}
4144   interface(MEMORY_INTER) %{
4145     base($reg);
4146     index(0x4);
4147     scale(0x0);
4148     disp($off);
4149   %}
4150 %}
4151 
4152 // Indirect Memory Plus Long Offset Operand
4153 operand indOffset32X(rRegI reg, immP off) %{
4154   match(AddP off reg);
4155 
4156   format %{ "[$reg + $off]" %}
4157   interface(MEMORY_INTER) %{
4158     base($reg);
4159     index(0x4);
4160     scale(0x0);
4161     disp($off);
4162   %}
4163 %}
4164 
4165 // Indirect Memory Plus Index Register Plus Offset Operand
4166 operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4167   match(AddP (AddP reg ireg) off);
4168 
4169   op_cost(10);
4170   format %{"[$reg + $off + $ireg]" %}
4171   interface(MEMORY_INTER) %{
4172     base($reg);
4173     index($ireg);
4174     scale(0x0);
4175     disp($off);
4176   %}
4177 %}
4178 
4179 // Indirect Memory Plus Index Register Plus Offset Operand
4180 operand indIndex(eRegP reg, rRegI ireg) %{
4181   match(AddP reg ireg);
4182 
4183   op_cost(10);
4184   format %{"[$reg + $ireg]" %}
4185   interface(MEMORY_INTER) %{
4186     base($reg);
4187     index($ireg);
4188     scale(0x0);
4189     disp(0x0);
4190   %}
4191 %}
4192 
4193 // // -------------------------------------------------------------------------
4194 // // 486 architecture doesn't support "scale * index + offset" without a base
4195 // // -------------------------------------------------------------------------
4196 // // Scaled Memory Operands
4197 // // Indirect Memory Times Scale Plus Offset Operand
4198 // operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4199 //   match(AddP off (LShiftI ireg scale));
4200 //
4201 //   op_cost(10);
4202 //   format %{"[$off + $ireg << $scale]" %}
4203 //   interface(MEMORY_INTER) %{
4204 //     base(0x4);
4205 //     index($ireg);
4206 //     scale($scale);
4207 //     disp($off);
4208 //   %}
4209 // %}
4210 
4211 // Indirect Memory Times Scale Plus Index Register
4212 operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4213   match(AddP reg (LShiftI ireg scale));
4214 
4215   op_cost(10);
4216   format %{"[$reg + $ireg << $scale]" %}
4217   interface(MEMORY_INTER) %{
4218     base($reg);
4219     index($ireg);
4220     scale($scale);
4221     disp(0x0);
4222   %}
4223 %}
4224 
4225 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4226 operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4227   match(AddP (AddP reg (LShiftI ireg scale)) off);
4228 
4229   op_cost(10);
4230   format %{"[$reg + $off + $ireg << $scale]" %}
4231   interface(MEMORY_INTER) %{
4232     base($reg);
4233     index($ireg);
4234     scale($scale);
4235     disp($off);
4236   %}
4237 %}
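
     // For example (illustrative; assumes the usual 32-bit layout with a 12-byte
     // int-array header), an int-array element load a[i] typically matches this
     // operand as [$reg + 12 + $ireg << 2], with $reg holding the array oop and
     // $ireg the index.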
4238 
4239 //----------Load Long Memory Operands------------------------------------------
4240 // The load-long idiom will use its address expression again after loading
4241 // the first word of the long.  If the load-long destination overlaps with
4242 // registers used in the addressing expression, the 2nd half will be loaded
4243 // from a clobbered address.  Fix this by requiring that load-long use
4244 // address registers that do not overlap with the load-long target.
4245 
4246 // load-long support
4247 operand load_long_RegP() %{
4248   constraint(ALLOC_IN_RC(esi_reg));
4249   match(RegP);
4250   match(eSIRegP);
4251   op_cost(100);
4252   format %{  %}
4253   interface(REG_INTER);
4254 %}
4255 
4256 // Indirect Memory Operand Long
4257 operand load_long_indirect(load_long_RegP reg) %{
4258   constraint(ALLOC_IN_RC(esi_reg));
4259   match(reg);
4260 
4261   format %{ "[$reg]" %}
4262   interface(MEMORY_INTER) %{
4263     base($reg);
4264     index(0x4);
4265     scale(0x0);
4266     disp(0x0);
4267   %}
4268 %}
4269 
4270 // Indirect Memory Plus Long Offset Operand
4271 operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4272   match(AddP reg off);
4273 
4274   format %{ "[$reg + $off]" %}
4275   interface(MEMORY_INTER) %{
4276     base($reg);
4277     index(0x4);
4278     scale(0x0);
4279     disp($off);
4280   %}
4281 %}
4282 
4283 opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4284 
4285 
4286 //----------Special Memory Operands--------------------------------------------
4287 // Stack Slot Operand - This operand is used for loading and storing temporary
4288 //                      values on the stack where a match requires a value to
4289 //                      flow through memory.
4290 operand stackSlotP(sRegP reg) %{
4291   constraint(ALLOC_IN_RC(stack_slots));
4292   // No match rule because this operand is only generated in matching
4293   format %{ "[$reg]" %}
4294   interface(MEMORY_INTER) %{
4295     base(0x4);   // ESP
4296     index(0x4);  // No Index
4297     scale(0x0);  // No Scale
4298     disp($reg);  // Stack Offset
4299   %}
4300 %}
4301 
4302 operand stackSlotI(sRegI reg) %{
4303   constraint(ALLOC_IN_RC(stack_slots));
4304   // No match rule because this operand is only generated in matching
4305   format %{ "[$reg]" %}
4306   interface(MEMORY_INTER) %{
4307     base(0x4);   // ESP
4308     index(0x4);  // No Index
4309     scale(0x0);  // No Scale
4310     disp($reg);  // Stack Offset
4311   %}
4312 %}
4313 
4314 operand stackSlotF(sRegF reg) %{
4315   constraint(ALLOC_IN_RC(stack_slots));
4316   // No match rule because this operand is only generated in matching
4317   format %{ "[$reg]" %}
4318   interface(MEMORY_INTER) %{
4319     base(0x4);   // ESP
4320     index(0x4);  // No Index
4321     scale(0x0);  // No Scale
4322     disp($reg);  // Stack Offset
4323   %}
4324 %}
4325 
4326 operand stackSlotD(sRegD reg) %{
4327   constraint(ALLOC_IN_RC(stack_slots));
4328   // No match rule because this operand is only generated in matching
4329   format %{ "[$reg]" %}
4330   interface(MEMORY_INTER) %{
4331     base(0x4);   // ESP
4332     index(0x4);  // No Index
4333     scale(0x0);  // No Scale
4334     disp($reg);  // Stack Offset
4335   %}
4336 %}
4337 
4338 operand stackSlotL(sRegL reg) %{
4339   constraint(ALLOC_IN_RC(stack_slots));
4340   // No match rule because this operand is only generated in matching
4341   format %{ "[$reg]" %}
4342   interface(MEMORY_INTER) %{
4343     base(0x4);   // ESP
4344     index(0x4);  // No Index
4345     scale(0x0);  // No Scale
4346     disp($reg);  // Stack Offset
4347   %}
4348 %}
4349 
4350 //----------Memory Operands - Win95 Implicit Null Variants----------------
4351 // Indirect Memory Operand
4352 operand indirect_win95_safe(eRegP_no_EBP reg)
4353 %{
4354   constraint(ALLOC_IN_RC(int_reg));
4355   match(reg);
4356 
4357   op_cost(100);
4358   format %{ "[$reg]" %}
4359   interface(MEMORY_INTER) %{
4360     base($reg);
4361     index(0x4);
4362     scale(0x0);
4363     disp(0x0);
4364   %}
4365 %}
4366 
4367 // Indirect Memory Plus Short Offset Operand
4368 operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4369 %{
4370   match(AddP reg off);
4371 
4372   op_cost(100);
4373   format %{ "[$reg + $off]" %}
4374   interface(MEMORY_INTER) %{
4375     base($reg);
4376     index(0x4);
4377     scale(0x0);
4378     disp($off);
4379   %}
4380 %}
4381 
4382 // Indirect Memory Plus Long Offset Operand
4383 operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4384 %{
4385   match(AddP reg off);
4386 
4387   op_cost(100);
4388   format %{ "[$reg + $off]" %}
4389   interface(MEMORY_INTER) %{
4390     base($reg);
4391     index(0x4);
4392     scale(0x0);
4393     disp($off);
4394   %}
4395 %}
4396 
4397 // Indirect Memory Plus Index Register Plus Offset Operand
4398 operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4399 %{
4400   match(AddP (AddP reg ireg) off);
4401 
4402   op_cost(100);
4403   format %{"[$reg + $off + $ireg]" %}
4404   interface(MEMORY_INTER) %{
4405     base($reg);
4406     index($ireg);
4407     scale(0x0);
4408     disp($off);
4409   %}
4410 %}
4411 
4412 // Indirect Memory Times Scale Plus Index Register
4413 operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4414 %{
4415   match(AddP reg (LShiftI ireg scale));
4416 
4417   op_cost(100);
4418   format %{"[$reg + $ireg << $scale]" %}
4419   interface(MEMORY_INTER) %{
4420     base($reg);
4421     index($ireg);
4422     scale($scale);
4423     disp(0x0);
4424   %}
4425 %}
4426 
4427 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4428 operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4429 %{
4430   match(AddP (AddP reg (LShiftI ireg scale)) off);
4431 
4432   op_cost(100);
4433   format %{"[$reg + $off + $ireg << $scale]" %}
4434   interface(MEMORY_INTER) %{
4435     base($reg);
4436     index($ireg);
4437     scale($scale);
4438     disp($off);
4439   %}
4440 %}
4441 
4442 //----------Conditional Branch Operands----------------------------------------
4443 // Comparison Op  - This is the operation of the comparison, and is limited to
4444 //                  the following set of codes:
4445 //                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4446 //
4447 // Other attributes of the comparison, such as unsignedness, are specified
4448 // by the comparison instruction that sets a condition code flags register.
4449 // That result is represented by a flags operand whose subtype is appropriate
4450 // to the unsignedness (etc.) of the comparison.
4451 //
4452 // Later, the instruction which matches both the Comparison Op (a Bool) and
4453 // the flags (produced by the Cmp) specifies the coding of the comparison op
4454 // by matching a specific subtype of Bool operand below, such as cmpOpU.
4455 
4456 // Comparison Code
4457 operand cmpOp() %{
4458   match(Bool);
4459 
4460   format %{ "" %}
4461   interface(COND_INTER) %{
4462     equal(0x4, "e");
4463     not_equal(0x5, "ne");
4464     less(0xC, "l");
4465     greater_equal(0xD, "ge");
4466     less_equal(0xE, "le");
4467     greater(0xF, "g");
4468     overflow(0x0, "o");
4469     no_overflow(0x1, "no");
4470   %}
4471 %}
4472 
4473 // Comparison Code, unsigned compare.  Used by FP also, with
4474 // C2 (unordered) turned into GT or LT already.  The other bits
4475 // C0 and C3 are turned into Carry & Zero flags.
4476 operand cmpOpU() %{
4477   match(Bool);
4478 
4479   format %{ "" %}
4480   interface(COND_INTER) %{
4481     equal(0x4, "e");
4482     not_equal(0x5, "ne");
4483     less(0x2, "b");
4484     greater_equal(0x3, "nb");
4485     less_equal(0x6, "be");
4486     greater(0x7, "nbe");
4487     overflow(0x0, "o");
4488     no_overflow(0x1, "no");
4489   %}
4490 %}
4491 
4492 // Floating comparisons that don't require any fixup for the unordered case
4493 operand cmpOpUCF() %{
4494   match(Bool);
4495   predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4496             n->as_Bool()->_test._test == BoolTest::ge ||
4497             n->as_Bool()->_test._test == BoolTest::le ||
4498             n->as_Bool()->_test._test == BoolTest::gt);
4499   format %{ "" %}
4500   interface(COND_INTER) %{
4501     equal(0x4, "e");
4502     not_equal(0x5, "ne");
4503     less(0x2, "b");
4504     greater_equal(0x3, "nb");
4505     less_equal(0x6, "be");
4506     greater(0x7, "nbe");
4507     overflow(0x0, "o");
4508     no_overflow(0x1, "no");
4509   %}
4510 %}
4511 
4512 
4513 // Floating comparisons that can be fixed up with extra conditional jumps
4514 operand cmpOpUCF2() %{
4515   match(Bool);
4516   predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4517             n->as_Bool()->_test._test == BoolTest::eq);
4518   format %{ "" %}
4519   interface(COND_INTER) %{
4520     equal(0x4, "e");
4521     not_equal(0x5, "ne");
4522     less(0x2, "b");
4523     greater_equal(0x3, "nb");
4524     less_equal(0x6, "be");
4525     greater(0x7, "nbe");
4526     overflow(0x0, "o");
4527     no_overflow(0x1, "no");
4528   %}
4529 %}
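// Illustrative sketch (comments only): after UCOMISS/UCOMISD an unordered
// result sets the parity flag, so the eq/ne cases matched by cmpOpUCF2 need
// an extra conditional jump on parity before the equality test, roughly:
//
//   UCOMISS xmm0, xmm1        ; register names here are placeholders
//   JP      is_not_equal      ; unordered => cannot compare equal
//   JE      is_equal
//
// The lt/ge/le/gt cases matched by cmpOpUCF above need no such fixup.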
4530 
4531 // Comparison Code for FP conditional move
4532 operand cmpOp_fcmov() %{
4533   match(Bool);
4534 
4535   predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4536             n->as_Bool()->_test._test != BoolTest::no_overflow);
4537   format %{ "" %}
4538   interface(COND_INTER) %{
4539     equal        (0x0C8);
4540     not_equal    (0x1C8);
4541     less         (0x0C0);
4542     greater_equal(0x1C0);
4543     less_equal   (0x0D0);
4544     greater      (0x1D0);
4545     overflow(0x0, "o"); // not really supported by the instruction
4546     no_overflow(0x1, "no"); // not really supported by the instruction
4547   %}
4548 %}
4549 
4550 // Comparison Code used in long compares
4551 operand cmpOp_commute() %{
4552   match(Bool);
4553 
4554   format %{ "" %}
4555   interface(COND_INTER) %{
4556     equal(0x4, "e");
4557     not_equal(0x5, "ne");
4558     less(0xF, "g");
4559     greater_equal(0xE, "le");
4560     less_equal(0xD, "ge");
4561     greater(0xC, "l");
4562     overflow(0x0, "o");
4563     no_overflow(0x1, "no");
4564   %}
4565 %}
4566 
4567 //----------OPERAND CLASSES----------------------------------------------------
4568 // Operand Classes are groups of operands that are used to simplify
4569 // instruction definitions by not requiring the AD writer to specify separate
4570 // instructions for every form of operand when the instruction accepts
4571 // multiple operand types with the same basic encoding and format.  The classic
4572 // case of this is memory operands.
4573 
4574 opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4575                indIndex, indIndexScale, indIndexScaleOffset);
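// Illustrative sketch (comments only; the instruct name is a placeholder):
// because 'memory' is an opclass, one definition covers every addressing
// form listed above -- [reg], [reg+disp8], [reg+disp32], [reg+index<<scale],
// [reg+index<<scale+disp], etc. -- without a separate instruct per form:
//
//   instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
//     match(Set dst (AddI dst (LoadI src)));
//     effect(KILL cr);
//     ...
//   %}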
4576 
4577 // Long memory operations are encoded in 2 instructions and a +4 offset.
4578 // This means some kind of offset is always required, so an oop cannot be
4579 // used as the offset (as is done when addressing static globals).
4580 opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4581                     indIndex, indIndexScale, indIndexScaleOffset);
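// Illustrative sketch (comments only): a long access touches the value as a
// low word at $mem and a high word at $mem+4, which is why the displacement
// must stay free for the +4 adjustment.  Compare loadL/storeL below, where
// the two addresses (here called Amemlo/Amemhi) are built from $mem:
//
//   __ movl($dst$$Register,                Amemlo);   // word at $mem
//   __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);   // word at $mem + 4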
4582 
4583 
4584 //----------PIPELINE-----------------------------------------------------------
4585 // Rules which define the behavior of the target architecture's pipeline.
4586 pipeline %{
4587 
4588 //----------ATTRIBUTES---------------------------------------------------------
4589 attributes %{
4590   variable_size_instructions;        // Variable-size instructions
4591   max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4592   instruction_unit_size = 1;         // An instruction is 1 byte long
4593   instruction_fetch_unit_size = 16;  // The processor fetches one line
4594   instruction_fetch_units = 1;       // of 16 bytes
4595 
4596   // List of nop instructions
4597   nops( MachNop );
4598 %}
4599 
4600 //----------RESOURCES----------------------------------------------------------
4601 // Resources are the functional units available to the machine
4602 
4603 // Generic P2/P3 pipeline
4604 // 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4605 // 3 instructions decoded per cycle.
4606 // 2 load/store ops per cycle, 1 branch, 1 FPU,
4607 // 2 ALU op, only ALU0 handles mul/div instructions.
4608 resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4609            MS0, MS1, MEM = MS0 | MS1,
4610            BR, FPU,
4611            ALU0, ALU1, ALU = ALU0 | ALU1 );
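// Illustrative sketch (comments only): in the pipe_class definitions below,
// naming a mask claims any unit in it, while naming a single unit pins that
// unit; e.g.
//
//   ALU    : S3;        // either ALU0 or ALU1 in stage S3
//   ALU0   : S3;        // ALU0 only (mul/div path)
//   DECODE : S0(2);     // any 2 of D0/D1/D2 in stage S0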
4612 
4613 //----------PIPELINE DESCRIPTION-----------------------------------------------
4614 // Pipeline Description specifies the stages in the machine's pipeline
4615 
4616 // Generic P2/P3 pipeline
4617 pipe_desc(S0, S1, S2, S3, S4, S5);
4618 
4619 //----------PIPELINE CLASSES---------------------------------------------------
4620 // Pipeline Classes describe the stages in which input and output are
4621 // referenced by the hardware pipeline.
4622 
4623 // Naming convention: ialu or fpu
4624 // Then: _reg
4625 // Then: _reg if there is a 2nd register
4626 // Then: _long if it's a pair of instructions implementing a long
4627 // Then: _fat if it requires the big decoder
4628 //   Or: _mem if it requires the big decoder and a memory unit.
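// Worked example of the convention: 'ialu_reg_long_mem' below names an
// integer-ALU class whose destination is a register pair (_long) and whose
// source is memory (_mem), so it claims the big decoder and memory units in
// addition to the ALUs.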
4629 
4630 // Integer ALU reg operation
4631 pipe_class ialu_reg(rRegI dst) %{
4632     single_instruction;
4633     dst    : S4(write);
4634     dst    : S3(read);
4635     DECODE : S0;        // any decoder
4636     ALU    : S3;        // any alu
4637 %}
4638 
4639 // Long ALU reg operation
4640 pipe_class ialu_reg_long(eRegL dst) %{
4641     instruction_count(2);
4642     dst    : S4(write);
4643     dst    : S3(read);
4644     DECODE : S0(2);     // any 2 decoders
4645     ALU    : S3(2);     // both alus
4646 %}
4647 
4648 // Integer ALU reg operation using big decoder
4649 pipe_class ialu_reg_fat(rRegI dst) %{
4650     single_instruction;
4651     dst    : S4(write);
4652     dst    : S3(read);
4653     D0     : S0;        // big decoder only
4654     ALU    : S3;        // any alu
4655 %}
4656 
4657 // Long ALU reg operation using big decoder
4658 pipe_class ialu_reg_long_fat(eRegL dst) %{
4659     instruction_count(2);
4660     dst    : S4(write);
4661     dst    : S3(read);
4662     D0     : S0(2);     // big decoder only; twice
4663     ALU    : S3(2);     // any 2 alus
4664 %}
4665 
4666 // Integer ALU reg-reg operation
4667 pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4668     single_instruction;
4669     dst    : S4(write);
4670     src    : S3(read);
4671     DECODE : S0;        // any decoder
4672     ALU    : S3;        // any alu
4673 %}
4674 
4675 // Long ALU reg-reg operation
4676 pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4677     instruction_count(2);
4678     dst    : S4(write);
4679     src    : S3(read);
4680     DECODE : S0(2);     // any 2 decoders
4681     ALU    : S3(2);     // both alus
4682 %}
4683 
4684 // Integer ALU reg-reg operation using big decoder
4685 pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4686     single_instruction;
4687     dst    : S4(write);
4688     src    : S3(read);
4689     D0     : S0;        // big decoder only
4690     ALU    : S3;        // any alu
4691 %}
4692 
4693 // Long ALU reg-reg operation using big decoder
4694 pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4695     instruction_count(2);
4696     dst    : S4(write);
4697     src    : S3(read);
4698     D0     : S0(2);     // big decoder only; twice
4699     ALU    : S3(2);     // both alus
4700 %}
4701 
4702 // Integer ALU reg-mem operation
4703 pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4704     single_instruction;
4705     dst    : S5(write);
4706     mem    : S3(read);
4707     D0     : S0;        // big decoder only
4708     ALU    : S4;        // any alu
4709     MEM    : S3;        // any mem
4710 %}
4711 
4712 // Long ALU reg-mem operation
4713 pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4714     instruction_count(2);
4715     dst    : S5(write);
4716     mem    : S3(read);
4717     D0     : S0(2);     // big decoder only; twice
4718     ALU    : S4(2);     // any 2 alus
4719     MEM    : S3(2);     // both mems
4720 %}
4721 
4722 // Integer mem operation (prefetch)
4723 pipe_class ialu_mem(memory mem)
4724 %{
4725     single_instruction;
4726     mem    : S3(read);
4727     D0     : S0;        // big decoder only
4728     MEM    : S3;        // any mem
4729 %}
4730 
4731 // Integer Store to Memory
4732 pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4733     single_instruction;
4734     mem    : S3(read);
4735     src    : S5(read);
4736     D0     : S0;        // big decoder only
4737     ALU    : S4;        // any alu
4738     MEM    : S3;
4739 %}
4740 
4741 // Long Store to Memory
4742 pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4743     instruction_count(2);
4744     mem    : S3(read);
4745     src    : S5(read);
4746     D0     : S0(2);     // big decoder only; twice
4747     ALU    : S4(2);     // any 2 alus
4748     MEM    : S3(2);     // Both mems
4749 %}
4750 
4751 // Integer Store to Memory
4752 pipe_class ialu_mem_imm(memory mem) %{
4753     single_instruction;
4754     mem    : S3(read);
4755     D0     : S0;        // big decoder only
4756     ALU    : S4;        // any alu
4757     MEM    : S3;
4758 %}
4759 
4760 // Integer ALU0 reg-reg operation
4761 pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4762     single_instruction;
4763     dst    : S4(write);
4764     src    : S3(read);
4765     D0     : S0;        // Big decoder only
4766     ALU0   : S3;        // only alu0
4767 %}
4768 
4769 // Integer ALU0 reg-mem operation
4770 pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4771     single_instruction;
4772     dst    : S5(write);
4773     mem    : S3(read);
4774     D0     : S0;        // big decoder only
4775     ALU0   : S4;        // ALU0 only
4776     MEM    : S3;        // any mem
4777 %}
4778 
4779 // Integer ALU reg-reg operation
4780 pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4781     single_instruction;
4782     cr     : S4(write);
4783     src1   : S3(read);
4784     src2   : S3(read);
4785     DECODE : S0;        // any decoder
4786     ALU    : S3;        // any alu
4787 %}
4788 
4789 // Integer ALU reg-imm operation
4790 pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4791     single_instruction;
4792     cr     : S4(write);
4793     src1   : S3(read);
4794     DECODE : S0;        // any decoder
4795     ALU    : S3;        // any alu
4796 %}
4797 
4798 // Integer ALU reg-mem operation
4799 pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4800     single_instruction;
4801     cr     : S4(write);
4802     src1   : S3(read);
4803     src2   : S3(read);
4804     D0     : S0;        // big decoder only
4805     ALU    : S4;        // any alu
4806     MEM    : S3;
4807 %}
4808 
4809 // Conditional move reg-reg
4810 pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4811     instruction_count(4);
4812     y      : S4(read);
4813     q      : S3(read);
4814     p      : S3(read);
4815     DECODE : S0(4);     // any decoder
4816 %}
4817 
4818 // Conditional move reg-reg
4819 pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4820     single_instruction;
4821     dst    : S4(write);
4822     src    : S3(read);
4823     cr     : S3(read);
4824     DECODE : S0;        // any decoder
4825 %}
4826 
4827 // Conditional move reg-mem
4828 pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4829     single_instruction;
4830     dst    : S4(write);
4831     src    : S3(read);
4832     cr     : S3(read);
4833     DECODE : S0;        // any decoder
4834     MEM    : S3;
4835 %}
4836 
4837 // Conditional move reg-reg long
4838 pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4839     single_instruction;
4840     dst    : S4(write);
4841     src    : S3(read);
4842     cr     : S3(read);
4843     DECODE : S0(2);     // any 2 decoders
4844 %}
4845 
4846 // Conditional move double reg-reg
4847 pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4848     single_instruction;
4849     dst    : S4(write);
4850     src    : S3(read);
4851     cr     : S3(read);
4852     DECODE : S0;        // any decoder
4853 %}
4854 
4855 // Float reg operation
4856 pipe_class fpu_reg(regDPR dst) %{
4857     instruction_count(2);
4858     dst    : S3(read);
4859     DECODE : S0(2);     // any 2 decoders
4860     FPU    : S3;
4861 %}
4862 
4863 // Float reg-reg operation
4864 pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4865     instruction_count(2);
4866     dst    : S4(write);
4867     src    : S3(read);
4868     DECODE : S0(2);     // any 2 decoders
4869     FPU    : S3;
4870 %}
4871 
4872 // Float reg-reg-reg operation
4873 pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4874     instruction_count(3);
4875     dst    : S4(write);
4876     src1   : S3(read);
4877     src2   : S3(read);
4878     DECODE : S0(3);     // any 3 decoders
4879     FPU    : S3(2);
4880 %}
4881 
4882 // Float reg-reg-reg-reg operation
4883 pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4884     instruction_count(4);
4885     dst    : S4(write);
4886     src1   : S3(read);
4887     src2   : S3(read);
4888     src3   : S3(read);
4889     DECODE : S0(4);     // any decoders, 4 decode slots
4890     FPU    : S3(2);
4891 %}
4892 
4893 // Float reg-mem-reg-reg operation
4894 pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4895     instruction_count(4);
4896     dst    : S4(write);
4897     src1   : S3(read);
4898     src2   : S3(read);
4899     src3   : S3(read);
4900     DECODE : S1(3);     // any 3 decoders
4901     D0     : S0;        // Big decoder only
4902     FPU    : S3(2);
4903     MEM    : S3;
4904 %}
4905 
4906 // Float reg-mem operation
4907 pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4908     instruction_count(2);
4909     dst    : S5(write);
4910     mem    : S3(read);
4911     D0     : S0;        // big decoder only
4912     DECODE : S1;        // any decoder for FPU POP
4913     FPU    : S4;
4914     MEM    : S3;        // any mem
4915 %}
4916 
4917 // Float reg-reg-mem operation
4918 pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4919     instruction_count(3);
4920     dst    : S5(write);
4921     src1   : S3(read);
4922     mem    : S3(read);
4923     D0     : S0;        // big decoder only
4924     DECODE : S1(2);     // any decoder for FPU POP
4925     FPU    : S4;
4926     MEM    : S3;        // any mem
4927 %}
4928 
4929 // Float mem-reg operation
4930 pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4931     instruction_count(2);
4932     src    : S5(read);
4933     mem    : S3(read);
4934     DECODE : S0;        // any decoder for FPU PUSH
4935     D0     : S1;        // big decoder only
4936     FPU    : S4;
4937     MEM    : S3;        // any mem
4938 %}
4939 
4940 pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4941     instruction_count(3);
4942     src1   : S3(read);
4943     src2   : S3(read);
4944     mem    : S3(read);
4945     DECODE : S0(2);     // any decoder for FPU PUSH
4946     D0     : S1;        // big decoder only
4947     FPU    : S4;
4948     MEM    : S3;        // any mem
4949 %}
4950 
4951 pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4952     instruction_count(3);
4953     src1   : S3(read);
4954     src2   : S3(read);
4955     mem    : S4(read);
4956     DECODE : S0;        // any decoder for FPU PUSH
4957     D0     : S0(2);     // big decoder only
4958     FPU    : S4;
4959     MEM    : S3(2);     // any mem
4960 %}
4961 
4962 pipe_class fpu_mem_mem(memory dst, memory src1) %{
4963     instruction_count(2);
4964     src1   : S3(read);
4965     dst    : S4(read);
4966     D0     : S0(2);     // big decoder only
4967     MEM    : S3(2);     // any mem
4968 %}
4969 
4970 pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4971     instruction_count(3);
4972     src1   : S3(read);
4973     src2   : S3(read);
4974     dst    : S4(read);
4975     D0     : S0(3);     // big decoder only
4976     FPU    : S4;
4977     MEM    : S3(3);     // any mem
4978 %}
4979 
4980 pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4981     instruction_count(3);
4982     src1   : S4(read);
4983     mem    : S4(read);
4984     DECODE : S0;        // any decoder for FPU PUSH
4985     D0     : S0(2);     // big decoder only
4986     FPU    : S4;
4987     MEM    : S3(2);     // any mem
4988 %}
4989 
4990 // Float load constant
4991 pipe_class fpu_reg_con(regDPR dst) %{
4992     instruction_count(2);
4993     dst    : S5(write);
4994     D0     : S0;        // big decoder only for the load
4995     DECODE : S1;        // any decoder for FPU POP
4996     FPU    : S4;
4997     MEM    : S3;        // any mem
4998 %}
4999 
5000 // Float load constant
5001 pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5002     instruction_count(3);
5003     dst    : S5(write);
5004     src    : S3(read);
5005     D0     : S0;        // big decoder only for the load
5006     DECODE : S1(2);     // any decoder for FPU POP
5007     FPU    : S4;
5008     MEM    : S3;        // any mem
5009 %}
5010 
5011 // Unconditional branch
5012 pipe_class pipe_jmp( label labl ) %{
5013     single_instruction;
5014     BR   : S3;
5015 %}
5016 
5017 // Conditional branch
5018 pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5019     single_instruction;
5020     cr    : S1(read);
5021     BR    : S3;
5022 %}
5023 
5024 // Allocation idiom
5025 pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5026     instruction_count(1); force_serialization;
5027     fixed_latency(6);
5028     heap_ptr : S3(read);
5029     DECODE   : S0(3);
5030     D0       : S2;
5031     MEM      : S3;
5032     ALU      : S3(2);
5033     dst      : S5(write);
5034     BR       : S5;
5035 %}
5036 
5037 // Generic big/slow expanded idiom
5038 pipe_class pipe_slow(  ) %{
5039     instruction_count(10); multiple_bundles; force_serialization;
5040     fixed_latency(100);
5041     D0  : S0(2);
5042     MEM : S3(2);
5043 %}
5044 
5045 // The real do-nothing guy
5046 pipe_class empty( ) %{
5047     instruction_count(0);
5048 %}
5049 
5050 // Define the class for the Nop node
5051 define %{
5052    MachNop = empty;
5053 %}
5054 
5055 %}
5056 
5057 //----------INSTRUCTIONS-------------------------------------------------------
5058 //
5059 // match      -- States which machine-independent subtree may be replaced
5060 //               by this instruction.
5061 // ins_cost   -- The estimated cost of this instruction is used by instruction
5062 //               selection to identify a minimum cost tree of machine
5063 //               instructions that matches a tree of machine-independent
5064 //               instructions.
5065 // format     -- A string providing the disassembly for this instruction.
5066 //               The value of an instruction's operand may be inserted
5067 //               by referring to it with a '$' prefix.
5068 // opcode     -- Three instruction opcodes may be provided.  These are referred
5069 //               to within an encode class as $primary, $secondary, and $tertiary
5070 //               respectively.  The primary opcode is commonly used to
5071 //               indicate the type of machine instruction, while secondary
5072 //               and tertiary are often used for prefix options or addressing
5073 //               modes.
5074 // ins_encode -- A list of encode classes with parameters. The encode class
5075 //               name must have been defined in an 'enc_class' specification
5076 //               in the encode section of the architecture description.
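// Illustrative skeleton (comments only; the name and numbers are placeholders,
// real definitions start immediately below) showing how the attributes above
// fit together:
//
//   instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
//     match(Set dst (AddI dst src));    // ideal subtree this replaces
//     effect(KILL cr);                  // ADD clobbers the flags
//     ins_cost(125);                    // guides minimum-cost selection
//     format %{ "ADD    $dst,$src" %}   // disassembly string
//     opcode(0x03);                     // $primary
//     ins_encode( OpcP, RegReg(dst, src) );
//     ins_pipe( ialu_reg_reg );
//   %}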
5077 
5078 //----------BSWAP-Instruction--------------------------------------------------
5079 instruct bytes_reverse_int(rRegI dst) %{
5080   match(Set dst (ReverseBytesI dst));
5081 
5082   format %{ "BSWAP  $dst" %}
5083   opcode(0x0F, 0xC8);
5084   ins_encode( OpcP, OpcSReg(dst) );
5085   ins_pipe( ialu_reg );
5086 %}
5087 
5088 instruct bytes_reverse_long(eRegL dst) %{
5089   match(Set dst (ReverseBytesL dst));
5090 
5091   format %{ "BSWAP  $dst.lo\n\t"
5092             "BSWAP  $dst.hi\n\t"
5093             "XCHG   $dst.lo $dst.hi" %}
5094 
5095   ins_cost(125);
5096   ins_encode( bswap_long_bytes(dst) );
5097   ins_pipe( ialu_reg_reg);
5098 %}
5099 
5100 instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5101   match(Set dst (ReverseBytesUS dst));
5102   effect(KILL cr);
5103 
5104   format %{ "BSWAP  $dst\n\t"
5105             "SHR    $dst,16\n\t" %}
5106   ins_encode %{
5107     __ bswapl($dst$$Register);
5108     __ shrl($dst$$Register, 16);
5109   %}
5110   ins_pipe( ialu_reg );
5111 %}
5112 
5113 instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5114   match(Set dst (ReverseBytesS dst));
5115   effect(KILL cr);
5116 
5117   format %{ "BSWAP  $dst\n\t"
5118             "SAR    $dst,16\n\t" %}
5119   ins_encode %{
5120     __ bswapl($dst$$Register);
5121     __ sarl($dst$$Register, 16);
5122   %}
5123   ins_pipe( ialu_reg );
5124 %}
5125 
5126 
5127 //---------- Zeros Count Instructions ------------------------------------------
5128 
5129 instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5130   predicate(UseCountLeadingZerosInstruction);
5131   match(Set dst (CountLeadingZerosI src));
5132   effect(KILL cr);
5133 
5134   format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5135   ins_encode %{
5136     __ lzcntl($dst$$Register, $src$$Register);
5137   %}
5138   ins_pipe(ialu_reg);
5139 %}
5140 
5141 instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5142   predicate(!UseCountLeadingZerosInstruction);
5143   match(Set dst (CountLeadingZerosI src));
5144   effect(KILL cr);
5145 
5146   format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5147             "JNZ    skip\n\t"
5148             "MOV    $dst, -1\n"
5149       "skip:\n\t"
5150             "NEG    $dst\n\t"
5151             "ADD    $dst, 31" %}
5152   ins_encode %{
5153     Register Rdst = $dst$$Register;
5154     Register Rsrc = $src$$Register;
5155     Label skip;
5156     __ bsrl(Rdst, Rsrc);
5157     __ jccb(Assembler::notZero, skip);
5158     __ movl(Rdst, -1);
5159     __ bind(skip);
5160     __ negl(Rdst);
5161     __ addl(Rdst, BitsPerInt - 1);
5162   %}
5163   ins_pipe(ialu_reg);
5164 %}
5165 
5166 instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5167   predicate(UseCountLeadingZerosInstruction);
5168   match(Set dst (CountLeadingZerosL src));
5169   effect(TEMP dst, KILL cr);
5170 
5171   format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5172             "JNC    done\n\t"
5173             "LZCNT  $dst, $src.lo\n\t"
5174             "ADD    $dst, 32\n"
5175       "done:" %}
5176   ins_encode %{
5177     Register Rdst = $dst$$Register;
5178     Register Rsrc = $src$$Register;
5179     Label done;
5180     __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5181     __ jccb(Assembler::carryClear, done);
5182     __ lzcntl(Rdst, Rsrc);
5183     __ addl(Rdst, BitsPerInt);
5184     __ bind(done);
5185   %}
5186   ins_pipe(ialu_reg);
5187 %}
5188 
5189 instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5190   predicate(!UseCountLeadingZerosInstruction);
5191   match(Set dst (CountLeadingZerosL src));
5192   effect(TEMP dst, KILL cr);
5193 
5194   format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5195             "JZ     msw_is_zero\n\t"
5196             "ADD    $dst, 32\n\t"
5197             "JMP    not_zero\n"
5198       "msw_is_zero:\n\t"
5199             "BSR    $dst, $src.lo\n\t"
5200             "JNZ    not_zero\n\t"
5201             "MOV    $dst, -1\n"
5202       "not_zero:\n\t"
5203             "NEG    $dst\n\t"
5204             "ADD    $dst, 63\n" %}
5205  ins_encode %{
5206     Register Rdst = $dst$$Register;
5207     Register Rsrc = $src$$Register;
5208     Label msw_is_zero;
5209     Label not_zero;
5210     __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5211     __ jccb(Assembler::zero, msw_is_zero);
5212     __ addl(Rdst, BitsPerInt);
5213     __ jmpb(not_zero);
5214     __ bind(msw_is_zero);
5215     __ bsrl(Rdst, Rsrc);
5216     __ jccb(Assembler::notZero, not_zero);
5217     __ movl(Rdst, -1);
5218     __ bind(not_zero);
5219     __ negl(Rdst);
5220     __ addl(Rdst, BitsPerLong - 1);
5221   %}
5222   ins_pipe(ialu_reg);
5223 %}
5224 
5225 instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5226   predicate(UseCountTrailingZerosInstruction);
5227   match(Set dst (CountTrailingZerosI src));
5228   effect(KILL cr);
5229 
5230   format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5231   ins_encode %{
5232     __ tzcntl($dst$$Register, $src$$Register);
5233   %}
5234   ins_pipe(ialu_reg);
5235 %}
5236 
5237 instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5238   predicate(!UseCountTrailingZerosInstruction);
5239   match(Set dst (CountTrailingZerosI src));
5240   effect(KILL cr);
5241 
5242   format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5243             "JNZ    done\n\t"
5244             "MOV    $dst, 32\n"
5245       "done:" %}
5246   ins_encode %{
5247     Register Rdst = $dst$$Register;
5248     Label done;
5249     __ bsfl(Rdst, $src$$Register);
5250     __ jccb(Assembler::notZero, done);
5251     __ movl(Rdst, BitsPerInt);
5252     __ bind(done);
5253   %}
5254   ins_pipe(ialu_reg);
5255 %}
5256 
5257 instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5258   predicate(UseCountTrailingZerosInstruction);
5259   match(Set dst (CountTrailingZerosL src));
5260   effect(TEMP dst, KILL cr);
5261 
5262   format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long)\n\t"
5263             "JNC    done\n\t"
5264             "TZCNT  $dst, $src.hi\n\t"
5265             "ADD    $dst, 32\n"
5266       "done:" %}
5267   ins_encode %{
5268     Register Rdst = $dst$$Register;
5269     Register Rsrc = $src$$Register;
5270     Label done;
5271     __ tzcntl(Rdst, Rsrc);
5272     __ jccb(Assembler::carryClear, done);
5273     __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5274     __ addl(Rdst, BitsPerInt);
5275     __ bind(done);
5276   %}
5277   ins_pipe(ialu_reg);
5278 %}
5279 
5280 instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5281   predicate(!UseCountTrailingZerosInstruction);
5282   match(Set dst (CountTrailingZerosL src));
5283   effect(TEMP dst, KILL cr);
5284 
5285   format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5286             "JNZ    done\n\t"
5287             "BSF    $dst, $src.hi\n\t"
5288             "JNZ    msw_not_zero\n\t"
5289             "MOV    $dst, 32\n"
5290       "msw_not_zero:\n\t"
5291             "ADD    $dst, 32\n"
5292       "done:" %}
5293   ins_encode %{
5294     Register Rdst = $dst$$Register;
5295     Register Rsrc = $src$$Register;
5296     Label msw_not_zero;
5297     Label done;
5298     __ bsfl(Rdst, Rsrc);
5299     __ jccb(Assembler::notZero, done);
5300     __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5301     __ jccb(Assembler::notZero, msw_not_zero);
5302     __ movl(Rdst, BitsPerInt);
5303     __ bind(msw_not_zero);
5304     __ addl(Rdst, BitsPerInt);
5305     __ bind(done);
5306   %}
5307   ins_pipe(ialu_reg);
5308 %}
5309 
5310 
5311 //---------- Population Count Instructions -------------------------------------
5312 
5313 instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5314   predicate(UsePopCountInstruction);
5315   match(Set dst (PopCountI src));
5316   effect(KILL cr);
5317 
5318   format %{ "POPCNT $dst, $src" %}
5319   ins_encode %{
5320     __ popcntl($dst$$Register, $src$$Register);
5321   %}
5322   ins_pipe(ialu_reg);
5323 %}
5324 
5325 instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5326   predicate(UsePopCountInstruction);
5327   match(Set dst (PopCountI (LoadI mem)));
5328   effect(KILL cr);
5329 
5330   format %{ "POPCNT $dst, $mem" %}
5331   ins_encode %{
5332     __ popcntl($dst$$Register, $mem$$Address);
5333   %}
5334   ins_pipe(ialu_reg);
5335 %}
5336 
5337 // Note: Long.bitCount(long) returns an int.
5338 instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5339   predicate(UsePopCountInstruction);
5340   match(Set dst (PopCountL src));
5341   effect(KILL cr, TEMP tmp, TEMP dst);
5342 
5343   format %{ "POPCNT $dst, $src.lo\n\t"
5344             "POPCNT $tmp, $src.hi\n\t"
5345             "ADD    $dst, $tmp" %}
5346   ins_encode %{
5347     __ popcntl($dst$$Register, $src$$Register);
5348     __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5349     __ addl($dst$$Register, $tmp$$Register);
5350   %}
5351   ins_pipe(ialu_reg);
5352 %}
5353 
5354 // Note: Long.bitCount(long) returns an int.
5355 instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5356   predicate(UsePopCountInstruction);
5357   match(Set dst (PopCountL (LoadL mem)));
5358   effect(KILL cr, TEMP tmp, TEMP dst);
5359 
5360   format %{ "POPCNT $dst, $mem\n\t"
5361             "POPCNT $tmp, $mem+4\n\t"
5362             "ADD    $dst, $tmp" %}
5363   ins_encode %{
5364     //__ popcntl($dst$$Register, $mem$$Address$$first);
5365     //__ popcntl($tmp$$Register, $mem$$Address$$second);
5366     __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5367     __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5368     __ addl($dst$$Register, $tmp$$Register);
5369   %}
5370   ins_pipe(ialu_reg);
5371 %}
5372 
5373 
5374 //----------Load/Store/Move Instructions---------------------------------------
5375 //----------Load Instructions--------------------------------------------------
5376 // Load Byte (8bit signed)
5377 instruct loadB(xRegI dst, memory mem) %{
5378   match(Set dst (LoadB mem));
5379 
5380   ins_cost(125);
5381   format %{ "MOVSX8 $dst,$mem\t# byte" %}
5382 
5383   ins_encode %{
5384     __ movsbl($dst$$Register, $mem$$Address);
5385   %}
5386 
5387   ins_pipe(ialu_reg_mem);
5388 %}
5389 
5390 // Load Byte (8bit signed) into Long Register
5391 instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5392   match(Set dst (ConvI2L (LoadB mem)));
5393   effect(KILL cr);
5394 
5395   ins_cost(375);
5396   format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5397             "MOV    $dst.hi,$dst.lo\n\t"
5398             "SAR    $dst.hi,7" %}
5399 
5400   ins_encode %{
5401     __ movsbl($dst$$Register, $mem$$Address);
5402     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5403     __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already sign-extended.
5404   %}
5405 
5406   ins_pipe(ialu_reg_mem);
5407 %}
5408 
5409 // Load Unsigned Byte (8bit UNsigned)
5410 instruct loadUB(xRegI dst, memory mem) %{
5411   match(Set dst (LoadUB mem));
5412 
5413   ins_cost(125);
5414   format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5415 
5416   ins_encode %{
5417     __ movzbl($dst$$Register, $mem$$Address);
5418   %}
5419 
5420   ins_pipe(ialu_reg_mem);
5421 %}
5422 
5423 // Load Unsigned Byte (8 bit UNsigned) into Long Register
5424 instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5425   match(Set dst (ConvI2L (LoadUB mem)));
5426   effect(KILL cr);
5427 
5428   ins_cost(250);
5429   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5430             "XOR    $dst.hi,$dst.hi" %}
5431 
5432   ins_encode %{
5433     Register Rdst = $dst$$Register;
5434     __ movzbl(Rdst, $mem$$Address);
5435     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5436   %}
5437 
5438   ins_pipe(ialu_reg_mem);
5439 %}
5440 
5441 // Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5442 instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5443   match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5444   effect(KILL cr);
5445 
5446   format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5447             "XOR    $dst.hi,$dst.hi\n\t"
5448             "AND    $dst.lo,right_n_bits($mask, 8)" %}
5449   ins_encode %{
5450     Register Rdst = $dst$$Register;
5451     __ movzbl(Rdst, $mem$$Address);
5452     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5453     __ andl(Rdst, $mask$$constant & right_n_bits(8));
5454   %}
5455   ins_pipe(ialu_reg_mem);
5456 %}
5457 
5458 // Load Short (16bit signed)
5459 instruct loadS(rRegI dst, memory mem) %{
5460   match(Set dst (LoadS mem));
5461 
5462   ins_cost(125);
5463   format %{ "MOVSX  $dst,$mem\t# short" %}
5464 
5465   ins_encode %{
5466     __ movswl($dst$$Register, $mem$$Address);
5467   %}
5468 
5469   ins_pipe(ialu_reg_mem);
5470 %}
5471 
5472 // Load Short (16 bit signed) to Byte (8 bit signed)
5473 instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5474   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5475 
5476   ins_cost(125);
5477   format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5478   ins_encode %{
5479     __ movsbl($dst$$Register, $mem$$Address);
5480   %}
5481   ins_pipe(ialu_reg_mem);
5482 %}
5483 
5484 // Load Short (16bit signed) into Long Register
5485 instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5486   match(Set dst (ConvI2L (LoadS mem)));
5487   effect(KILL cr);
5488 
5489   ins_cost(375);
5490   format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5491             "MOV    $dst.hi,$dst.lo\n\t"
5492             "SAR    $dst.hi,15" %}
5493 
5494   ins_encode %{
5495     __ movswl($dst$$Register, $mem$$Address);
5496     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5497     __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already sign-extended.
5498   %}
5499 
5500   ins_pipe(ialu_reg_mem);
5501 %}
5502 
5503 // Load Unsigned Short/Char (16bit unsigned)
5504 instruct loadUS(rRegI dst, memory mem) %{
5505   match(Set dst (LoadUS mem));
5506 
5507   ins_cost(125);
5508   format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5509 
5510   ins_encode %{
5511     __ movzwl($dst$$Register, $mem$$Address);
5512   %}
5513 
5514   ins_pipe(ialu_reg_mem);
5515 %}
5516 
5517 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5518 instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5519   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5520 
5521   ins_cost(125);
5522   format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5523   ins_encode %{
5524     __ movsbl($dst$$Register, $mem$$Address);
5525   %}
5526   ins_pipe(ialu_reg_mem);
5527 %}
5528 
5529 // Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5530 instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5531   match(Set dst (ConvI2L (LoadUS mem)));
5532   effect(KILL cr);
5533 
5534   ins_cost(250);
5535   format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5536             "XOR    $dst.hi,$dst.hi" %}
5537 
5538   ins_encode %{
5539     __ movzwl($dst$$Register, $mem$$Address);
5540     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5541   %}
5542 
5543   ins_pipe(ialu_reg_mem);
5544 %}
5545 
5546 // Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5547 instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5548   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5549   effect(KILL cr);
5550 
5551   format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5552             "XOR    $dst.hi,$dst.hi" %}
5553   ins_encode %{
5554     Register Rdst = $dst$$Register;
5555     __ movzbl(Rdst, $mem$$Address);
5556     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5557   %}
5558   ins_pipe(ialu_reg_mem);
5559 %}
5560 
5561 // Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5562 instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5563   match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5564   effect(KILL cr);
5565 
5566   format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5567             "XOR    $dst.hi,$dst.hi\n\t"
5568             "AND    $dst.lo,right_n_bits($mask, 16)" %}
5569   ins_encode %{
5570     Register Rdst = $dst$$Register;
5571     __ movzwl(Rdst, $mem$$Address);
5572     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5573     __ andl(Rdst, $mask$$constant & right_n_bits(16));
5574   %}
5575   ins_pipe(ialu_reg_mem);
5576 %}
5577 
5578 // Load Integer
5579 instruct loadI(rRegI dst, memory mem) %{
5580   match(Set dst (LoadI mem));
5581 
5582   ins_cost(125);
5583   format %{ "MOV    $dst,$mem\t# int" %}
5584 
5585   ins_encode %{
5586     __ movl($dst$$Register, $mem$$Address);
5587   %}
5588 
5589   ins_pipe(ialu_reg_mem);
5590 %}
5591 
5592 // Load Integer (32 bit signed) to Byte (8 bit signed)
5593 instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5594   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5595 
5596   ins_cost(125);
5597   format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5598   ins_encode %{
5599     __ movsbl($dst$$Register, $mem$$Address);
5600   %}
5601   ins_pipe(ialu_reg_mem);
5602 %}
5603 
5604 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5605 instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5606   match(Set dst (AndI (LoadI mem) mask));
5607 
5608   ins_cost(125);
5609   format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5610   ins_encode %{
5611     __ movzbl($dst$$Register, $mem$$Address);
5612   %}
5613   ins_pipe(ialu_reg_mem);
5614 %}
5615 
5616 // Load Integer (32 bit signed) to Short (16 bit signed)
5617 instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5618   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5619 
5620   ins_cost(125);
5621   format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5622   ins_encode %{
5623     __ movswl($dst$$Register, $mem$$Address);
5624   %}
5625   ins_pipe(ialu_reg_mem);
5626 %}
5627 
5628 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5629 instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5630   match(Set dst (AndI (LoadI mem) mask));
5631 
5632   ins_cost(125);
5633   format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5634   ins_encode %{
5635     __ movzwl($dst$$Register, $mem$$Address);
5636   %}
5637   ins_pipe(ialu_reg_mem);
5638 %}
5639 
5640 // Load Integer into Long Register
5641 instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5642   match(Set dst (ConvI2L (LoadI mem)));
5643   effect(KILL cr);
5644 
5645   ins_cost(375);
5646   format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5647             "MOV    $dst.hi,$dst.lo\n\t"
5648             "SAR    $dst.hi,31" %}
5649 
5650   ins_encode %{
5651     __ movl($dst$$Register, $mem$$Address);
5652     __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5653     __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5654   %}
5655 
5656   ins_pipe(ialu_reg_mem);
5657 %}
5658 
5659 // Load Integer with mask 0xFF into Long Register
5660 instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5661   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5662   effect(KILL cr);
5663 
5664   format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5665             "XOR    $dst.hi,$dst.hi" %}
5666   ins_encode %{
5667     Register Rdst = $dst$$Register;
5668     __ movzbl(Rdst, $mem$$Address);
5669     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5670   %}
5671   ins_pipe(ialu_reg_mem);
5672 %}
5673 
5674 // Load Integer with mask 0xFFFF into Long Register
5675 instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5676   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5677   effect(KILL cr);
5678 
5679   format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5680             "XOR    $dst.hi,$dst.hi" %}
5681   ins_encode %{
5682     Register Rdst = $dst$$Register;
5683     __ movzwl(Rdst, $mem$$Address);
5684     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5685   %}
5686   ins_pipe(ialu_reg_mem);
5687 %}
5688 
5689 // Load Integer with 31-bit mask into Long Register
5690 instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5691   match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5692   effect(KILL cr);
5693 
5694   format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5695             "XOR    $dst.hi,$dst.hi\n\t"
5696             "AND    $dst.lo,$mask" %}
5697   ins_encode %{
5698     Register Rdst = $dst$$Register;
5699     __ movl(Rdst, $mem$$Address);
5700     __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5701     __ andl(Rdst, $mask$$constant);
5702   %}
5703   ins_pipe(ialu_reg_mem);
5704 %}
5705 
5706 // Load Unsigned Integer into Long Register
5707 instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5708   match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5709   effect(KILL cr);
5710 
5711   ins_cost(250);
5712   format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5713             "XOR    $dst.hi,$dst.hi" %}
5714 
5715   ins_encode %{
5716     __ movl($dst$$Register, $mem$$Address);
5717     __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5718   %}
5719 
5720   ins_pipe(ialu_reg_mem);
5721 %}
5722 
5723 // Load Long.  Cannot clobber address while loading, so restrict address
5724 // register to ESI
5725 instruct loadL(eRegL dst, load_long_memory mem) %{
5726   predicate(!((LoadLNode*)n)->require_atomic_access());
5727   match(Set dst (LoadL mem));
5728 
5729   ins_cost(250);
5730   format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5731             "MOV    $dst.hi,$mem+4" %}
5732 
5733   ins_encode %{
5734     Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5735     Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5736     __ movl($dst$$Register, Amemlo);
5737     __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5738   %}
5739 
5740   ins_pipe(ialu_reg_long_mem);
5741 %}
5742 
5743 // Volatile Load Long.  Must be atomic, so do 64-bit FILD
5744 // then store it down to the stack and reload on the int
5745 // side.
5746 instruct loadL_volatile(stackSlotL dst, memory mem) %{
5747   predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5748   match(Set dst (LoadL mem));
5749 
5750   ins_cost(200);
5751   format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5752             "FISTp  $dst" %}
5753   ins_encode(enc_loadL_volatile(mem,dst));
5754   ins_pipe( fpu_reg_mem );
5755 %}
5756 
5757 instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5758   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5759   match(Set dst (LoadL mem));
5760   effect(TEMP tmp);
5761   ins_cost(180);
5762   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5763             "MOVSD  $dst,$tmp" %}
5764   ins_encode %{
5765     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5766     __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5767   %}
5768   ins_pipe( pipe_slow );
5769 %}
5770 
5771 instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5772   predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5773   match(Set dst (LoadL mem));
5774   effect(TEMP tmp);
5775   ins_cost(160);
5776   format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5777             "MOVD   $dst.lo,$tmp\n\t"
5778             "PSRLQ  $tmp,32\n\t"
5779             "MOVD   $dst.hi,$tmp" %}
5780   ins_encode %{
5781     __ movdbl($tmp$$XMMRegister, $mem$$Address);
5782     __ movdl($dst$$Register, $tmp$$XMMRegister);
5783     __ psrlq($tmp$$XMMRegister, 32);
5784     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5785   %}
5786   ins_pipe( pipe_slow );
5787 %}
5788 
5789 // Load Range
5790 instruct loadRange(rRegI dst, memory mem) %{
5791   match(Set dst (LoadRange mem));
5792 
5793   ins_cost(125);
5794   format %{ "MOV    $dst,$mem" %}
5795   opcode(0x8B);
5796   ins_encode( OpcP, RegMem(dst,mem));
5797   ins_pipe( ialu_reg_mem );
5798 %}
5799 
5800 
5801 // Load Pointer
5802 instruct loadP(eRegP dst, memory mem) %{
5803   match(Set dst (LoadP mem));
5804 
5805   ins_cost(125);
5806   format %{ "MOV    $dst,$mem" %}
5807   opcode(0x8B);
5808   ins_encode( OpcP, RegMem(dst,mem));
5809   ins_pipe( ialu_reg_mem );
5810 %}
5811 
5812 // Load Klass Pointer
5813 instruct loadKlass(eRegP dst, memory mem) %{
5814   match(Set dst (LoadKlass mem));
5815 
5816   ins_cost(125);
5817   format %{ "MOV    $dst,$mem" %}
5818   opcode(0x8B);
5819   ins_encode( OpcP, RegMem(dst,mem));
5820   ins_pipe( ialu_reg_mem );
5821 %}
5822 
5823 // Load Double
5824 instruct loadDPR(regDPR dst, memory mem) %{
5825   predicate(UseSSE<=1);
5826   match(Set dst (LoadD mem));
5827 
5828   ins_cost(150);
5829   format %{ "FLD_D  ST,$mem\n\t"
5830             "FSTP   $dst" %}
5831   opcode(0xDD);               /* DD /0 */
5832   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5833               Pop_Reg_DPR(dst) );
5834   ins_pipe( fpu_reg_mem );
5835 %}
5836 
5837 // Load Double to XMM
5838 instruct loadD(regD dst, memory mem) %{
5839   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5840   match(Set dst (LoadD mem));
5841   ins_cost(145);
5842   format %{ "MOVSD  $dst,$mem" %}
5843   ins_encode %{
5844     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5845   %}
5846   ins_pipe( pipe_slow );
5847 %}
5848 
5849 instruct loadD_partial(regD dst, memory mem) %{
5850   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5851   match(Set dst (LoadD mem));
5852   ins_cost(145);
5853   format %{ "MOVLPD $dst,$mem" %}
5854   ins_encode %{
5855     __ movdbl ($dst$$XMMRegister, $mem$$Address);
5856   %}
5857   ins_pipe( pipe_slow );
5858 %}
5859 
5860 // Load to XMM register (single-precision floating point)
5861 // MOVSS instruction
5862 instruct loadF(regF dst, memory mem) %{
5863   predicate(UseSSE>=1);
5864   match(Set dst (LoadF mem));
5865   ins_cost(145);
5866   format %{ "MOVSS  $dst,$mem" %}
5867   ins_encode %{
5868     __ movflt ($dst$$XMMRegister, $mem$$Address);
5869   %}
5870   ins_pipe( pipe_slow );
5871 %}
5872 
5873 // Load Float
5874 instruct loadFPR(regFPR dst, memory mem) %{
5875   predicate(UseSSE==0);
5876   match(Set dst (LoadF mem));
5877 
5878   ins_cost(150);
5879   format %{ "FLD_S  ST,$mem\n\t"
5880             "FSTP   $dst" %}
5881   opcode(0xD9);               /* D9 /0 */
5882   ins_encode( OpcP, RMopc_Mem(0x00,mem),
5883               Pop_Reg_FPR(dst) );
5884   ins_pipe( fpu_reg_mem );
5885 %}
5886 
5887 // Load Effective Address
5888 instruct leaP8(eRegP dst, indOffset8 mem) %{
5889   match(Set dst mem);
5890 
5891   ins_cost(110);
5892   format %{ "LEA    $dst,$mem" %}
5893   opcode(0x8D);
5894   ins_encode( OpcP, RegMem(dst,mem));
5895   ins_pipe( ialu_reg_reg_fat );
5896 %}
5897 
5898 instruct leaP32(eRegP dst, indOffset32 mem) %{
5899   match(Set dst mem);
5900 
5901   ins_cost(110);
5902   format %{ "LEA    $dst,$mem" %}
5903   opcode(0x8D);
5904   ins_encode( OpcP, RegMem(dst,mem));
5905   ins_pipe( ialu_reg_reg_fat );
5906 %}
5907 
5908 instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5909   match(Set dst mem);
5910 
5911   ins_cost(110);
5912   format %{ "LEA    $dst,$mem" %}
5913   opcode(0x8D);
5914   ins_encode( OpcP, RegMem(dst,mem));
5915   ins_pipe( ialu_reg_reg_fat );
5916 %}
5917 
5918 instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5919   match(Set dst mem);
5920 
5921   ins_cost(110);
5922   format %{ "LEA    $dst,$mem" %}
5923   opcode(0x8D);
5924   ins_encode( OpcP, RegMem(dst,mem));
5925   ins_pipe( ialu_reg_reg_fat );
5926 %}
5927 
5928 instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5929   match(Set dst mem);
5930 
5931   ins_cost(110);
5932   format %{ "LEA    $dst,$mem" %}
5933   opcode(0x8D);
5934   ins_encode( OpcP, RegMem(dst,mem));
5935   ins_pipe( ialu_reg_reg_fat );
5936 %}
5937 
5938 // Load Constant
5939 instruct loadConI(rRegI dst, immI src) %{
5940   match(Set dst src);
5941 
5942   format %{ "MOV    $dst,$src" %}
5943   ins_encode( LdImmI(dst, src) );
5944   ins_pipe( ialu_reg_fat );
5945 %}
5946 
5947 // Load Constant zero
5948 instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5949   match(Set dst src);
5950   effect(KILL cr);
5951 
5952   ins_cost(50);
5953   format %{ "XOR    $dst,$dst" %}
5954   opcode(0x33);  /* + rd */
5955   ins_encode( OpcP, RegReg( dst, dst ) );
5956   ins_pipe( ialu_reg );
5957 %}
5958 
5959 instruct loadConP(eRegP dst, immP src) %{
5960   match(Set dst src);
5961 
5962   format %{ "MOV    $dst,$src" %}
5963   opcode(0xB8);  /* + rd */
5964   ins_encode( LdImmP(dst, src) );
5965   ins_pipe( ialu_reg_fat );
5966 %}
5967 
5968 instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5969   match(Set dst src);
5970   effect(KILL cr);
5971   ins_cost(200);
5972   format %{ "MOV    $dst.lo,$src.lo\n\t"
5973             "MOV    $dst.hi,$src.hi" %}
5974   opcode(0xB8);
5975   ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5976   ins_pipe( ialu_reg_long_fat );
5977 %}
5978 
5979 instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5980   match(Set dst src);
5981   effect(KILL cr);
5982   ins_cost(150);
5983   format %{ "XOR    $dst.lo,$dst.lo\n\t"
5984             "XOR    $dst.hi,$dst.hi" %}
5985   opcode(0x33,0x33);
5986   ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5987   ins_pipe( ialu_reg_long );
5988 %}
5989 
5990 // The instruction usage is guarded by predicate in operand immFPR().
5991 instruct loadConFPR(regFPR dst, immFPR con) %{
5992   match(Set dst con);
5993   ins_cost(125);
5994   format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
5995             "FSTP   $dst" %}
5996   ins_encode %{
5997     __ fld_s($constantaddress($con));
5998     __ fstp_d($dst$$reg);
5999   %}
6000   ins_pipe(fpu_reg_con);
6001 %}
6002 
6003 // The instruction usage is guarded by predicate in operand immFPR0().
6004 instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6005   match(Set dst con);
6006   ins_cost(125);
6007   format %{ "FLDZ   ST\n\t"
6008             "FSTP   $dst" %}
6009   ins_encode %{
6010     __ fldz();
6011     __ fstp_d($dst$$reg);
6012   %}
6013   ins_pipe(fpu_reg_con);
6014 %}
6015 
6016 // The instruction usage is guarded by predicate in operand immFPR1().
6017 instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6018   match(Set dst con);
6019   ins_cost(125);
6020   format %{ "FLD1   ST\n\t"
6021             "FSTP   $dst" %}
6022   ins_encode %{
6023     __ fld1();
6024     __ fstp_d($dst$$reg);
6025   %}
6026   ins_pipe(fpu_reg_con);
6027 %}
6028 
6029 // The instruction usage is guarded by predicate in operand immF().
6030 instruct loadConF(regF dst, immF con) %{
6031   match(Set dst con);
6032   ins_cost(125);
6033   format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6034   ins_encode %{
6035     __ movflt($dst$$XMMRegister, $constantaddress($con));
6036   %}
6037   ins_pipe(pipe_slow);
6038 %}
6039 
6040 // The instruction usage is guarded by predicate in operand immF0().
6041 instruct loadConF0(regF dst, immF0 src) %{
6042   match(Set dst src);
6043   ins_cost(100);
6044   format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6045   ins_encode %{
6046     __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6047   %}
6048   ins_pipe(pipe_slow);
6049 %}
6050 
6051 // The instruction usage is guarded by predicate in operand immDPR().
6052 instruct loadConDPR(regDPR dst, immDPR con) %{
6053   match(Set dst con);
6054   ins_cost(125);
6055 
6056   format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6057             "FSTP   $dst" %}
6058   ins_encode %{
6059     __ fld_d($constantaddress($con));
6060     __ fstp_d($dst$$reg);
6061   %}
6062   ins_pipe(fpu_reg_con);
6063 %}
6064 
6065 // The instruction usage is guarded by predicate in operand immDPR0().
6066 instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6067   match(Set dst con);
6068   ins_cost(125);
6069 
6070   format %{ "FLDZ   ST\n\t"
6071             "FSTP   $dst" %}
6072   ins_encode %{
6073     __ fldz();
6074     __ fstp_d($dst$$reg);
6075   %}
6076   ins_pipe(fpu_reg_con);
6077 %}
6078 
6079 // The instruction usage is guarded by predicate in operand immDPR1().
6080 instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6081   match(Set dst con);
6082   ins_cost(125);
6083 
6084   format %{ "FLD1   ST\n\t"
6085             "FSTP   $dst" %}
6086   ins_encode %{
6087     __ fld1();
6088     __ fstp_d($dst$$reg);
6089   %}
6090   ins_pipe(fpu_reg_con);
6091 %}
6092 
6093 // The instruction usage is guarded by predicate in operand immD().
6094 instruct loadConD(regD dst, immD con) %{
6095   match(Set dst con);
6096   ins_cost(125);
6097   format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6098   ins_encode %{
6099     __ movdbl($dst$$XMMRegister, $constantaddress($con));
6100   %}
6101   ins_pipe(pipe_slow);
6102 %}
6103 
6104 // The instruction usage is guarded by predicate in operand immD0().
6105 instruct loadConD0(regD dst, immD0 src) %{
6106   match(Set dst src);
6107   ins_cost(100);
6108   format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6109   ins_encode %{
6110     __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6111   %}
6112   ins_pipe( pipe_slow );
6113 %}
6114 
6115 // Load Stack Slot
6116 instruct loadSSI(rRegI dst, stackSlotI src) %{
6117   match(Set dst src);
6118   ins_cost(125);
6119 
6120   format %{ "MOV    $dst,$src" %}
6121   opcode(0x8B);
6122   ins_encode( OpcP, RegMem(dst,src));
6123   ins_pipe( ialu_reg_mem );
6124 %}
6125 
6126 instruct loadSSL(eRegL dst, stackSlotL src) %{
6127   match(Set dst src);
6128 
6129   ins_cost(200);
6130   format %{ "MOV    $dst.lo,$src.lo\n\t"
6131             "MOV    $dst.hi,$src.hi" %}
6132   opcode(0x8B, 0x8B);
6133   ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6134   ins_pipe( ialu_mem_long_reg );
6135 %}
6136 
6137 // Load Stack Slot
6138 instruct loadSSP(eRegP dst, stackSlotP src) %{
6139   match(Set dst src);
6140   ins_cost(125);
6141 
6142   format %{ "MOV    $dst,$src" %}
6143   opcode(0x8B);
6144   ins_encode( OpcP, RegMem(dst,src));
6145   ins_pipe( ialu_reg_mem );
6146 %}
6147 
6148 // Load Stack Slot
6149 instruct loadSSF(regFPR dst, stackSlotF src) %{
6150   match(Set dst src);
6151   ins_cost(125);
6152 
6153   format %{ "FLD_S  $src\n\t"
6154             "FSTP   $dst" %}
6155   opcode(0xD9);               /* D9 /0, FLD m32real */
6156   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6157               Pop_Reg_FPR(dst) );
6158   ins_pipe( fpu_reg_mem );
6159 %}
6160 
6161 // Load Stack Slot
6162 instruct loadSSD(regDPR dst, stackSlotD src) %{
6163   match(Set dst src);
6164   ins_cost(125);
6165 
6166   format %{ "FLD_D  $src\n\t"
6167             "FSTP   $dst" %}
6168   opcode(0xDD);               /* DD /0, FLD m64real */
6169   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6170               Pop_Reg_DPR(dst) );
6171   ins_pipe( fpu_reg_mem );
6172 %}
6173 
6174 // Prefetch instructions for allocation.
6175 // Must be safe to execute with invalid address (cannot fault).
6176 
6177 instruct prefetchAlloc0( memory mem ) %{
6178   predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6179   match(PrefetchAllocation mem);
6180   ins_cost(0);
6181   size(0);
6182   format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6183   ins_encode();
6184   ins_pipe(empty);
6185 %}
6186 
6187 instruct prefetchAlloc( memory mem ) %{
6188   predicate(AllocatePrefetchInstr==3);
6189   match( PrefetchAllocation mem );
6190   ins_cost(100);
6191 
6192   format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6193   ins_encode %{
6194     __ prefetchw($mem$$Address);
6195   %}
6196   ins_pipe(ialu_mem);
6197 %}
6198 
6199 instruct prefetchAllocNTA( memory mem ) %{
6200   predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6201   match(PrefetchAllocation mem);
6202   ins_cost(100);
6203 
6204   format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6205   ins_encode %{
6206     __ prefetchnta($mem$$Address);
6207   %}
6208   ins_pipe(ialu_mem);
6209 %}
6210 
6211 instruct prefetchAllocT0( memory mem ) %{
6212   predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6213   match(PrefetchAllocation mem);
6214   ins_cost(100);
6215 
6216   format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6217   ins_encode %{
6218     __ prefetcht0($mem$$Address);
6219   %}
6220   ins_pipe(ialu_mem);
6221 %}
6222 
6223 instruct prefetchAllocT2( memory mem ) %{
6224   predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6225   match(PrefetchAllocation mem);
6226   ins_cost(100);
6227 
6228   format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6229   ins_encode %{
6230     __ prefetcht2($mem$$Address);
6231   %}
6232   ins_pipe(ialu_mem);
6233 %}
6234 
6235 //----------Store Instructions-------------------------------------------------
6236 
6237 // Store Byte
6238 instruct storeB(memory mem, xRegI src) %{
6239   match(Set mem (StoreB mem src));
6240 
6241   ins_cost(125);
6242   format %{ "MOV8   $mem,$src" %}
6243   opcode(0x88);
6244   ins_encode( OpcP, RegMem( src, mem ) );
6245   ins_pipe( ialu_mem_reg );
6246 %}
6247 
6248 // Store Char/Short
6249 instruct storeC(memory mem, rRegI src) %{
6250   match(Set mem (StoreC mem src));
6251 
6252   ins_cost(125);
6253   format %{ "MOV16  $mem,$src" %}
6254   opcode(0x89, 0x66);
6255   ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6256   ins_pipe( ialu_mem_reg );
6257 %}
6258 
6259 // Store Integer
6260 instruct storeI(memory mem, rRegI src) %{
6261   match(Set mem (StoreI mem src));
6262 
6263   ins_cost(125);
6264   format %{ "MOV    $mem,$src" %}
6265   opcode(0x89);
6266   ins_encode( OpcP, RegMem( src, mem ) );
6267   ins_pipe( ialu_mem_reg );
6268 %}
6269 
6270 // Store Long
6271 instruct storeL(long_memory mem, eRegL src) %{
6272   predicate(!((StoreLNode*)n)->require_atomic_access());
6273   match(Set mem (StoreL mem src));
6274 
6275   ins_cost(200);
6276   format %{ "MOV    $mem,$src.lo\n\t"
6277             "MOV    $mem+4,$src.hi" %}
6278   opcode(0x89, 0x89);
6279   ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6280   ins_pipe( ialu_mem_long_reg );
6281 %}
6282 
6283 // Store Long to Integer
6284 instruct storeL2I(memory mem, eRegL src) %{
6285   match(Set mem (StoreI mem (ConvL2I src)));
6286 
6287   format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6288   ins_encode %{
6289     __ movl($mem$$Address, $src$$Register);
6290   %}
6291   ins_pipe(ialu_mem_reg);
6292 %}
6293 
6294 // Volatile Store Long.  Must be atomic, so move it into
6295 // the FP TOS and then do a 64-bit FIST.  Has to probe the
6296 // target address before the store (for null-ptr checks)
6297 // so the memory operand is used twice in the encoding.
6298 instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6299   predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6300   match(Set mem (StoreL mem src));
6301   effect( KILL cr );
6302   ins_cost(400);
6303   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6304             "FILD   $src\n\t"
6305             "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6306   opcode(0x3B);
6307   ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6308   ins_pipe( fpu_reg_mem );
6309 %}
6310 
6311 instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6312   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6313   match(Set mem (StoreL mem src));
6314   effect( TEMP tmp, KILL cr );
6315   ins_cost(380);
6316   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6317             "MOVSD  $tmp,$src\n\t"
6318             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6319   ins_encode %{
6320     __ cmpl(rax, $mem$$Address);
6321     __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6322     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6323   %}
6324   ins_pipe( pipe_slow );
6325 %}
6326 
6327 instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6328   predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6329   match(Set mem (StoreL mem src));
6330   effect( TEMP tmp2 , TEMP tmp, KILL cr );
6331   ins_cost(360);
6332   format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6333             "MOVD   $tmp,$src.lo\n\t"
6334             "MOVD   $tmp2,$src.hi\n\t"
6335             "PUNPCKLDQ $tmp,$tmp2\n\t"
6336             "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6337   ins_encode %{
6338     __ cmpl(rax, $mem$$Address);
6339     __ movdl($tmp$$XMMRegister, $src$$Register);
6340     __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6341     __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6342     __ movdbl($mem$$Address, $tmp$$XMMRegister);
6343   %}
6344   ins_pipe( pipe_slow );
6345 %}
6346 
6347 // Store Pointer; for storing unknown oops and raw pointers
6348 instruct storeP(memory mem, anyRegP src) %{
6349   match(Set mem (StoreP mem src));
6350 
6351   ins_cost(125);
6352   format %{ "MOV    $mem,$src" %}
6353   opcode(0x89);
6354   ins_encode( OpcP, RegMem( src, mem ) );
6355   ins_pipe( ialu_mem_reg );
6356 %}
6357 
6358 // Store Integer Immediate
6359 instruct storeImmI(memory mem, immI src) %{
6360   match(Set mem (StoreI mem src));
6361 
6362   ins_cost(150);
6363   format %{ "MOV    $mem,$src" %}
6364   opcode(0xC7);               /* C7 /0 */
6365   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6366   ins_pipe( ialu_mem_imm );
6367 %}
6368 
6369 // Store Short/Char Immediate
6370 instruct storeImmI16(memory mem, immI16 src) %{
6371   predicate(UseStoreImmI16);
6372   match(Set mem (StoreC mem src));
6373 
6374   ins_cost(150);
6375   format %{ "MOV16  $mem,$src" %}
6376   opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6377   ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6378   ins_pipe( ialu_mem_imm );
6379 %}
6380 
6381 // Store Pointer Immediate; null pointers or constant oops that do not
6382 // need card-mark barriers.
6383 instruct storeImmP(memory mem, immP src) %{
6384   match(Set mem (StoreP mem src));
6385 
6386   ins_cost(150);
6387   format %{ "MOV    $mem,$src" %}
6388   opcode(0xC7);               /* C7 /0 */
6389   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6390   ins_pipe( ialu_mem_imm );
6391 %}
6392 
6393 // Store Byte Immediate
6394 instruct storeImmB(memory mem, immI8 src) %{
6395   match(Set mem (StoreB mem src));
6396 
6397   ins_cost(150);
6398   format %{ "MOV8   $mem,$src" %}
6399   opcode(0xC6);               /* C6 /0 */
6400   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6401   ins_pipe( ialu_mem_imm );
6402 %}
6403 
6404 // Store CMS card-mark Immediate
6405 instruct storeImmCM(memory mem, immI8 src) %{
6406   match(Set mem (StoreCM mem src));
6407 
6408   ins_cost(150);
6409   format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6410   opcode(0xC6);               /* C6 /0 */
6411   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6412   ins_pipe( ialu_mem_imm );
6413 %}
6414 
6415 // Store Double
6416 instruct storeDPR( memory mem, regDPR1 src) %{
6417   predicate(UseSSE<=1);
6418   match(Set mem (StoreD mem src));
6419 
6420   ins_cost(100);
6421   format %{ "FST_D  $mem,$src" %}
6422   opcode(0xDD);       /* DD /2 */
6423   ins_encode( enc_FPR_store(mem,src) );
6424   ins_pipe( fpu_mem_reg );
6425 %}
6426 
// Store Double does rounding on x86
6428 instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6429   predicate(UseSSE<=1);
6430   match(Set mem (StoreD mem (RoundDouble src)));
6431 
6432   ins_cost(100);
6433   format %{ "FST_D  $mem,$src\t# round" %}
6434   opcode(0xDD);       /* DD /2 */
6435   ins_encode( enc_FPR_store(mem,src) );
6436   ins_pipe( fpu_mem_reg );
6437 %}
6438 
// Store XMM register to memory (double-precision floating point)
6440 // MOVSD instruction
6441 instruct storeD(memory mem, regD src) %{
6442   predicate(UseSSE>=2);
6443   match(Set mem (StoreD mem src));
6444   ins_cost(95);
6445   format %{ "MOVSD  $mem,$src" %}
6446   ins_encode %{
6447     __ movdbl($mem$$Address, $src$$XMMRegister);
6448   %}
6449   ins_pipe( pipe_slow );
6450 %}
6451 
6452 // Store XMM register to memory (single-precision floating point)
6453 // MOVSS instruction
6454 instruct storeF(memory mem, regF src) %{
6455   predicate(UseSSE>=1);
6456   match(Set mem (StoreF mem src));
6457   ins_cost(95);
6458   format %{ "MOVSS  $mem,$src" %}
6459   ins_encode %{
6460     __ movflt($mem$$Address, $src$$XMMRegister);
6461   %}
6462   ins_pipe( pipe_slow );
6463 %}
6464 
6465 // Store Float
6466 instruct storeFPR( memory mem, regFPR1 src) %{
6467   predicate(UseSSE==0);
6468   match(Set mem (StoreF mem src));
6469 
6470   ins_cost(100);
6471   format %{ "FST_S  $mem,$src" %}
6472   opcode(0xD9);       /* D9 /2 */
6473   ins_encode( enc_FPR_store(mem,src) );
6474   ins_pipe( fpu_mem_reg );
6475 %}
6476 
6477 // Store Float does rounding on x86
6478 instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6479   predicate(UseSSE==0);
6480   match(Set mem (StoreF mem (RoundFloat src)));
6481 
6482   ins_cost(100);
6483   format %{ "FST_S  $mem,$src\t# round" %}
6484   opcode(0xD9);       /* D9 /2 */
6485   ins_encode( enc_FPR_store(mem,src) );
6486   ins_pipe( fpu_mem_reg );
6487 %}
6488 
// Store Float from Double does rounding on x86
6490 instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6491   predicate(UseSSE<=1);
6492   match(Set mem (StoreF mem (ConvD2F src)));
6493 
6494   ins_cost(100);
6495   format %{ "FST_S  $mem,$src\t# D-round" %}
6496   opcode(0xD9);       /* D9 /2 */
6497   ins_encode( enc_FPR_store(mem,src) );
6498   ins_pipe( fpu_mem_reg );
6499 %}
6500 
// Store immediate Float value (it is faster than storing from an FPU register)
6502 // The instruction usage is guarded by predicate in operand immFPR().
6503 instruct storeFPR_imm( memory mem, immFPR src) %{
6504   match(Set mem (StoreF mem src));
6505 
6506   ins_cost(50);
6507   format %{ "MOV    $mem,$src\t# store float" %}
6508   opcode(0xC7);               /* C7 /0 */
6509   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6510   ins_pipe( ialu_mem_imm );
6511 %}
6512 
// Store immediate Float value (it is faster than storing from an XMM register)
6514 // The instruction usage is guarded by predicate in operand immF().
6515 instruct storeF_imm( memory mem, immF src) %{
6516   match(Set mem (StoreF mem src));
6517 
6518   ins_cost(50);
6519   format %{ "MOV    $mem,$src\t# store float" %}
6520   opcode(0xC7);               /* C7 /0 */
6521   ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6522   ins_pipe( ialu_mem_imm );
6523 %}
6524 
6525 // Store Integer to stack slot
6526 instruct storeSSI(stackSlotI dst, rRegI src) %{
6527   match(Set dst src);
6528 
6529   ins_cost(100);
6530   format %{ "MOV    $dst,$src" %}
6531   opcode(0x89);
6532   ins_encode( OpcPRegSS( dst, src ) );
6533   ins_pipe( ialu_mem_reg );
6534 %}
6535 
// Store Pointer to stack slot
6537 instruct storeSSP(stackSlotP dst, eRegP src) %{
6538   match(Set dst src);
6539 
6540   ins_cost(100);
6541   format %{ "MOV    $dst,$src" %}
6542   opcode(0x89);
6543   ins_encode( OpcPRegSS( dst, src ) );
6544   ins_pipe( ialu_mem_reg );
6545 %}
6546 
6547 // Store Long to stack slot
6548 instruct storeSSL(stackSlotL dst, eRegL src) %{
6549   match(Set dst src);
6550 
6551   ins_cost(200);
6552   format %{ "MOV    $dst,$src.lo\n\t"
6553             "MOV    $dst+4,$src.hi" %}
6554   opcode(0x89, 0x89);
6555   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6556   ins_pipe( ialu_mem_long_reg );
6557 %}
6558 
6559 //----------MemBar Instructions-----------------------------------------------
6560 // Memory barrier flavors
6561 
6562 instruct membar_acquire() %{
6563   match(MemBarAcquire);
6564   match(LoadFence);
6565   ins_cost(400);
6566 
6567   size(0);
6568   format %{ "MEMBAR-acquire ! (empty encoding)" %}
6569   ins_encode();
6570   ins_pipe(empty);
6571 %}
6572 
6573 instruct membar_acquire_lock() %{
6574   match(MemBarAcquireLock);
6575   ins_cost(0);
6576 
6577   size(0);
6578   format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6579   ins_encode( );
6580   ins_pipe(empty);
6581 %}
6582 
6583 instruct membar_release() %{
6584   match(MemBarRelease);
6585   match(StoreFence);
6586   ins_cost(400);
6587 
6588   size(0);
6589   format %{ "MEMBAR-release ! (empty encoding)" %}
6590   ins_encode( );
6591   ins_pipe(empty);
6592 %}
6593 
6594 instruct membar_release_lock() %{
6595   match(MemBarReleaseLock);
6596   ins_cost(0);
6597 
6598   size(0);
6599   format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6600   ins_encode( );
6601   ins_pipe(empty);
6602 %}
6603 
6604 instruct membar_volatile(eFlagsReg cr) %{
6605   match(MemBarVolatile);
6606   effect(KILL cr);
6607   ins_cost(400);
6608 
6609   format %{
6610     $$template
6611     if (os::is_MP()) {
6612       $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6613     } else {
6614       $$emit$$"MEMBAR-volatile ! (empty encoding)"
6615     }
6616   %}
6617   ins_encode %{
6618     __ membar(Assembler::StoreLoad);
6619   %}
6620   ins_pipe(pipe_slow);
6621 %}
6622 
6623 instruct unnecessary_membar_volatile() %{
6624   match(MemBarVolatile);
6625   predicate(Matcher::post_store_load_barrier(n));
6626   ins_cost(0);
6627 
6628   size(0);
6629   format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6630   ins_encode( );
6631   ins_pipe(empty);
6632 %}
6633 
6634 instruct membar_storestore() %{
6635   match(MemBarStoreStore);
6636   ins_cost(0);
6637 
6638   size(0);
6639   format %{ "MEMBAR-storestore (empty encoding)" %}
6640   ins_encode( );
6641   ins_pipe(empty);
6642 %}
6643 
6644 //----------Move Instructions--------------------------------------------------
6645 instruct castX2P(eAXRegP dst, eAXRegI src) %{
6646   match(Set dst (CastX2P src));
6647   format %{ "# X2P  $dst, $src" %}
6648   ins_encode( /*empty encoding*/ );
6649   ins_cost(0);
6650   ins_pipe(empty);
6651 %}
6652 
6653 instruct castP2X(rRegI dst, eRegP src ) %{
6654   match(Set dst (CastP2X src));
6655   ins_cost(50);
6656   format %{ "MOV    $dst, $src\t# CastP2X" %}
6657   ins_encode( enc_Copy( dst, src) );
6658   ins_pipe( ialu_reg_reg );
6659 %}
6660 
6661 //----------Conditional Move---------------------------------------------------
6662 // Conditional move
6663 instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6664   predicate(!VM_Version::supports_cmov() );
6665   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6666   ins_cost(200);
6667   format %{ "J$cop,us skip\t# signed cmove\n\t"
6668             "MOV    $dst,$src\n"
6669       "skip:" %}
6670   ins_encode %{
6671     Label Lskip;
6672     // Invert sense of branch from sense of CMOV
6673     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6674     __ movl($dst$$Register, $src$$Register);
6675     __ bind(Lskip);
6676   %}
6677   ins_pipe( pipe_cmov_reg );
6678 %}
6679 
6680 instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6681   predicate(!VM_Version::supports_cmov() );
6682   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6683   ins_cost(200);
6684   format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6685             "MOV    $dst,$src\n"
6686       "skip:" %}
6687   ins_encode %{
6688     Label Lskip;
6689     // Invert sense of branch from sense of CMOV
6690     __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6691     __ movl($dst$$Register, $src$$Register);
6692     __ bind(Lskip);
6693   %}
6694   ins_pipe( pipe_cmov_reg );
6695 %}
6696 
6697 instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6698   predicate(VM_Version::supports_cmov() );
6699   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6700   ins_cost(200);
6701   format %{ "CMOV$cop $dst,$src" %}
6702   opcode(0x0F,0x40);
6703   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6704   ins_pipe( pipe_cmov_reg );
6705 %}
6706 
6707 instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6708   predicate(VM_Version::supports_cmov() );
6709   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6710   ins_cost(200);
6711   format %{ "CMOV$cop $dst,$src" %}
6712   opcode(0x0F,0x40);
6713   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6714   ins_pipe( pipe_cmov_reg );
6715 %}
6716 
6717 instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6718   predicate(VM_Version::supports_cmov() );
6719   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6720   ins_cost(200);
6721   expand %{
6722     cmovI_regU(cop, cr, dst, src);
6723   %}
6724 %}
6725 
6726 // Conditional move
6727 instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6728   predicate(VM_Version::supports_cmov() );
6729   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6730   ins_cost(250);
6731   format %{ "CMOV$cop $dst,$src" %}
6732   opcode(0x0F,0x40);
6733   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6734   ins_pipe( pipe_cmov_mem );
6735 %}
6736 
6737 // Conditional move
6738 instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6739   predicate(VM_Version::supports_cmov() );
6740   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6741   ins_cost(250);
6742   format %{ "CMOV$cop $dst,$src" %}
6743   opcode(0x0F,0x40);
6744   ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6745   ins_pipe( pipe_cmov_mem );
6746 %}
6747 
6748 instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6749   predicate(VM_Version::supports_cmov() );
6750   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6751   ins_cost(250);
6752   expand %{
6753     cmovI_memU(cop, cr, dst, src);
6754   %}
6755 %}
6756 
6757 // Conditional move
6758 instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6759   predicate(VM_Version::supports_cmov() );
6760   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6761   ins_cost(200);
6762   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6763   opcode(0x0F,0x40);
6764   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6765   ins_pipe( pipe_cmov_reg );
6766 %}
6767 
// Conditional move (non-P6 version)
// Note: a CMoveP is generated for stubs and native wrappers
//       regardless of whether we are on a P6, so we
//       emulate a CMOV here
6772 instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6773   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6774   ins_cost(300);
6775   format %{ "Jn$cop   skip\n\t"
6776           "MOV    $dst,$src\t# pointer\n"
6777       "skip:" %}
6778   opcode(0x8b);
6779   ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6780   ins_pipe( pipe_cmov_reg );
6781 %}
6782 
6783 // Conditional move
6784 instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6785   predicate(VM_Version::supports_cmov() );
6786   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6787   ins_cost(200);
6788   format %{ "CMOV$cop $dst,$src\t# ptr" %}
6789   opcode(0x0F,0x40);
6790   ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6791   ins_pipe( pipe_cmov_reg );
6792 %}
6793 
6794 instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6795   predicate(VM_Version::supports_cmov() );
6796   match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6797   ins_cost(200);
6798   expand %{
6799     cmovP_regU(cop, cr, dst, src);
6800   %}
6801 %}
6802 
6803 // DISABLED: Requires the ADLC to emit a bottom_type call that
6804 // correctly meets the two pointer arguments; one is an incoming
6805 // register but the other is a memory operand.  ALSO appears to
6806 // be buggy with implicit null checks.
6807 //
6808 //// Conditional move
6809 //instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6810 //  predicate(VM_Version::supports_cmov() );
6811 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6812 //  ins_cost(250);
6813 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6814 //  opcode(0x0F,0x40);
6815 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6816 //  ins_pipe( pipe_cmov_mem );
6817 //%}
6818 //
6819 //// Conditional move
6820 //instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6821 //  predicate(VM_Version::supports_cmov() );
6822 //  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6823 //  ins_cost(250);
6824 //  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6825 //  opcode(0x0F,0x40);
6826 //  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6827 //  ins_pipe( pipe_cmov_mem );
6828 //%}
6829 
6830 // Conditional move
6831 instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6832   predicate(UseSSE<=1);
6833   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6834   ins_cost(200);
6835   format %{ "FCMOV$cop $dst,$src\t# double" %}
6836   opcode(0xDA);
6837   ins_encode( enc_cmov_dpr(cop,src) );
6838   ins_pipe( pipe_cmovDPR_reg );
6839 %}
6840 
6841 // Conditional move
6842 instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6843   predicate(UseSSE==0);
6844   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6845   ins_cost(200);
6846   format %{ "FCMOV$cop $dst,$src\t# float" %}
6847   opcode(0xDA);
6848   ins_encode( enc_cmov_dpr(cop,src) );
6849   ins_pipe( pipe_cmovDPR_reg );
6850 %}
6851 
6852 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6853 instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6854   predicate(UseSSE<=1);
6855   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6856   ins_cost(200);
6857   format %{ "Jn$cop   skip\n\t"
6858             "MOV    $dst,$src\t# double\n"
6859       "skip:" %}
6860   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6861   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6862   ins_pipe( pipe_cmovDPR_reg );
6863 %}
6864 
6865 // Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6866 instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6867   predicate(UseSSE==0);
6868   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6869   ins_cost(200);
6870   format %{ "Jn$cop    skip\n\t"
6871             "MOV    $dst,$src\t# float\n"
6872       "skip:" %}
6873   opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6874   ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6875   ins_pipe( pipe_cmovDPR_reg );
6876 %}
6877 
// No CMOV for XMM registers with SSE/SSE2; emulate with a short branch
6879 instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6880   predicate (UseSSE>=1);
6881   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6882   ins_cost(200);
6883   format %{ "Jn$cop   skip\n\t"
6884             "MOVSS  $dst,$src\t# float\n"
6885       "skip:" %}
6886   ins_encode %{
6887     Label skip;
6888     // Invert sense of branch from sense of CMOV
6889     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6890     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6891     __ bind(skip);
6892   %}
6893   ins_pipe( pipe_slow );
6894 %}
6895 
// No CMOV for XMM registers with SSE/SSE2; emulate with a short branch
6897 instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6898   predicate (UseSSE>=2);
6899   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6900   ins_cost(200);
6901   format %{ "Jn$cop   skip\n\t"
6902             "MOVSD  $dst,$src\t# float\n"
6903       "skip:" %}
6904   ins_encode %{
6905     Label skip;
6906     // Invert sense of branch from sense of CMOV
6907     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6908     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6909     __ bind(skip);
6910   %}
6911   ins_pipe( pipe_slow );
6912 %}
6913 
6914 // unsigned version
6915 instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6916   predicate (UseSSE>=1);
6917   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6918   ins_cost(200);
6919   format %{ "Jn$cop   skip\n\t"
6920             "MOVSS  $dst,$src\t# float\n"
6921       "skip:" %}
6922   ins_encode %{
6923     Label skip;
6924     // Invert sense of branch from sense of CMOV
6925     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6926     __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6927     __ bind(skip);
6928   %}
6929   ins_pipe( pipe_slow );
6930 %}
6931 
6932 instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6933   predicate (UseSSE>=1);
6934   match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6935   ins_cost(200);
6936   expand %{
6937     fcmovF_regU(cop, cr, dst, src);
6938   %}
6939 %}
6940 
6941 // unsigned version
6942 instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6943   predicate (UseSSE>=2);
6944   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6945   ins_cost(200);
6946   format %{ "Jn$cop   skip\n\t"
6947             "MOVSD  $dst,$src\t# float\n"
6948       "skip:" %}
6949   ins_encode %{
6950     Label skip;
6951     // Invert sense of branch from sense of CMOV
6952     __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6953     __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6954     __ bind(skip);
6955   %}
6956   ins_pipe( pipe_slow );
6957 %}
6958 
6959 instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6960   predicate (UseSSE>=2);
6961   match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6962   ins_cost(200);
6963   expand %{
6964     fcmovD_regU(cop, cr, dst, src);
6965   %}
6966 %}
6967 
6968 instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6969   predicate(VM_Version::supports_cmov() );
6970   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6971   ins_cost(200);
6972   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6973             "CMOV$cop $dst.hi,$src.hi" %}
6974   opcode(0x0F,0x40);
6975   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6976   ins_pipe( pipe_cmov_reg_long );
6977 %}
6978 
6979 instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6980   predicate(VM_Version::supports_cmov() );
6981   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6982   ins_cost(200);
6983   format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6984             "CMOV$cop $dst.hi,$src.hi" %}
6985   opcode(0x0F,0x40);
6986   ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6987   ins_pipe( pipe_cmov_reg_long );
6988 %}
6989 
6990 instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6991   predicate(VM_Version::supports_cmov() );
6992   match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6993   ins_cost(200);
6994   expand %{
6995     cmovL_regU(cop, cr, dst, src);
6996   %}
6997 %}
6998 
6999 //----------Arithmetic Instructions--------------------------------------------
7000 //----------Addition Instructions----------------------------------------------
7001 
7002 // Integer Addition Instructions
7003 instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7004   match(Set dst (AddI dst src));
7005   effect(KILL cr);
7006 
7007   size(2);
7008   format %{ "ADD    $dst,$src" %}
7009   opcode(0x03);
7010   ins_encode( OpcP, RegReg( dst, src) );
7011   ins_pipe( ialu_reg_reg );
7012 %}
7013 
7014 instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7015   match(Set dst (AddI dst src));
7016   effect(KILL cr);
7017 
7018   format %{ "ADD    $dst,$src" %}
7019   opcode(0x81, 0x00); /* /0 id */
7020   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7021   ins_pipe( ialu_reg );
7022 %}
7023 
7024 instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7025   predicate(UseIncDec);
7026   match(Set dst (AddI dst src));
7027   effect(KILL cr);
7028 
7029   size(1);
7030   format %{ "INC    $dst" %}
7031   opcode(0x40); /*  */
7032   ins_encode( Opc_plus( primary, dst ) );
7033   ins_pipe( ialu_reg );
7034 %}
7035 
7036 instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7037   match(Set dst (AddI src0 src1));
7038   ins_cost(110);
7039 
7040   format %{ "LEA    $dst,[$src0 + $src1]" %}
7041   opcode(0x8D); /* 0x8D /r */
7042   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7043   ins_pipe( ialu_reg_reg );
7044 %}
7045 
7046 instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7047   match(Set dst (AddP src0 src1));
7048   ins_cost(110);
7049 
7050   format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7051   opcode(0x8D); /* 0x8D /r */
7052   ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7053   ins_pipe( ialu_reg_reg );
7054 %}
7055 
7056 instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7057   predicate(UseIncDec);
7058   match(Set dst (AddI dst src));
7059   effect(KILL cr);
7060 
7061   size(1);
7062   format %{ "DEC    $dst" %}
7063   opcode(0x48); /*  */
7064   ins_encode( Opc_plus( primary, dst ) );
7065   ins_pipe( ialu_reg );
7066 %}
7067 
7068 instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7069   match(Set dst (AddP dst src));
7070   effect(KILL cr);
7071 
7072   size(2);
7073   format %{ "ADD    $dst,$src" %}
7074   opcode(0x03);
7075   ins_encode( OpcP, RegReg( dst, src) );
7076   ins_pipe( ialu_reg_reg );
7077 %}
7078 
7079 instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7080   match(Set dst (AddP dst src));
7081   effect(KILL cr);
7082 
7083   format %{ "ADD    $dst,$src" %}
7084   opcode(0x81,0x00); /* Opcode 81 /0 id */
7085   // ins_encode( RegImm( dst, src) );
7086   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7087   ins_pipe( ialu_reg );
7088 %}
7089 
7090 instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7091   match(Set dst (AddI dst (LoadI src)));
7092   effect(KILL cr);
7093 
7094   ins_cost(125);
7095   format %{ "ADD    $dst,$src" %}
7096   opcode(0x03);
7097   ins_encode( OpcP, RegMem( dst, src) );
7098   ins_pipe( ialu_reg_mem );
7099 %}
7100 
7101 instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7102   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7103   effect(KILL cr);
7104 
7105   ins_cost(150);
7106   format %{ "ADD    $dst,$src" %}
7107   opcode(0x01);  /* Opcode 01 /r */
7108   ins_encode( OpcP, RegMem( src, dst ) );
7109   ins_pipe( ialu_mem_reg );
7110 %}
7111 
7112 // Add Memory with Immediate
7113 instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7114   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7115   effect(KILL cr);
7116 
7117   ins_cost(125);
7118   format %{ "ADD    $dst,$src" %}
7119   opcode(0x81);               /* Opcode 81 /0 id */
7120   ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7121   ins_pipe( ialu_mem_imm );
7122 %}
7123 
7124 instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7125   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7126   effect(KILL cr);
7127 
7128   ins_cost(125);
7129   format %{ "INC    $dst" %}
7130   opcode(0xFF);               /* Opcode FF /0 */
7131   ins_encode( OpcP, RMopc_Mem(0x00,dst));
7132   ins_pipe( ialu_mem_imm );
7133 %}
7134 
7135 instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7136   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7137   effect(KILL cr);
7138 
7139   ins_cost(125);
7140   format %{ "DEC    $dst" %}
7141   opcode(0xFF);               /* Opcode FF /1 */
7142   ins_encode( OpcP, RMopc_Mem(0x01,dst));
7143   ins_pipe( ialu_mem_imm );
7144 %}
7145 
7146 
7147 instruct checkCastPP( eRegP dst ) %{
7148   match(Set dst (CheckCastPP dst));
7149 
7150   size(0);
7151   format %{ "#checkcastPP of $dst" %}
7152   ins_encode( /*empty encoding*/ );
7153   ins_pipe( empty );
7154 %}
7155 
7156 instruct castPP( eRegP dst ) %{
7157   match(Set dst (CastPP dst));
7158   format %{ "#castPP of $dst" %}
7159   ins_encode( /*empty encoding*/ );
7160   ins_pipe( empty );
7161 %}
7162 
7163 instruct castII( rRegI dst ) %{
7164   match(Set dst (CastII dst));
7165   format %{ "#castII of $dst" %}
7166   ins_encode( /*empty encoding*/ );
7167   ins_cost(0);
7168   ins_pipe( empty );
7169 %}
7170 
7171 
7172 // Load-locked - same as a regular pointer load when used with compare-swap
7173 instruct loadPLocked(eRegP dst, memory mem) %{
7174   match(Set dst (LoadPLocked mem));
7175 
7176   ins_cost(125);
7177   format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7178   opcode(0x8B);
7179   ins_encode( OpcP, RegMem(dst,mem));
7180   ins_pipe( ialu_reg_mem );
7181 %}
7182 
7183 // Conditional-store of the updated heap-top.
7184 // Used during allocation of the shared heap.
7185 // Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7186 instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7187   match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7188   // EAX is killed if there is contention, but then it's also unused.
7189   // In the common case of no contention, EAX holds the new oop address.
7190   format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7191   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7192   ins_pipe( pipe_cmpxchg );
7193 %}
7194 
7195 // Conditional-store of an int value.
7196 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7197 instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7198   match(Set cr (StoreIConditional mem (Binary oldval newval)));
7199   effect(KILL oldval);
7200   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7201   ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7202   ins_pipe( pipe_cmpxchg );
7203 %}
7204 
7205 // Conditional-store of a long value.
7206 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7207 instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7208   match(Set cr (StoreLConditional mem (Binary oldval newval)));
7209   effect(KILL oldval);
7210   format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7211             "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7212             "XCHG   EBX,ECX"
7213   %}
7214   ins_encode %{
    // Note: we need to swap rbx and rcx before and after the
7216     //       cmpxchg8 instruction because the instruction uses
7217     //       rcx as the high order word of the new value to store but
7218     //       our register encoding uses rbx.
7219     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7220     if( os::is_MP() )
7221       __ lock();
7222     __ cmpxchg8($mem$$Address);
7223     __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7224   %}
7225   ins_pipe( pipe_cmpxchg );
7226 %}
7227 
7228 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7229 
7230 instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7231   predicate(VM_Version::supports_cx8());
7232   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7233   effect(KILL cr, KILL oldval);
7234   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7235             "MOV    $res,0\n\t"
7236             "JNE,s  fail\n\t"
7237             "MOV    $res,1\n"
7238           "fail:" %}
7239   ins_encode( enc_cmpxchg8(mem_ptr),
7240               enc_flags_ne_to_boolean(res) );
7241   ins_pipe( pipe_cmpxchg );
7242 %}
7243 
7244 instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7245   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7246   effect(KILL cr, KILL oldval);
7247   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7248             "MOV    $res,0\n\t"
7249             "JNE,s  fail\n\t"
7250             "MOV    $res,1\n"
7251           "fail:" %}
7252   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7253   ins_pipe( pipe_cmpxchg );
7254 %}
7255 
7256 instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7257   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7258   effect(KILL cr, KILL oldval);
7259   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7260             "MOV    $res,0\n\t"
7261             "JNE,s  fail\n\t"
7262             "MOV    $res,1\n"
7263           "fail:" %}
7264   ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7265   ins_pipe( pipe_cmpxchg );
7266 %}
7267 
7268 instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7269   predicate(n->as_LoadStore()->result_not_used());
7270   match(Set dummy (GetAndAddI mem add));
7271   effect(KILL cr);
7272   format %{ "ADDL  [$mem],$add" %}
7273   ins_encode %{
7274     if (os::is_MP()) { __ lock(); }
7275     __ addl($mem$$Address, $add$$constant);
7276   %}
7277   ins_pipe( pipe_cmpxchg );
7278 %}
7279 
7280 instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7281   match(Set newval (GetAndAddI mem newval));
7282   effect(KILL cr);
7283   format %{ "XADDL  [$mem],$newval" %}
7284   ins_encode %{
7285     if (os::is_MP()) { __ lock(); }
7286     __ xaddl($mem$$Address, $newval$$Register);
7287   %}
7288   ins_pipe( pipe_cmpxchg );
7289 %}
7290 
7291 instruct xchgI( memory mem, rRegI newval) %{
7292   match(Set newval (GetAndSetI mem newval));
7293   format %{ "XCHGL  $newval,[$mem]" %}
7294   ins_encode %{
7295     __ xchgl($newval$$Register, $mem$$Address);
7296   %}
7297   ins_pipe( pipe_cmpxchg );
7298 %}
7299 
7300 instruct xchgP( memory mem, pRegP newval) %{
7301   match(Set newval (GetAndSetP mem newval));
7302   format %{ "XCHGL  $newval,[$mem]" %}
7303   ins_encode %{
7304     __ xchgl($newval$$Register, $mem$$Address);
7305   %}
7306   ins_pipe( pipe_cmpxchg );
7307 %}
7308 
7309 //----------Subtraction Instructions-------------------------------------------
7310 
7311 // Integer Subtraction Instructions
7312 instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7313   match(Set dst (SubI dst src));
7314   effect(KILL cr);
7315 
7316   size(2);
7317   format %{ "SUB    $dst,$src" %}
7318   opcode(0x2B);
7319   ins_encode( OpcP, RegReg( dst, src) );
7320   ins_pipe( ialu_reg_reg );
7321 %}
7322 
7323 instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7324   match(Set dst (SubI dst src));
7325   effect(KILL cr);
7326 
7327   format %{ "SUB    $dst,$src" %}
7328   opcode(0x81,0x05);  /* Opcode 81 /5 */
7329   // ins_encode( RegImm( dst, src) );
7330   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7331   ins_pipe( ialu_reg );
7332 %}
7333 
7334 instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7335   match(Set dst (SubI dst (LoadI src)));
7336   effect(KILL cr);
7337 
7338   ins_cost(125);
7339   format %{ "SUB    $dst,$src" %}
7340   opcode(0x2B);
7341   ins_encode( OpcP, RegMem( dst, src) );
7342   ins_pipe( ialu_reg_mem );
7343 %}
7344 
7345 instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7346   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7347   effect(KILL cr);
7348 
7349   ins_cost(150);
7350   format %{ "SUB    $dst,$src" %}
7351   opcode(0x29);  /* Opcode 29 /r */
7352   ins_encode( OpcP, RegMem( src, dst ) );
7353   ins_pipe( ialu_mem_reg );
7354 %}
7355 
7356 // Subtract from a pointer
7357 instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7358   match(Set dst (AddP dst (SubI zero src)));
7359   effect(KILL cr);
7360 
7361   size(2);
7362   format %{ "SUB    $dst,$src" %}
7363   opcode(0x2B);
7364   ins_encode( OpcP, RegReg( dst, src) );
7365   ins_pipe( ialu_reg_reg );
7366 %}
7367 
7368 instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7369   match(Set dst (SubI zero dst));
7370   effect(KILL cr);
7371 
7372   size(2);
7373   format %{ "NEG    $dst" %}
7374   opcode(0xF7,0x03);  // Opcode F7 /3
7375   ins_encode( OpcP, RegOpc( dst ) );
7376   ins_pipe( ialu_reg );
7377 %}
7378 
7379 //----------Multiplication/Division Instructions-------------------------------
7380 // Integer Multiplication Instructions
7381 // Multiply Register
7382 instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7383   match(Set dst (MulI dst src));
7384   effect(KILL cr);
7385 
7386   size(3);
7387   ins_cost(300);
7388   format %{ "IMUL   $dst,$src" %}
7389   opcode(0xAF, 0x0F);
7390   ins_encode( OpcS, OpcP, RegReg( dst, src) );
7391   ins_pipe( ialu_reg_reg_alu0 );
7392 %}
7393 
7394 // Multiply 32-bit Immediate
7395 instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7396   match(Set dst (MulI src imm));
7397   effect(KILL cr);
7398 
7399   ins_cost(300);
7400   format %{ "IMUL   $dst,$src,$imm" %}
7401   opcode(0x69);  /* 69 /r id */
7402   ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7403   ins_pipe( ialu_reg_reg_alu0 );
7404 %}
7405 
7406 instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7407   match(Set dst src);
7408   effect(KILL cr);
7409 
7410   // Note that this is artificially increased to make it more expensive than loadConL
7411   ins_cost(250);
7412   format %{ "MOV    EAX,$src\t// low word only" %}
7413   opcode(0xB8);
7414   ins_encode( LdImmL_Lo(dst, src) );
7415   ins_pipe( ialu_reg_fat );
7416 %}
7417 
7418 // Multiply by 32-bit Immediate, taking the shifted high order results
7419 //  (special case for shift by 32)
7420 instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7421   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7422   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7423              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7424              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7425   effect(USE src1, KILL cr);
7426 
7427   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7428   ins_cost(0*100 + 1*400 - 150);
7429   format %{ "IMUL   EDX:EAX,$src1" %}
7430   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7431   ins_pipe( pipe_slow );
7432 %}
7433 
7434 // Multiply by 32-bit Immediate, taking the shifted high order results
7435 instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7436   match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7437   predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7438              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7439              _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7440   effect(USE src1, KILL cr);
7441 
7442   // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7443   ins_cost(1*100 + 1*400 - 150);
7444   format %{ "IMUL   EDX:EAX,$src1\n\t"
7445             "SAR    EDX,$cnt-32" %}
7446   ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7447   ins_pipe( pipe_slow );
7448 %}
7449 
7450 // Multiply Memory 32-bit Immediate
7451 instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7452   match(Set dst (MulI (LoadI src) imm));
7453   effect(KILL cr);
7454 
7455   ins_cost(300);
7456   format %{ "IMUL   $dst,$src,$imm" %}
7457   opcode(0x69);  /* 69 /r id */
7458   ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7459   ins_pipe( ialu_reg_mem_alu0 );
7460 %}
7461 
7462 // Multiply Memory
7463 instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7464   match(Set dst (MulI dst (LoadI src)));
7465   effect(KILL cr);
7466 
7467   ins_cost(350);
7468   format %{ "IMUL   $dst,$src" %}
7469   opcode(0xAF, 0x0F);
7470   ins_encode( OpcS, OpcP, RegMem( dst, src) );
7471   ins_pipe( ialu_reg_mem_alu0 );
7472 %}
7473 
7474 // Multiply Register Int to Long
7475 instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7476   // Basic Idea: long = (long)int * (long)int
7477   match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7478   effect(DEF dst, USE src, USE src1, KILL flags);
7479 
7480   ins_cost(300);
7481   format %{ "IMUL   $dst,$src1" %}
7482 
7483   ins_encode( long_int_multiply( dst, src1 ) );
7484   ins_pipe( ialu_reg_reg_alu0 );
7485 %}
7486 
7487 instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7488   // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7489   match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7490   effect(KILL flags);
7491 
7492   ins_cost(300);
7493   format %{ "MUL    $dst,$src1" %}
7494 
7495   ins_encode( long_uint_multiply(dst, src1) );
7496   ins_pipe( ialu_reg_reg_alu0 );
7497 %}
7498 
7499 // Multiply Register Long
7500 instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7501   match(Set dst (MulL dst src));
7502   effect(KILL cr, TEMP tmp);
7503   ins_cost(4*100+3*400);
7504 // Basic idea: lo(result) = lo(x_lo * y_lo)
7505 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7506   format %{ "MOV    $tmp,$src.lo\n\t"
7507             "IMUL   $tmp,EDX\n\t"
7508             "MOV    EDX,$src.hi\n\t"
7509             "IMUL   EDX,EAX\n\t"
7510             "ADD    $tmp,EDX\n\t"
7511             "MUL    EDX:EAX,$src.lo\n\t"
7512             "ADD    EDX,$tmp" %}
7513   ins_encode( long_multiply( dst, src, tmp ) );
7514   ins_pipe( pipe_slow );
7515 %}
7516 
7517 // Multiply Register Long where the left operand's high 32 bits are zero
7518 instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7519   predicate(is_operand_hi32_zero(n->in(1)));
7520   match(Set dst (MulL dst src));
7521   effect(KILL cr, TEMP tmp);
7522   ins_cost(2*100+2*400);
7523 // Basic idea: lo(result) = lo(x_lo * y_lo)
7524 //             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7525   format %{ "MOV    $tmp,$src.hi\n\t"
7526             "IMUL   $tmp,EAX\n\t"
7527             "MUL    EDX:EAX,$src.lo\n\t"
7528             "ADD    EDX,$tmp" %}
7529   ins_encode %{
7530     __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7531     __ imull($tmp$$Register, rax);
7532     __ mull($src$$Register);
7533     __ addl(rdx, $tmp$$Register);
7534   %}
7535   ins_pipe( pipe_slow );
7536 %}
7537 
7538 // Multiply Register Long where the right operand's high 32 bits are zero
7539 instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7540   predicate(is_operand_hi32_zero(n->in(2)));
7541   match(Set dst (MulL dst src));
7542   effect(KILL cr, TEMP tmp);
7543   ins_cost(2*100+2*400);
7544 // Basic idea: lo(result) = lo(x_lo * y_lo)
7545 //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7546   format %{ "MOV    $tmp,$src.lo\n\t"
7547             "IMUL   $tmp,EDX\n\t"
7548             "MUL    EDX:EAX,$src.lo\n\t"
7549             "ADD    EDX,$tmp" %}
7550   ins_encode %{
7551     __ movl($tmp$$Register, $src$$Register);
7552     __ imull($tmp$$Register, rdx);
7553     __ mull($src$$Register);
7554     __ addl(rdx, $tmp$$Register);
7555   %}
7556   ins_pipe( pipe_slow );
7557 %}
7558 
7559 // Multiply Register Long where the left and the right operands' high 32 bits are zero
7560 instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7561   predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7562   match(Set dst (MulL dst src));
7563   effect(KILL cr);
7564   ins_cost(1*400);
7565 // Basic idea: lo(result) = lo(x_lo * y_lo)
7566 //             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7567   format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7568   ins_encode %{
7569     __ mull($src$$Register);
7570   %}
7571   ins_pipe( pipe_slow );
7572 %}
7573 
7574 // Multiply Register Long by small constant
7575 instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7576   match(Set dst (MulL dst src));
7577   effect(KILL cr, TEMP tmp);
7578   ins_cost(2*100+2*400);
7579   size(12);
7580 // Basic idea: lo(result) = lo(src * EAX)
7581 //             hi(result) = hi(src * EAX) + lo(src * EDX)
7582   format %{ "IMUL   $tmp,EDX,$src\n\t"
7583             "MOV    EDX,$src\n\t"
7584             "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7585             "ADD    EDX,$tmp" %}
7586   ins_encode( long_multiply_con( dst, src, tmp ) );
7587   ins_pipe( pipe_slow );
7588 %}
7589 
7590 // Integer DIV with Register
7591 instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7592   match(Set rax (DivI rax div));
7593   effect(KILL rdx, KILL cr);
7594   size(26);
7595   ins_cost(30*100+10*100);
7596   format %{ "CMP    EAX,0x80000000\n\t"
7597             "JNE,s  normal\n\t"
7598             "XOR    EDX,EDX\n\t"
7599             "CMP    ECX,-1\n\t"
7600             "JE,s   done\n"
7601     "normal: CDQ\n\t"
7602             "IDIV   $div\n\t"
7603     "done:"        %}
7604   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7605   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7606   ins_pipe( ialu_reg_reg_alu0 );
7607 %}
7608 
7609 // Divide Register Long
7610 instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7611   match(Set dst (DivL src1 src2));
7612   effect( KILL cr, KILL cx, KILL bx );
7613   ins_cost(10000);
7614   format %{ "PUSH   $src1.hi\n\t"
7615             "PUSH   $src1.lo\n\t"
7616             "PUSH   $src2.hi\n\t"
7617             "PUSH   $src2.lo\n\t"
7618             "CALL   SharedRuntime::ldiv\n\t"
7619             "ADD    ESP,16" %}
7620   ins_encode( long_div(src1,src2) );
7621   ins_pipe( pipe_slow );
7622 %}
7623 
7624 // Integer DIVMOD with Register, both quotient and mod results
7625 instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7626   match(DivModI rax div);
7627   effect(KILL cr);
7628   size(26);
7629   ins_cost(30*100+10*100);
7630   format %{ "CMP    EAX,0x80000000\n\t"
7631             "JNE,s  normal\n\t"
7632             "XOR    EDX,EDX\n\t"
7633             "CMP    ECX,-1\n\t"
7634             "JE,s   done\n"
7635     "normal: CDQ\n\t"
7636             "IDIV   $div\n\t"
7637     "done:"        %}
7638   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7639   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7640   ins_pipe( pipe_slow );
7641 %}
7642 
7643 // Integer MOD with Register
7644 instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7645   match(Set rdx (ModI rax div));
7646   effect(KILL rax, KILL cr);
7647 
7648   size(26);
7649   ins_cost(300);
7650   format %{ "CDQ\n\t"
7651             "IDIV   $div" %}
7652   opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7653   ins_encode( cdq_enc, OpcP, RegOpc(div) );
7654   ins_pipe( ialu_reg_reg_alu0 );
7655 %}
7656 
7657 // Remainder Register Long
7658 instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7659   match(Set dst (ModL src1 src2));
7660   effect( KILL cr, KILL cx, KILL bx );
7661   ins_cost(10000);
7662   format %{ "PUSH   $src1.hi\n\t"
7663             "PUSH   $src1.lo\n\t"
7664             "PUSH   $src2.hi\n\t"
7665             "PUSH   $src2.lo\n\t"
7666             "CALL   SharedRuntime::lrem\n\t"
7667             "ADD    ESP,16" %}
7668   ins_encode( long_mod(src1,src2) );
7669   ins_pipe( pipe_slow );
7670 %}
7671 
7672 // Divide Register Long (no special case since divisor != -1)
7673 instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7674   match(Set dst (DivL dst imm));
7675   effect( TEMP tmp, TEMP tmp2, KILL cr );
7676   ins_cost(1000);
7677   format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7678             "XOR    $tmp2,$tmp2\n\t"
7679             "CMP    $tmp,EDX\n\t"
7680             "JA,s   fast\n\t"
7681             "MOV    $tmp2,EAX\n\t"
7682             "MOV    EAX,EDX\n\t"
7683             "MOV    EDX,0\n\t"
7684             "JLE,s  pos\n\t"
7685             "LNEG   EAX : $tmp2\n\t"
7686             "DIV    $tmp # unsigned division\n\t"
7687             "XCHG   EAX,$tmp2\n\t"
7688             "DIV    $tmp\n\t"
7689             "LNEG   $tmp2 : EAX\n\t"
7690             "JMP,s  done\n"
7691     "pos:\n\t"
7692             "DIV    $tmp\n\t"
7693             "XCHG   EAX,$tmp2\n"
7694     "fast:\n\t"
7695             "DIV    $tmp\n"
7696     "done:\n\t"
7697             "MOV    EDX,$tmp2\n\t"
7698             "NEG    EDX:EAX # if $imm < 0" %}
7699   ins_encode %{
7700     int con = (int)$imm$$constant;
7701     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7702     int pcon = (con > 0) ? con : -con;
7703     Label Lfast, Lpos, Ldone;
7704 
7705     __ movl($tmp$$Register, pcon);
7706     __ xorl($tmp2$$Register,$tmp2$$Register);
7707     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7708     __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7709 
7710     __ movl($tmp2$$Register, $dst$$Register); // save
7711     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7712     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7713     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7714 
7715     // Negative dividend.
7716     // convert value to positive to use unsigned division
7717     __ lneg($dst$$Register, $tmp2$$Register);
7718     __ divl($tmp$$Register);
7719     __ xchgl($dst$$Register, $tmp2$$Register);
7720     __ divl($tmp$$Register);
7721     // revert result back to negative
7722     __ lneg($tmp2$$Register, $dst$$Register);
7723     __ jmpb(Ldone);
7724 
7725     __ bind(Lpos);
7726     __ divl($tmp$$Register); // Use unsigned division
7727     __ xchgl($dst$$Register, $tmp2$$Register);
    // Fall through to the final divide; tmp2 has the 32-bit hi result
7729 
7730     __ bind(Lfast);
7731     // fast path: src is positive
7732     __ divl($tmp$$Register); // Use unsigned division
7733 
7734     __ bind(Ldone);
7735     __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7736     if (con < 0) {
7737       __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7738     }
7739   %}
7740   ins_pipe( pipe_slow );
7741 %}
7742 
7743 // Remainder Register Long (remainder fit into 32 bits)
7744 instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7745   match(Set dst (ModL dst imm));
7746   effect( TEMP tmp, TEMP tmp2, KILL cr );
7747   ins_cost(1000);
7748   format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7749             "CMP    $tmp,EDX\n\t"
7750             "JA,s   fast\n\t"
7751             "MOV    $tmp2,EAX\n\t"
7752             "MOV    EAX,EDX\n\t"
7753             "MOV    EDX,0\n\t"
7754             "JLE,s  pos\n\t"
7755             "LNEG   EAX : $tmp2\n\t"
7756             "DIV    $tmp # unsigned division\n\t"
7757             "MOV    EAX,$tmp2\n\t"
7758             "DIV    $tmp\n\t"
7759             "NEG    EDX\n\t"
7760             "JMP,s  done\n"
7761     "pos:\n\t"
7762             "DIV    $tmp\n\t"
7763             "MOV    EAX,$tmp2\n"
7764     "fast:\n\t"
7765             "DIV    $tmp\n"
7766     "done:\n\t"
7767             "MOV    EAX,EDX\n\t"
7768             "SAR    EDX,31\n\t" %}
7769   ins_encode %{
7770     int con = (int)$imm$$constant;
7771     assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7772     int pcon = (con > 0) ? con : -con;
7773     Label  Lfast, Lpos, Ldone;
7774 
7775     __ movl($tmp$$Register, pcon);
7776     __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7777     __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7778 
7779     __ movl($tmp2$$Register, $dst$$Register); // save
7780     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7781     __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7782     __ jccb(Assembler::lessEqual, Lpos); // result is positive
7783 
7784     // Negative dividend.
7785     // convert value to positive to use unsigned division
7786     __ lneg($dst$$Register, $tmp2$$Register);
7787     __ divl($tmp$$Register);
7788     __ movl($dst$$Register, $tmp2$$Register);
7789     __ divl($tmp$$Register);
7790     // revert remainder back to negative
7791     __ negl(HIGH_FROM_LOW($dst$$Register));
7792     __ jmpb(Ldone);
7793 
7794     __ bind(Lpos);
7795     __ divl($tmp$$Register);
7796     __ movl($dst$$Register, $tmp2$$Register);
7797 
7798     __ bind(Lfast);
7799     // fast path: src is positive
7800     __ divl($tmp$$Register);
7801 
7802     __ bind(Ldone);
7803     __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7804     __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7805 
7806   %}
7807   ins_pipe( pipe_slow );
7808 %}
7809 
7810 // Integer Shift Instructions
7811 // Shift Left by one
7812 instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7813   match(Set dst (LShiftI dst shift));
7814   effect(KILL cr);
7815 
7816   size(2);
7817   format %{ "SHL    $dst,$shift" %}
7818   opcode(0xD1, 0x4);  /* D1 /4 */
7819   ins_encode( OpcP, RegOpc( dst ) );
7820   ins_pipe( ialu_reg );
7821 %}
7822 
7823 // Shift Left by 8-bit immediate
7824 instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7825   match(Set dst (LShiftI dst shift));
7826   effect(KILL cr);
7827 
7828   size(3);
7829   format %{ "SHL    $dst,$shift" %}
7830   opcode(0xC1, 0x4);  /* C1 /4 ib */
7831   ins_encode( RegOpcImm( dst, shift) );
7832   ins_pipe( ialu_reg );
7833 %}
7834 
7835 // Shift Left by variable
7836 instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7837   match(Set dst (LShiftI dst shift));
7838   effect(KILL cr);
7839 
7840   size(2);
7841   format %{ "SHL    $dst,$shift" %}
7842   opcode(0xD3, 0x4);  /* D3 /4 */
7843   ins_encode( OpcP, RegOpc( dst ) );
7844   ins_pipe( ialu_reg_reg );
7845 %}
7846 
7847 // Arithmetic shift right by one
7848 instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7849   match(Set dst (RShiftI dst shift));
7850   effect(KILL cr);
7851 
7852   size(2);
7853   format %{ "SAR    $dst,$shift" %}
7854   opcode(0xD1, 0x7);  /* D1 /7 */
7855   ins_encode( OpcP, RegOpc( dst ) );
7856   ins_pipe( ialu_reg );
7857 %}
7858 
7859 // Arithmetic shift right by one
7860 instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7861   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7862   effect(KILL cr);
7863   format %{ "SAR    $dst,$shift" %}
7864   opcode(0xD1, 0x7);  /* D1 /7 */
7865   ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7866   ins_pipe( ialu_mem_imm );
7867 %}
7868 
7869 // Arithmetic Shift Right by 8-bit immediate
7870 instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7871   match(Set dst (RShiftI dst shift));
7872   effect(KILL cr);
7873 
7874   size(3);
7875   format %{ "SAR    $dst,$shift" %}
7876   opcode(0xC1, 0x7);  /* C1 /7 ib */
7877   ins_encode( RegOpcImm( dst, shift ) );
7878   ins_pipe( ialu_mem_imm );
7879 %}
7880 
7881 // Arithmetic Shift Right by 8-bit immediate
7882 instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7883   match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7884   effect(KILL cr);
7885 
7886   format %{ "SAR    $dst,$shift" %}
7887   opcode(0xC1, 0x7);  /* C1 /7 ib */
7888   ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7889   ins_pipe( ialu_mem_imm );
7890 %}
7891 
7892 // Arithmetic Shift Right by variable
7893 instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7894   match(Set dst (RShiftI dst shift));
7895   effect(KILL cr);
7896 
7897   size(2);
7898   format %{ "SAR    $dst,$shift" %}
7899   opcode(0xD3, 0x7);  /* D3 /7 */
7900   ins_encode( OpcP, RegOpc( dst ) );
7901   ins_pipe( ialu_reg_reg );
7902 %}
7903 
7904 // Logical shift right by one
7905 instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7906   match(Set dst (URShiftI dst shift));
7907   effect(KILL cr);
7908 
7909   size(2);
7910   format %{ "SHR    $dst,$shift" %}
7911   opcode(0xD1, 0x5);  /* D1 /5 */
7912   ins_encode( OpcP, RegOpc( dst ) );
7913   ins_pipe( ialu_reg );
7914 %}
7915 
7916 // Logical Shift Right by 8-bit immediate
7917 instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7918   match(Set dst (URShiftI dst shift));
7919   effect(KILL cr);
7920 
7921   size(3);
7922   format %{ "SHR    $dst,$shift" %}
7923   opcode(0xC1, 0x5);  /* C1 /5 ib */
7924   ins_encode( RegOpcImm( dst, shift) );
7925   ins_pipe( ialu_reg );
7926 %}
7927 
7928 
7929 // Shift Left by 24, followed by Arithmetic Shift Right by 24.
7930 // This idiom is used by the compiler for the i2b bytecode.
7931 instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7932   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7933 
7934   size(3);
7935   format %{ "MOVSX  $dst,$src :8" %}
7936   ins_encode %{
7937     __ movsbl($dst$$Register, $src$$Register);
7938   %}
7939   ins_pipe(ialu_reg_reg);
7940 %}
7941 
7942 // Shift Left by 16, followed by Arithmetic Shift Right by 16.
7943 // This idiom is used by the compiler for the i2s bytecode.
7944 instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7945   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7946 
7947   size(3);
7948   format %{ "MOVSX  $dst,$src :16" %}
7949   ins_encode %{
7950     __ movswl($dst$$Register, $src$$Register);
7951   %}
7952   ins_pipe(ialu_reg_reg);
7953 %}
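
// Illustrative identities (not part of the original source) behind the two
// matches above, for a 32-bit int x with C semantics:
//   (x << 24) >> 24 == (int)(int8_t)x      // i2b, hence MOVSX  $dst,$src :8
//   (x << 16) >> 16 == (int)(int16_t)x     // i2s, hence MOVSX  $dst,$src :16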
7954 
7955 
7956 // Logical Shift Right by variable
7957 instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7958   match(Set dst (URShiftI dst shift));
7959   effect(KILL cr);
7960 
7961   size(2);
7962   format %{ "SHR    $dst,$shift" %}
7963   opcode(0xD3, 0x5);  /* D3 /5 */
7964   ins_encode( OpcP, RegOpc( dst ) );
7965   ins_pipe( ialu_reg_reg );
7966 %}
7967 
7968 
7969 //----------Logical Instructions-----------------------------------------------
7970 //----------Integer Logical Instructions---------------------------------------
7971 // And Instructions
7972 // And Register with Register
7973 instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7974   match(Set dst (AndI dst src));
7975   effect(KILL cr);
7976 
7977   size(2);
7978   format %{ "AND    $dst,$src" %}
7979   opcode(0x23);
7980   ins_encode( OpcP, RegReg( dst, src) );
7981   ins_pipe( ialu_reg_reg );
7982 %}
7983 
7984 // And Register with Immediate
7985 instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7986   match(Set dst (AndI dst src));
7987   effect(KILL cr);
7988 
7989   format %{ "AND    $dst,$src" %}
7990   opcode(0x81,0x04);  /* Opcode 81 /4 */
7991   // ins_encode( RegImm( dst, src) );
7992   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7993   ins_pipe( ialu_reg );
7994 %}
7995 
7996 // And Register with Memory
7997 instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7998   match(Set dst (AndI dst (LoadI src)));
7999   effect(KILL cr);
8000 
8001   ins_cost(125);
8002   format %{ "AND    $dst,$src" %}
8003   opcode(0x23);
8004   ins_encode( OpcP, RegMem( dst, src) );
8005   ins_pipe( ialu_reg_mem );
8006 %}
8007 
8008 // And Memory with Register
8009 instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8010   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8011   effect(KILL cr);
8012 
8013   ins_cost(150);
8014   format %{ "AND    $dst,$src" %}
8015   opcode(0x21);  /* Opcode 21 /r */
8016   ins_encode( OpcP, RegMem( src, dst ) );
8017   ins_pipe( ialu_mem_reg );
8018 %}
8019 
8020 // And Memory with Immediate
8021 instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8022   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8023   effect(KILL cr);
8024 
8025   ins_cost(125);
8026   format %{ "AND    $dst,$src" %}
8027   opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8028   // ins_encode( MemImm( dst, src) );
8029   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8030   ins_pipe( ialu_mem_imm );
8031 %}
8032 
8033 // BMI1 instructions
8034 instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8035   match(Set dst (AndI (XorI src1 minus_1) src2));
8036   predicate(UseBMI1Instructions);
8037   effect(KILL cr);
8038 
8039   format %{ "ANDNL  $dst, $src1, $src2" %}
8040 
8041   ins_encode %{
8042     __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8043   %}
8044   ins_pipe(ialu_reg);
8045 %}
8046 
8047 instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8048   match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8049   predicate(UseBMI1Instructions);
8050   effect(KILL cr);
8051 
8052   ins_cost(125);
8053   format %{ "ANDNL  $dst, $src1, $src2" %}
8054 
8055   ins_encode %{
8056     __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8057   %}
8058   ins_pipe(ialu_reg_mem);
8059 %}
8060 
8061 instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8062   match(Set dst (AndI (SubI imm_zero src) src));
8063   predicate(UseBMI1Instructions);
8064   effect(KILL cr);
8065 
8066   format %{ "BLSIL  $dst, $src" %}
8067 
8068   ins_encode %{
8069     __ blsil($dst$$Register, $src$$Register);
8070   %}
8071   ins_pipe(ialu_reg);
8072 %}
8073 
8074 instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8075   match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8076   predicate(UseBMI1Instructions);
8077   effect(KILL cr);
8078 
8079   ins_cost(125);
8080   format %{ "BLSIL  $dst, $src" %}
8081 
8082   ins_encode %{
8083     __ blsil($dst$$Register, $src$$Address);
8084   %}
8085   ins_pipe(ialu_reg_mem);
8086 %}
8087 
8088 instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8089 %{
8090   match(Set dst (XorI (AddI src minus_1) src));
8091   predicate(UseBMI1Instructions);
8092   effect(KILL cr);
8093 
8094   format %{ "BLSMSKL $dst, $src" %}
8095 
8096   ins_encode %{
8097     __ blsmskl($dst$$Register, $src$$Register);
8098   %}
8099 
8100   ins_pipe(ialu_reg);
8101 %}
8102 
8103 instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8104 %{
8105   match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8106   predicate(UseBMI1Instructions);
8107   effect(KILL cr);
8108 
8109   ins_cost(125);
8110   format %{ "BLSMSKL $dst, $src" %}
8111 
8112   ins_encode %{
8113     __ blsmskl($dst$$Register, $src$$Address);
8114   %}
8115 
8116   ins_pipe(ialu_reg_mem);
8117 %}
8118 
8119 instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8120 %{
8121   match(Set dst (AndI (AddI src minus_1) src) );
8122   predicate(UseBMI1Instructions);
8123   effect(KILL cr);
8124 
8125   format %{ "BLSRL  $dst, $src" %}
8126 
8127   ins_encode %{
8128     __ blsrl($dst$$Register, $src$$Register);
8129   %}
8130 
8131   ins_pipe(ialu_reg);
8132 %}
8133 
8134 instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8135 %{
8136   match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8137   predicate(UseBMI1Instructions);
8138   effect(KILL cr);
8139 
8140   ins_cost(125);
8141   format %{ "BLSRL  $dst, $src" %}
8142 
8143   ins_encode %{
8144     __ blsrl($dst$$Register, $src$$Address);
8145   %}
8146 
8147   ins_pipe(ialu_reg_mem);
8148 %}
8149 
8150 // Or Instructions
8151 // Or Register with Register
8152 instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8153   match(Set dst (OrI dst src));
8154   effect(KILL cr);
8155 
8156   size(2);
8157   format %{ "OR     $dst,$src" %}
8158   opcode(0x0B);
8159   ins_encode( OpcP, RegReg( dst, src) );
8160   ins_pipe( ialu_reg_reg );
8161 %}
8162 
8163 instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8164   match(Set dst (OrI dst (CastP2X src)));
8165   effect(KILL cr);
8166 
8167   size(2);
8168   format %{ "OR     $dst,$src" %}
8169   opcode(0x0B);
8170   ins_encode( OpcP, RegReg( dst, src) );
8171   ins_pipe( ialu_reg_reg );
8172 %}
8173 
8174 
8175 // Or Register with Immediate
8176 instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8177   match(Set dst (OrI dst src));
8178   effect(KILL cr);
8179 
8180   format %{ "OR     $dst,$src" %}
8181   opcode(0x81,0x01);  /* Opcode 81 /1 id */
8182   // ins_encode( RegImm( dst, src) );
8183   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8184   ins_pipe( ialu_reg );
8185 %}
8186 
8187 // Or Register with Memory
8188 instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8189   match(Set dst (OrI dst (LoadI src)));
8190   effect(KILL cr);
8191 
8192   ins_cost(125);
8193   format %{ "OR     $dst,$src" %}
8194   opcode(0x0B);
8195   ins_encode( OpcP, RegMem( dst, src) );
8196   ins_pipe( ialu_reg_mem );
8197 %}
8198 
8199 // Or Memory with Register
8200 instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8201   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8202   effect(KILL cr);
8203 
8204   ins_cost(150);
8205   format %{ "OR     $dst,$src" %}
8206   opcode(0x09);  /* Opcode 09 /r */
8207   ins_encode( OpcP, RegMem( src, dst ) );
8208   ins_pipe( ialu_mem_reg );
8209 %}
8210 
8211 // Or Memory with Immediate
8212 instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8213   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8214   effect(KILL cr);
8215 
8216   ins_cost(125);
8217   format %{ "OR     $dst,$src" %}
8218   opcode(0x81,0x1);  /* Opcode 81 /1 id */
8219   // ins_encode( MemImm( dst, src) );
8220   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8221   ins_pipe( ialu_mem_imm );
8222 %}
8223 
8224 // ROL/ROR
8225 // ROL expand
8226 instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8227   effect(USE_DEF dst, USE shift, KILL cr);
8228 
8229   format %{ "ROL    $dst, $shift" %}
8230   opcode(0xD1, 0x0); /* Opcode D1 /0 */
8231   ins_encode( OpcP, RegOpc( dst ));
8232   ins_pipe( ialu_reg );
8233 %}
8234 
8235 instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8236   effect(USE_DEF dst, USE shift, KILL cr);
8237 
8238   format %{ "ROL    $dst, $shift" %}
8239   opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8240   ins_encode( RegOpcImm(dst, shift) );
8241   ins_pipe(ialu_reg);
8242 %}
8243 
8244 instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8245   effect(USE_DEF dst, USE shift, KILL cr);
8246 
8247   format %{ "ROL    $dst, $shift" %}
8248   opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8249   ins_encode(OpcP, RegOpc(dst));
8250   ins_pipe( ialu_reg_reg );
8251 %}
8252 // end of ROL expand
8253 
8254 // ROL 32bit by one once
8255 instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8256   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8257 
8258   expand %{
8259     rolI_eReg_imm1(dst, lshift, cr);
8260   %}
8261 %}
8262 
8263 // ROL 32bit var by imm8 once
8264 instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8265   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8266   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8267 
8268   expand %{
8269     rolI_eReg_imm8(dst, lshift, cr);
8270   %}
8271 %}
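
// The predicate above accepts only shift-count pairs that sum to 0 mod 32, i.e.
// exactly the pairs for which the OR of the two shifts is a rotate.  Illustrative
// identity (not part of the original source), for uint32_t x and 0 < s < 32:
//   (x << s) | (x >> (32 - s))  ==  x rotated left by s bits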
8272 
8273 // ROL 32bit var by var once
8274 instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8275   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8276 
8277   expand %{
8278     rolI_eReg_CL(dst, shift, cr);
8279   %}
8280 %}
8281 
8282 // ROL 32bit var by var once
8283 instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8284   match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8285 
8286   expand %{
8287     rolI_eReg_CL(dst, shift, cr);
8288   %}
8289 %}
8290 
8291 // ROR expand
8292 instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8293   effect(USE_DEF dst, USE shift, KILL cr);
8294 
8295   format %{ "ROR    $dst, $shift" %}
8296   opcode(0xD1,0x1);  /* Opcode D1 /1 */
8297   ins_encode( OpcP, RegOpc( dst ) );
8298   ins_pipe( ialu_reg );
8299 %}
8300 
8301 instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8302   effect (USE_DEF dst, USE shift, KILL cr);
8303 
8304   format %{ "ROR    $dst, $shift" %}
8305   opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8306   ins_encode( RegOpcImm(dst, shift) );
8307   ins_pipe( ialu_reg );
8308 %}
8309 
8310 instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8311   effect(USE_DEF dst, USE shift, KILL cr);
8312 
8313   format %{ "ROR    $dst, $shift" %}
8314   opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8315   ins_encode(OpcP, RegOpc(dst));
8316   ins_pipe( ialu_reg_reg );
8317 %}
8318 // end of ROR expand
8319 
8320 // ROR right once
8321 instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8322   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8323 
8324   expand %{
8325     rorI_eReg_imm1(dst, rshift, cr);
8326   %}
8327 %}
8328 
8329 // ROR 32bit by immI8 once
8330 instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8331   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8332   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8333 
8334   expand %{
8335     rorI_eReg_imm8(dst, rshift, cr);
8336   %}
8337 %}
8338 
8339 // ROR 32bit var by var once
8340 instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8341   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8342 
8343   expand %{
8344     rorI_eReg_CL(dst, shift, cr);
8345   %}
8346 %}
8347 
8348 // ROR 32bit var by var once
8349 instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8350   match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8351 
8352   expand %{
8353     rorI_eReg_CL(dst, shift, cr);
8354   %}
8355 %}
8356 
8357 // Xor Instructions
8358 // Xor Register with Register
8359 instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8360   match(Set dst (XorI dst src));
8361   effect(KILL cr);
8362 
8363   size(2);
8364   format %{ "XOR    $dst,$src" %}
8365   opcode(0x33);
8366   ins_encode( OpcP, RegReg( dst, src) );
8367   ins_pipe( ialu_reg_reg );
8368 %}
8369 
8370 // Xor Register with Immediate -1
8371 instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8372   match(Set dst (XorI dst imm));
8373 
8374   size(2);
8375   format %{ "NOT    $dst" %}
8376   ins_encode %{
8377      __ notl($dst$$Register);
8378   %}
8379   ins_pipe( ialu_reg );
8380 %}
8381 
8382 // Xor Register with Immediate
8383 instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8384   match(Set dst (XorI dst src));
8385   effect(KILL cr);
8386 
8387   format %{ "XOR    $dst,$src" %}
8388   opcode(0x81,0x06);  /* Opcode 81 /6 id */
8389   // ins_encode( RegImm( dst, src) );
8390   ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8391   ins_pipe( ialu_reg );
8392 %}
8393 
8394 // Xor Register with Memory
8395 instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8396   match(Set dst (XorI dst (LoadI src)));
8397   effect(KILL cr);
8398 
8399   ins_cost(125);
8400   format %{ "XOR    $dst,$src" %}
8401   opcode(0x33);
8402   ins_encode( OpcP, RegMem(dst, src) );
8403   ins_pipe( ialu_reg_mem );
8404 %}
8405 
8406 // Xor Memory with Register
8407 instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8408   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8409   effect(KILL cr);
8410 
8411   ins_cost(150);
8412   format %{ "XOR    $dst,$src" %}
8413   opcode(0x31);  /* Opcode 31 /r */
8414   ins_encode( OpcP, RegMem( src, dst ) );
8415   ins_pipe( ialu_mem_reg );
8416 %}
8417 
8418 // Xor Memory with Immediate
8419 instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8420   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8421   effect(KILL cr);
8422 
8423   ins_cost(125);
8424   format %{ "XOR    $dst,$src" %}
8425   opcode(0x81,0x6);  /* Opcode 81 /6 id */
8426   ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8427   ins_pipe( ialu_mem_imm );
8428 %}
8429 
8430 //----------Convert Int to Boolean---------------------------------------------
8431 
8432 instruct movI_nocopy(rRegI dst, rRegI src) %{
8433   effect( DEF dst, USE src );
8434   format %{ "MOV    $dst,$src" %}
8435   ins_encode( enc_Copy( dst, src) );
8436   ins_pipe( ialu_reg_reg );
8437 %}
8438 
8439 instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8440   effect( USE_DEF dst, USE src, KILL cr );
8441 
8442   size(4);
8443   format %{ "NEG    $dst\n\t"
8444             "ADC    $dst,$src" %}
8445   ins_encode( neg_reg(dst),
8446               OpcRegReg(0x13,dst,src) );
8447   ins_pipe( ialu_reg_reg_long );
8448 %}
8449 
8450 instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8451   match(Set dst (Conv2B src));
8452 
8453   expand %{
8454     movI_nocopy(dst,src);
8455     ci2b(dst,src,cr);
8456   %}
8457 %}
8458 
8459 instruct movP_nocopy(rRegI dst, eRegP src) %{
8460   effect( DEF dst, USE src );
8461   format %{ "MOV    $dst,$src" %}
8462   ins_encode( enc_Copy( dst, src) );
8463   ins_pipe( ialu_reg_reg );
8464 %}
8465 
8466 instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8467   effect( USE_DEF dst, USE src, KILL cr );
8468   format %{ "NEG    $dst\n\t"
8469             "ADC    $dst,$src" %}
8470   ins_encode( neg_reg(dst),
8471               OpcRegReg(0x13,dst,src) );
8472   ins_pipe( ialu_reg_reg_long );
8473 %}
8474 
8475 instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8476   match(Set dst (Conv2B src));
8477 
8478   expand %{
8479     movP_nocopy(dst,src);
8480     cp2b(dst,src,cr);
8481   %}
8482 %}
8483 
8484 instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8485   match(Set dst (CmpLTMask p q));
8486   effect(KILL cr);
8487   ins_cost(400);
8488 
8489   // SETlt can only use the low byte of EAX, EBX, ECX, or EDX as destination
8490   format %{ "XOR    $dst,$dst\n\t"
8491             "CMP    $p,$q\n\t"
8492             "SETlt  $dst\n\t"
8493             "NEG    $dst" %}
8494   ins_encode %{
8495     Register Rp = $p$$Register;
8496     Register Rq = $q$$Register;
8497     Register Rd = $dst$$Register;
8499     __ xorl(Rd, Rd);
8500     __ cmpl(Rp, Rq);
8501     __ setb(Assembler::less, Rd);
8502     __ negl(Rd);
8503   %}
8504 
8505   ins_pipe(pipe_slow);
8506 %}
8507 
8508 instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8509   match(Set dst (CmpLTMask dst zero));
8510   effect(DEF dst, KILL cr);
8511   ins_cost(100);
8512 
8513   format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8514   ins_encode %{
8515   __ sarl($dst$$Register, 31);
8516   %}
8517   ins_pipe(ialu_reg);
8518 %}
8519 
8520 /* better to save a register than avoid a branch */
8521 instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8522   match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8523   effect(KILL cr);
8524   ins_cost(400);
8525   format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8526             "JGE    done\n\t"
8527             "ADD    $p,$y\n"
8528             "done:  " %}
8529   ins_encode %{
8530     Register Rp = $p$$Register;
8531     Register Rq = $q$$Register;
8532     Register Ry = $y$$Register;
8533     Label done;
8534     __ subl(Rp, Rq);
8535     __ jccb(Assembler::greaterEqual, done);
8536     __ addl(Rp, Ry);
8537     __ bind(done);
8538   %}
8539 
8540   ins_pipe(pipe_cmplt);
8541 %}
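
// Sketch (illustrative only) of the matched ideal pattern and its branching
// encoding above, for 32-bit ints p, q, y:
//   mask = (p < q) ? -1 : 0;        // CmpLTMask
//   p    = (mask & y) + (p - q);    // matched expression
// which the code computes as:
//   p -= q;  if (/* original p < q, signed */) p += y;   // JGE skips the add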
8542 
8543 /* better to save a register than avoid a branch */
8544 instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8545   match(Set y (AndI (CmpLTMask p q) y));
8546   effect(KILL cr);
8547 
8548   ins_cost(300);
8549 
8550   format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8551             "JLT      done\n\t"
8552             "XORL     $y, $y\n"
8553             "done:  " %}
8554   ins_encode %{
8555     Register Rp = $p$$Register;
8556     Register Rq = $q$$Register;
8557     Register Ry = $y$$Register;
8558     Label done;
8559     __ cmpl(Rp, Rq);
8560     __ jccb(Assembler::less, done);
8561     __ xorl(Ry, Ry);
8562     __ bind(done);
8563   %}
8564 
8565   ins_pipe(pipe_cmplt);
8566 %}
8567 
8568 /* If I enable this, I encourage spilling in the inner loop of compress.
8569 instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8570   match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8571 */
8572 //----------Overflow Math Instructions-----------------------------------------
8573 
8574 instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8575 %{
8576   match(Set cr (OverflowAddI op1 op2));
8577   effect(DEF cr, USE_KILL op1, USE op2);
8578 
8579   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8580 
8581   ins_encode %{
8582     __ addl($op1$$Register, $op2$$Register);
8583   %}
8584   ins_pipe(ialu_reg_reg);
8585 %}
8586 
8587 instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8588 %{
8589   match(Set cr (OverflowAddI op1 op2));
8590   effect(DEF cr, USE_KILL op1, USE op2);
8591 
8592   format %{ "ADD    $op1, $op2\t# overflow check int" %}
8593 
8594   ins_encode %{
8595     __ addl($op1$$Register, $op2$$constant);
8596   %}
8597   ins_pipe(ialu_reg_reg);
8598 %}
8599 
8600 instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8601 %{
8602   match(Set cr (OverflowSubI op1 op2));
8603 
8604   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8605   ins_encode %{
8606     __ cmpl($op1$$Register, $op2$$Register);
8607   %}
8608   ins_pipe(ialu_reg_reg);
8609 %}
8610 
8611 instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8612 %{
8613   match(Set cr (OverflowSubI op1 op2));
8614 
8615   format %{ "CMP    $op1, $op2\t# overflow check int" %}
8616   ins_encode %{
8617     __ cmpl($op1$$Register, $op2$$constant);
8618   %}
8619   ins_pipe(ialu_reg_reg);
8620 %}
8621 
8622 instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8623 %{
8624   match(Set cr (OverflowSubI zero op2));
8625   effect(DEF cr, USE_KILL op2);
8626 
8627   format %{ "NEG    $op2\t# overflow check int" %}
8628   ins_encode %{
8629     __ negl($op2$$Register);
8630   %}
8631   ins_pipe(ialu_reg_reg);
8632 %}
8633 
8634 instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8635 %{
8636   match(Set cr (OverflowMulI op1 op2));
8637   effect(DEF cr, USE_KILL op1, USE op2);
8638 
8639   format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8640   ins_encode %{
8641     __ imull($op1$$Register, $op2$$Register);
8642   %}
8643   ins_pipe(ialu_reg_reg_alu0);
8644 %}
8645 
8646 instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8647 %{
8648   match(Set cr (OverflowMulI op1 op2));
8649   effect(DEF cr, TEMP tmp, USE op1, USE op2);
8650 
8651   format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8652   ins_encode %{
8653     __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8654   %}
8655   ins_pipe(ialu_reg_reg_alu0);
8656 %}
8657 
8658 //----------Long Instructions------------------------------------------------
8659 // Add Long Register with Register
8660 instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8661   match(Set dst (AddL dst src));
8662   effect(KILL cr);
8663   ins_cost(200);
8664   format %{ "ADD    $dst.lo,$src.lo\n\t"
8665             "ADC    $dst.hi,$src.hi" %}
8666   opcode(0x03, 0x13);
8667   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8668   ins_pipe( ialu_reg_reg_long );
8669 %}
8670 
8671 // Add Long Register with Immediate
8672 instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8673   match(Set dst (AddL dst src));
8674   effect(KILL cr);
8675   format %{ "ADD    $dst.lo,$src.lo\n\t"
8676             "ADC    $dst.hi,$src.hi" %}
8677   opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8678   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8679   ins_pipe( ialu_reg_long );
8680 %}
8681 
8682 // Add Long Register with Memory
8683 instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8684   match(Set dst (AddL dst (LoadL mem)));
8685   effect(KILL cr);
8686   ins_cost(125);
8687   format %{ "ADD    $dst.lo,$mem\n\t"
8688             "ADC    $dst.hi,$mem+4" %}
8689   opcode(0x03, 0x13);
8690   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8691   ins_pipe( ialu_reg_long_mem );
8692 %}
8693 
8694 // Subtract Long Register with Register.
8695 instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8696   match(Set dst (SubL dst src));
8697   effect(KILL cr);
8698   ins_cost(200);
8699   format %{ "SUB    $dst.lo,$src.lo\n\t"
8700             "SBB    $dst.hi,$src.hi" %}
8701   opcode(0x2B, 0x1B);
8702   ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8703   ins_pipe( ialu_reg_reg_long );
8704 %}
8705 
8706 // Subtract Long Register with Immediate
8707 instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8708   match(Set dst (SubL dst src));
8709   effect(KILL cr);
8710   format %{ "SUB    $dst.lo,$src.lo\n\t"
8711             "SBB    $dst.hi,$src.hi" %}
8712   opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8713   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8714   ins_pipe( ialu_reg_long );
8715 %}
8716 
8717 // Subtract Long Register with Memory
8718 instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8719   match(Set dst (SubL dst (LoadL mem)));
8720   effect(KILL cr);
8721   ins_cost(125);
8722   format %{ "SUB    $dst.lo,$mem\n\t"
8723             "SBB    $dst.hi,$mem+4" %}
8724   opcode(0x2B, 0x1B);
8725   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8726   ins_pipe( ialu_reg_long_mem );
8727 %}
8728 
8729 instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8730   match(Set dst (SubL zero dst));
8731   effect(KILL cr);
8732   ins_cost(300);
8733   format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8734   ins_encode( neg_long(dst) );
8735   ins_pipe( ialu_reg_reg_long );
8736 %}
8737 
8738 // And Long Register with Register
8739 instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8740   match(Set dst (AndL dst src));
8741   effect(KILL cr);
8742   format %{ "AND    $dst.lo,$src.lo\n\t"
8743             "AND    $dst.hi,$src.hi" %}
8744   opcode(0x23,0x23);
8745   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8746   ins_pipe( ialu_reg_reg_long );
8747 %}
8748 
8749 // And Long Register with Immediate
8750 instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8751   match(Set dst (AndL dst src));
8752   effect(KILL cr);
8753   format %{ "AND    $dst.lo,$src.lo\n\t"
8754             "AND    $dst.hi,$src.hi" %}
8755   opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8756   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8757   ins_pipe( ialu_reg_long );
8758 %}
8759 
8760 // And Long Register with Memory
8761 instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8762   match(Set dst (AndL dst (LoadL mem)));
8763   effect(KILL cr);
8764   ins_cost(125);
8765   format %{ "AND    $dst.lo,$mem\n\t"
8766             "AND    $dst.hi,$mem+4" %}
8767   opcode(0x23, 0x23);
8768   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8769   ins_pipe( ialu_reg_long_mem );
8770 %}
8771 
8772 // BMI1 instructions
8773 instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8774   match(Set dst (AndL (XorL src1 minus_1) src2));
8775   predicate(UseBMI1Instructions);
8776   effect(KILL cr, TEMP dst);
8777 
8778   format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8779             "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8780          %}
8781 
8782   ins_encode %{
8783     Register Rdst = $dst$$Register;
8784     Register Rsrc1 = $src1$$Register;
8785     Register Rsrc2 = $src2$$Register;
8786     __ andnl(Rdst, Rsrc1, Rsrc2);
8787     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8788   %}
8789   ins_pipe(ialu_reg_reg_long);
8790 %}
8791 
8792 instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8793   match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8794   predicate(UseBMI1Instructions);
8795   effect(KILL cr, TEMP dst);
8796 
8797   ins_cost(125);
8798   format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8799             "ANDNL  $dst.hi, $src1.hi, $src2+4"
8800          %}
8801 
8802   ins_encode %{
8803     Register Rdst = $dst$$Register;
8804     Register Rsrc1 = $src1$$Register;
8805     Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8806 
8807     __ andnl(Rdst, Rsrc1, $src2$$Address);
8808     __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8809   %}
8810   ins_pipe(ialu_reg_mem);
8811 %}
8812 
8813 instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8814   match(Set dst (AndL (SubL imm_zero src) src));
8815   predicate(UseBMI1Instructions);
8816   effect(KILL cr, TEMP dst);
8817 
8818   format %{ "MOVL   $dst.hi, 0\n\t"
8819             "BLSIL  $dst.lo, $src.lo\n\t"
8820             "JNZ    done\n\t"
8821             "BLSIL  $dst.hi, $src.hi\n"
8822             "done:"
8823          %}
8824 
8825   ins_encode %{
8826     Label done;
8827     Register Rdst = $dst$$Register;
8828     Register Rsrc = $src$$Register;
8829     __ movl(HIGH_FROM_LOW(Rdst), 0);
8830     __ blsil(Rdst, Rsrc);
8831     __ jccb(Assembler::notZero, done);
8832     __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8833     __ bind(done);
8834   %}
8835   ins_pipe(ialu_reg);
8836 %}
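
// Sketch (illustrative only) of the 64-bit split above, assuming
// blsi32(x) == (x & -x) on each 32-bit half:
//   if (lo != 0)  result = blsi32(lo);                     // high word stays 0
//   else          result = (uint64_t)blsi32(hi) << 32;
// The JNZ tests the low-half BLSI result: nonzero means the lowest set bit was
// already found in the low word.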
8837 
8838 instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8839   match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8840   predicate(UseBMI1Instructions);
8841   effect(KILL cr, TEMP dst);
8842 
8843   ins_cost(125);
8844   format %{ "MOVL   $dst.hi, 0\n\t"
8845             "BLSIL  $dst.lo, $src\n\t"
8846             "JNZ    done\n\t"
8847             "BLSIL  $dst.hi, $src+4\n"
8848             "done:"
8849          %}
8850 
8851   ins_encode %{
8852     Label done;
8853     Register Rdst = $dst$$Register;
8854     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8855 
8856     __ movl(HIGH_FROM_LOW(Rdst), 0);
8857     __ blsil(Rdst, $src$$Address);
8858     __ jccb(Assembler::notZero, done);
8859     __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8860     __ bind(done);
8861   %}
8862   ins_pipe(ialu_reg_mem);
8863 %}
8864 
8865 instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8866 %{
8867   match(Set dst (XorL (AddL src minus_1) src));
8868   predicate(UseBMI1Instructions);
8869   effect(KILL cr, TEMP dst);
8870 
8871   format %{ "MOVL    $dst.hi, 0\n\t"
8872             "BLSMSKL $dst.lo, $src.lo\n\t"
8873             "JNC     done\n\t"
8874             "BLSMSKL $dst.hi, $src.hi\n"
8875             "done:"
8876          %}
8877 
8878   ins_encode %{
8879     Label done;
8880     Register Rdst = $dst$$Register;
8881     Register Rsrc = $src$$Register;
8882     __ movl(HIGH_FROM_LOW(Rdst), 0);
8883     __ blsmskl(Rdst, Rsrc);
8884     __ jccb(Assembler::carryClear, done);
8885     __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8886     __ bind(done);
8887   %}
8888 
8889   ins_pipe(ialu_reg);
8890 %}
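
// Sketch (illustrative only), assuming blsmsk32(x) == (x ^ (x - 1)) on each half:
//   if (lo != 0)  result = blsmsk32(lo);                               // hi stays 0
//   else          result = ((uint64_t)blsmsk32(hi) << 32) | 0xFFFFFFFF;
// A zero low word means the decrement borrows into the high word; the JNC test
// relies on BLSMSK raising CF in exactly that case (zero source).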
8891 
8892 instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8893 %{
8894   match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8895   predicate(UseBMI1Instructions);
8896   effect(KILL cr, TEMP dst);
8897 
8898   ins_cost(125);
8899   format %{ "MOVL    $dst.hi, 0\n\t"
8900             "BLSMSKL $dst.lo, $src\n\t"
8901             "JNC     done\n\t"
8902             "BLSMSKL $dst.hi, $src+4\n"
8903             "done:"
8904          %}
8905 
8906   ins_encode %{
8907     Label done;
8908     Register Rdst = $dst$$Register;
8909     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8910 
8911     __ movl(HIGH_FROM_LOW(Rdst), 0);
8912     __ blsmskl(Rdst, $src$$Address);
8913     __ jccb(Assembler::carryClear, done);
8914     __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8915     __ bind(done);
8916   %}
8917 
8918   ins_pipe(ialu_reg_mem);
8919 %}
8920 
8921 instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8922 %{
8923   match(Set dst (AndL (AddL src minus_1) src) );
8924   predicate(UseBMI1Instructions);
8925   effect(KILL cr, TEMP dst);
8926 
8927   format %{ "MOVL   $dst.hi, $src.hi\n\t"
8928             "BLSRL  $dst.lo, $src.lo\n\t"
8929             "JNC    done\n\t"
8930             "BLSRL  $dst.hi, $src.hi\n"
8931             "done:"
8932   %}
8933 
8934   ins_encode %{
8935     Label done;
8936     Register Rdst = $dst$$Register;
8937     Register Rsrc = $src$$Register;
8938     __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8939     __ blsrl(Rdst, Rsrc);
8940     __ jccb(Assembler::carryClear, done);
8941     __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8942     __ bind(done);
8943   %}
8944 
8945   ins_pipe(ialu_reg);
8946 %}
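
// Sketch (illustrative only), assuming blsr32(x) == (x & (x - 1)) on each half:
//   if (lo != 0)  result = ((uint64_t)hi << 32) | blsr32(lo);
//   else          result = (uint64_t)blsr32(hi) << 32;      // low word stays 0
// As with BLSMSK above, a zero low word borrows into the high word, which the
// JNC test detects.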
8947 
8948 instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8949 %{
8950   match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8951   predicate(UseBMI1Instructions);
8952   effect(KILL cr, TEMP dst);
8953 
8954   ins_cost(125);
8955   format %{ "MOVL   $dst.hi, $src+4\n\t"
8956             "BLSRL  $dst.lo, $src\n\t"
8957             "JNC    done\n\t"
8958             "BLSRL  $dst.hi, $src+4\n"
8959             "done:"
8960   %}
8961 
8962   ins_encode %{
8963     Label done;
8964     Register Rdst = $dst$$Register;
8965     Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8966     __ movl(HIGH_FROM_LOW(Rdst), src_hi);
8967     __ blsrl(Rdst, $src$$Address);
8968     __ jccb(Assembler::carryClear, done);
8969     __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
8970     __ bind(done);
8971   %}
8972 
8973   ins_pipe(ialu_reg_mem);
8974 %}
8975 
8976 // Or Long Register with Register
8977 instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8978   match(Set dst (OrL dst src));
8979   effect(KILL cr);
8980   format %{ "OR     $dst.lo,$src.lo\n\t"
8981             "OR     $dst.hi,$src.hi" %}
8982   opcode(0x0B,0x0B);
8983   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8984   ins_pipe( ialu_reg_reg_long );
8985 %}
8986 
8987 // Or Long Register with Immediate
8988 instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8989   match(Set dst (OrL dst src));
8990   effect(KILL cr);
8991   format %{ "OR     $dst.lo,$src.lo\n\t"
8992             "OR     $dst.hi,$src.hi" %}
8993   opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
8994   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8995   ins_pipe( ialu_reg_long );
8996 %}
8997 
8998 // Or Long Register with Memory
8999 instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9000   match(Set dst (OrL dst (LoadL mem)));
9001   effect(KILL cr);
9002   ins_cost(125);
9003   format %{ "OR     $dst.lo,$mem\n\t"
9004             "OR     $dst.hi,$mem+4" %}
9005   opcode(0x0B,0x0B);
9006   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9007   ins_pipe( ialu_reg_long_mem );
9008 %}
9009 
9010 // Xor Long Register with Register
9011 instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9012   match(Set dst (XorL dst src));
9013   effect(KILL cr);
9014   format %{ "XOR    $dst.lo,$src.lo\n\t"
9015             "XOR    $dst.hi,$src.hi" %}
9016   opcode(0x33,0x33);
9017   ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9018   ins_pipe( ialu_reg_reg_long );
9019 %}
9020 
9021 // Xor Long Register with Immediate -1
9022 instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9023   match(Set dst (XorL dst imm));
9024   format %{ "NOT    $dst.lo\n\t"
9025             "NOT    $dst.hi" %}
9026   ins_encode %{
9027      __ notl($dst$$Register);
9028      __ notl(HIGH_FROM_LOW($dst$$Register));
9029   %}
9030   ins_pipe( ialu_reg_long );
9031 %}
9032 
9033 // Xor Long Register with Immediate
9034 instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9035   match(Set dst (XorL dst src));
9036   effect(KILL cr);
9037   format %{ "XOR    $dst.lo,$src.lo\n\t"
9038             "XOR    $dst.hi,$src.hi" %}
9039   opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9040   ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9041   ins_pipe( ialu_reg_long );
9042 %}
9043 
9044 // Xor Long Register with Memory
9045 instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9046   match(Set dst (XorL dst (LoadL mem)));
9047   effect(KILL cr);
9048   ins_cost(125);
9049   format %{ "XOR    $dst.lo,$mem\n\t"
9050             "XOR    $dst.hi,$mem+4" %}
9051   opcode(0x33,0x33);
9052   ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9053   ins_pipe( ialu_reg_long_mem );
9054 %}
9055 
9056 // Shift Left Long by 1
9057 instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9058   predicate(UseNewLongLShift);
9059   match(Set dst (LShiftL dst cnt));
9060   effect(KILL cr);
9061   ins_cost(100);
9062   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9063             "ADC    $dst.hi,$dst.hi" %}
9064   ins_encode %{
9065     __ addl($dst$$Register,$dst$$Register);
9066     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9067   %}
9068   ins_pipe( ialu_reg_long );
9069 %}
9070 
9071 // Shift Left Long by 2
9072 instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9073   predicate(UseNewLongLShift);
9074   match(Set dst (LShiftL dst cnt));
9075   effect(KILL cr);
9076   ins_cost(100);
9077   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9078             "ADC    $dst.hi,$dst.hi\n\t"
9079             "ADD    $dst.lo,$dst.lo\n\t"
9080             "ADC    $dst.hi,$dst.hi" %}
9081   ins_encode %{
9082     __ addl($dst$$Register,$dst$$Register);
9083     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9084     __ addl($dst$$Register,$dst$$Register);
9085     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9086   %}
9087   ins_pipe( ialu_reg_long );
9088 %}
9089 
9090 // Shift Left Long by 3
9091 instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9092   predicate(UseNewLongLShift);
9093   match(Set dst (LShiftL dst cnt));
9094   effect(KILL cr);
9095   ins_cost(100);
9096   format %{ "ADD    $dst.lo,$dst.lo\n\t"
9097             "ADC    $dst.hi,$dst.hi\n\t"
9098             "ADD    $dst.lo,$dst.lo\n\t"
9099             "ADC    $dst.hi,$dst.hi\n\t"
9100             "ADD    $dst.lo,$dst.lo\n\t"
9101             "ADC    $dst.hi,$dst.hi" %}
9102   ins_encode %{
9103     __ addl($dst$$Register,$dst$$Register);
9104     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9105     __ addl($dst$$Register,$dst$$Register);
9106     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9107     __ addl($dst$$Register,$dst$$Register);
9108     __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9109   %}
9110   ins_pipe( ialu_reg_long );
9111 %}
9112 
9113 // Shift Left Long by 1-31
9114 instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9115   match(Set dst (LShiftL dst cnt));
9116   effect(KILL cr);
9117   ins_cost(200);
9118   format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9119             "SHL    $dst.lo,$cnt" %}
9120   opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9121   ins_encode( move_long_small_shift(dst,cnt) );
9122   ins_pipe( ialu_reg_long );
9123 %}
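
// Illustrative expansion (not part of the original source) of the double-width
// shift above, for 1 <= cnt <= 31 and unsigned 32-bit halves:
//   hi = (hi << cnt) | (lo >> (32 - cnt));   // SHLD shifts bits in from lo
//   lo =  lo << cnt;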
9124 
9125 // Shift Left Long by 32-63
9126 instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9127   match(Set dst (LShiftL dst cnt));
9128   effect(KILL cr);
9129   ins_cost(300);
9130   format %{ "MOV    $dst.hi,$dst.lo\n"
9131           "\tSHL    $dst.hi,$cnt-32\n"
9132           "\tXOR    $dst.lo,$dst.lo" %}
9133   opcode(0xC1, 0x4);  /* C1 /4 ib */
9134   ins_encode( move_long_big_shift_clr(dst,cnt) );
9135   ins_pipe( ialu_reg_long );
9136 %}
9137 
9138 // Shift Left Long by variable
9139 instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9140   match(Set dst (LShiftL dst shift));
9141   effect(KILL cr);
9142   ins_cost(500+200);
9143   size(17);
9144   format %{ "TEST   $shift,32\n\t"
9145             "JEQ,s  small\n\t"
9146             "MOV    $dst.hi,$dst.lo\n\t"
9147             "XOR    $dst.lo,$dst.lo\n"
9148     "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9149             "SHL    $dst.lo,$shift" %}
9150   ins_encode( shift_left_long( dst, shift ) );
9151   ins_pipe( pipe_slow );
9152 %}
9153 
9154 // Shift Right Long by 1-31
9155 instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9156   match(Set dst (URShiftL dst cnt));
9157   effect(KILL cr);
9158   ins_cost(200);
9159   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9160             "SHR    $dst.hi,$cnt" %}
9161   opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9162   ins_encode( move_long_small_shift(dst,cnt) );
9163   ins_pipe( ialu_reg_long );
9164 %}
9165 
9166 // Shift Right Long by 32-63
9167 instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9168   match(Set dst (URShiftL dst cnt));
9169   effect(KILL cr);
9170   ins_cost(300);
9171   format %{ "MOV    $dst.lo,$dst.hi\n"
9172           "\tSHR    $dst.lo,$cnt-32\n"
9173           "\tXOR    $dst.hi,$dst.hi" %}
9174   opcode(0xC1, 0x5);  /* C1 /5 ib */
9175   ins_encode( move_long_big_shift_clr(dst,cnt) );
9176   ins_pipe( ialu_reg_long );
9177 %}
9178 
9179 // Shift Right Long by variable
9180 instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9181   match(Set dst (URShiftL dst shift));
9182   effect(KILL cr);
9183   ins_cost(600);
9184   size(17);
9185   format %{ "TEST   $shift,32\n\t"
9186             "JEQ,s  small\n\t"
9187             "MOV    $dst.lo,$dst.hi\n\t"
9188             "XOR    $dst.hi,$dst.hi\n"
9189     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9190             "SHR    $dst.hi,$shift" %}
9191   ins_encode( shift_right_long( dst, shift ) );
9192   ins_pipe( pipe_slow );
9193 %}
9194 
9195 // Shift Right Long by 1-31
9196 instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9197   match(Set dst (RShiftL dst cnt));
9198   effect(KILL cr);
9199   ins_cost(200);
9200   format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9201             "SAR    $dst.hi,$cnt" %}
9202   opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9203   ins_encode( move_long_small_shift(dst,cnt) );
9204   ins_pipe( ialu_reg_long );
9205 %}
9206 
9207 // Shift Right Long by 32-63
9208 instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9209   match(Set dst (RShiftL dst cnt));
9210   effect(KILL cr);
9211   ins_cost(300);
9212   format %{ "MOV    $dst.lo,$dst.hi\n"
9213           "\tSAR    $dst.lo,$cnt-32\n"
9214           "\tSAR    $dst.hi,31" %}
9215   opcode(0xC1, 0x7);  /* C1 /7 ib */
9216   ins_encode( move_long_big_shift_sign(dst,cnt) );
9217   ins_pipe( ialu_reg_long );
9218 %}
9219 
9220 // Shift Right arithmetic Long by variable
9221 instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9222   match(Set dst (RShiftL dst shift));
9223   effect(KILL cr);
9224   ins_cost(600);
9225   size(18);
9226   format %{ "TEST   $shift,32\n\t"
9227             "JEQ,s  small\n\t"
9228             "MOV    $dst.lo,$dst.hi\n\t"
9229             "SAR    $dst.hi,31\n"
9230     "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9231             "SAR    $dst.hi,$shift" %}
9232   ins_encode( shift_right_arith_long( dst, shift ) );
9233   ins_pipe( pipe_slow );
9234 %}
9235 
9236 
9237 //----------Double Instructions------------------------------------------------
9238 // Double Math
9239 
9240 // Compare & branch
9241 
9242 // P6 version of double compare, sets condition codes in EFLAGS
9243 instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9244   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9245   match(Set cr (CmpD src1 src2));
9246   effect(KILL rax);
9247   ins_cost(150);
9248   format %{ "FLD    $src1\n\t"
9249             "FUCOMIP ST,$src2  // P6 instruction\n\t"
9250             "JNP    exit\n\t"
9251             "MOV    ah,1       // saw a NaN, set CF\n\t"
9252             "SAHF\n"
9253      "exit:\tNOP               // avoid branch to branch" %}
9254   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9255   ins_encode( Push_Reg_DPR(src1),
9256               OpcP, RegOpc(src2),
9257               cmpF_P6_fixup );
9258   ins_pipe( pipe_slow );
9259 %}
9260 
9261 instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9262   predicate(VM_Version::supports_cmov() && UseSSE <=1);
9263   match(Set cr (CmpD src1 src2));
9264   ins_cost(150);
9265   format %{ "FLD    $src1\n\t"
9266             "FUCOMIP ST,$src2  // P6 instruction" %}
9267   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9268   ins_encode( Push_Reg_DPR(src1),
9269               OpcP, RegOpc(src2));
9270   ins_pipe( pipe_slow );
9271 %}
9272 
9273 // Compare & branch
9274 instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9275   predicate(UseSSE<=1);
9276   match(Set cr (CmpD src1 src2));
9277   effect(KILL rax);
9278   ins_cost(200);
9279   format %{ "FLD    $src1\n\t"
9280             "FCOMp  $src2\n\t"
9281             "FNSTSW AX\n\t"
9282             "TEST   AX,0x400\n\t"
9283             "JZ,s   flags\n\t"
9284             "MOV    AH,1\t# unordered treat as LT\n"
9285     "flags:\tSAHF" %}
9286   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9287   ins_encode( Push_Reg_DPR(src1),
9288               OpcP, RegOpc(src2),
9289               fpu_flags);
9290   ins_pipe( pipe_slow );
9291 %}
9292 
9293 // Compare vs zero into -1,0,1
9294 instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9295   predicate(UseSSE<=1);
9296   match(Set dst (CmpD3 src1 zero));
9297   effect(KILL cr, KILL rax);
9298   ins_cost(280);
9299   format %{ "FTSTD  $dst,$src1" %}
9300   opcode(0xE4, 0xD9);
9301   ins_encode( Push_Reg_DPR(src1),
9302               OpcS, OpcP, PopFPU,
9303               CmpF_Result(dst));
9304   ins_pipe( pipe_slow );
9305 %}
9306 
9307 // Compare into -1,0,1
9308 instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9309   predicate(UseSSE<=1);
9310   match(Set dst (CmpD3 src1 src2));
9311   effect(KILL cr, KILL rax);
9312   ins_cost(300);
9313   format %{ "FCMPD  $dst,$src1,$src2" %}
9314   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9315   ins_encode( Push_Reg_DPR(src1),
9316               OpcP, RegOpc(src2),
9317               CmpF_Result(dst));
9318   ins_pipe( pipe_slow );
9319 %}
9320 
9321 // double compare and set condition codes in EFLAGS by XMM regs
9322 instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9323   predicate(UseSSE>=2);
9324   match(Set cr (CmpD src1 src2));
9325   ins_cost(145);
9326   format %{ "UCOMISD $src1,$src2\n\t"
9327             "JNP,s   exit\n\t"
9328             "PUSHF\t# saw NaN, set CF\n\t"
9329             "AND     [rsp], #0xffffff2b\n\t"
9330             "POPF\n"
9331     "exit:" %}
9332   ins_encode %{
9333     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9334     emit_cmpfp_fixup(_masm);
9335   %}
9336   ins_pipe( pipe_slow );
9337 %}
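
// Note (added for clarity): UCOMISD reports an unordered compare (NaN operand)
// as ZF=PF=CF=1.  The fixup above runs only when PF=1 and, with mask 0xffffff2b,
// clears ZF/SF/PF/AF in the saved EFLAGS while keeping CF, so a NaN operand
// reads back as a plain "below".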
9338 
9339 instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9340   predicate(UseSSE>=2);
9341   match(Set cr (CmpD src1 src2));
9342   ins_cost(100);
9343   format %{ "UCOMISD $src1,$src2" %}
9344   ins_encode %{
9345     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9346   %}
9347   ins_pipe( pipe_slow );
9348 %}
9349 
9350 // double compare and set condition codes in EFLAGS by XMM regs
9351 instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9352   predicate(UseSSE>=2);
9353   match(Set cr (CmpD src1 (LoadD src2)));
9354   ins_cost(145);
9355   format %{ "UCOMISD $src1,$src2\n\t"
9356             "JNP,s   exit\n\t"
9357             "PUSHF\t# saw NaN, set CF\n\t"
9358             "AND     [rsp], #0xffffff2b\n\t"
9359             "POPF\n"
9360     "exit:" %}
9361   ins_encode %{
9362     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9363     emit_cmpfp_fixup(_masm);
9364   %}
9365   ins_pipe( pipe_slow );
9366 %}
9367 
9368 instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9369   predicate(UseSSE>=2);
9370   match(Set cr (CmpD src1 (LoadD src2)));
9371   ins_cost(100);
9372   format %{ "UCOMISD $src1,$src2" %}
9373   ins_encode %{
9374     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9375   %}
9376   ins_pipe( pipe_slow );
9377 %}
9378 
9379 // Compare into -1,0,1 in XMM
9380 instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9381   predicate(UseSSE>=2);
9382   match(Set dst (CmpD3 src1 src2));
9383   effect(KILL cr);
9384   ins_cost(255);
9385   format %{ "UCOMISD $src1, $src2\n\t"
9386             "MOV     $dst, #-1\n\t"
9387             "JP,s    done\n\t"
9388             "JB,s    done\n\t"
9389             "SETNE   $dst\n\t"
9390             "MOVZB   $dst, $dst\n"
9391     "done:" %}
9392   ins_encode %{
9393     __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9394     emit_cmpfp3(_masm, $dst$$Register);
9395   %}
9396   ins_pipe( pipe_slow );
9397 %}
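
// Sketch (illustrative only) of the -1/0/1 mapping produced above:
//   result = (unordered || src1 < src2) ? -1
//          : (src1 == src2)             ?  0
//          :                               1;
// JP and JB both leave the preloaded -1 in place; SETNE/MOVZB map "equal" to 0
// and "above" to 1.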
9398 
9399 // Compare into -1,0,1 in XMM and memory
9400 instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9401   predicate(UseSSE>=2);
9402   match(Set dst (CmpD3 src1 (LoadD src2)));
9403   effect(KILL cr);
9404   ins_cost(275);
9405   format %{ "UCOMISD $src1, $src2\n\t"
9406             "MOV     $dst, #-1\n\t"
9407             "JP,s    done\n\t"
9408             "JB,s    done\n\t"
9409             "SETNE   $dst\n\t"
9410             "MOVZB   $dst, $dst\n"
9411     "done:" %}
9412   ins_encode %{
9413     __ ucomisd($src1$$XMMRegister, $src2$$Address);
9414     emit_cmpfp3(_masm, $dst$$Register);
9415   %}
9416   ins_pipe( pipe_slow );
9417 %}
9418 
9419 
9420 instruct subDPR_reg(regDPR dst, regDPR src) %{
9421   predicate (UseSSE <=1);
9422   match(Set dst (SubD dst src));
9423 
9424   format %{ "FLD    $src\n\t"
9425             "DSUBp  $dst,ST" %}
9426   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9427   ins_cost(150);
9428   ins_encode( Push_Reg_DPR(src),
9429               OpcP, RegOpc(dst) );
9430   ins_pipe( fpu_reg_reg );
9431 %}
9432 
9433 instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9434   predicate (UseSSE <=1);
9435   match(Set dst (RoundDouble (SubD src1 src2)));
9436   ins_cost(250);
9437 
9438   format %{ "FLD    $src2\n\t"
9439             "DSUB   ST,$src1\n\t"
9440             "FSTP_D $dst\t# D-round" %}
9441   opcode(0xD8, 0x5);
9442   ins_encode( Push_Reg_DPR(src2),
9443               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9444   ins_pipe( fpu_mem_reg_reg );
9445 %}
9446 
9447 
9448 instruct subDPR_reg_mem(regDPR dst, memory src) %{
9449   predicate (UseSSE <=1);
9450   match(Set dst (SubD dst (LoadD src)));
9451   ins_cost(150);
9452 
9453   format %{ "FLD    $src\n\t"
9454             "DSUBp  $dst,ST" %}
9455   opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9456   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9457               OpcP, RegOpc(dst) );
9458   ins_pipe( fpu_reg_mem );
9459 %}
9460 
9461 instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9462   predicate (UseSSE<=1);
9463   match(Set dst (AbsD src));
9464   ins_cost(100);
9465   format %{ "FABS" %}
9466   opcode(0xE1, 0xD9);
9467   ins_encode( OpcS, OpcP );
9468   ins_pipe( fpu_reg_reg );
9469 %}
9470 
9471 instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9472   predicate(UseSSE<=1);
9473   match(Set dst (NegD src));
9474   ins_cost(100);
9475   format %{ "FCHS" %}
9476   opcode(0xE0, 0xD9);
9477   ins_encode( OpcS, OpcP );
9478   ins_pipe( fpu_reg_reg );
9479 %}
9480 
9481 instruct addDPR_reg(regDPR dst, regDPR src) %{
9482   predicate(UseSSE<=1);
9483   match(Set dst (AddD dst src));
9484   format %{ "FLD    $src\n\t"
9485             "DADD   $dst,ST" %}
9486   size(4);
9487   ins_cost(150);
9488   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9489   ins_encode( Push_Reg_DPR(src),
9490               OpcP, RegOpc(dst) );
9491   ins_pipe( fpu_reg_reg );
9492 %}
9493 
9494 
9495 instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9496   predicate(UseSSE<=1);
9497   match(Set dst (RoundDouble (AddD src1 src2)));
9498   ins_cost(250);
9499 
9500   format %{ "FLD    $src2\n\t"
9501             "DADD   ST,$src1\n\t"
9502             "FSTP_D $dst\t# D-round" %}
9503   opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9504   ins_encode( Push_Reg_DPR(src2),
9505               OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9506   ins_pipe( fpu_mem_reg_reg );
9507 %}
9508 
9509 
9510 instruct addDPR_reg_mem(regDPR dst, memory src) %{
9511   predicate(UseSSE<=1);
9512   match(Set dst (AddD dst (LoadD src)));
9513   ins_cost(150);
9514 
9515   format %{ "FLD    $src\n\t"
9516             "DADDp  $dst,ST" %}
9517   opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9518   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9519               OpcP, RegOpc(dst) );
9520   ins_pipe( fpu_reg_mem );
9521 %}
9522 
9523 // add-to-memory
9524 instruct addDPR_mem_reg(memory dst, regDPR src) %{
9525   predicate(UseSSE<=1);
9526   match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9527   ins_cost(150);
9528 
9529   format %{ "FLD_D  $dst\n\t"
9530             "DADD   ST,$src\n\t"
9531             "FST_D  $dst" %}
9532   opcode(0xDD, 0x0);
9533   ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9534               Opcode(0xD8), RegOpc(src),
9535               set_instruction_start,
9536               Opcode(0xDD), RMopc_Mem(0x03,dst) );
9537   ins_pipe( fpu_reg_mem );
9538 %}
9539 
9540 instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9541   predicate(UseSSE<=1);
9542   match(Set dst (AddD dst con));
9543   ins_cost(125);
9544   format %{ "FLD1\n\t"
9545             "DADDp  $dst,ST" %}
9546   ins_encode %{
9547     __ fld1();
9548     __ faddp($dst$$reg);
9549   %}
9550   ins_pipe(fpu_reg);
9551 %}
9552 
9553 instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9554   predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9555   match(Set dst (AddD dst con));
9556   ins_cost(200);
9557   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9558             "DADDp  $dst,ST" %}
9559   ins_encode %{
9560     __ fld_d($constantaddress($con));
9561     __ faddp($dst$$reg);
9562   %}
9563   ins_pipe(fpu_reg_mem);
9564 %}
9565 
9566 instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9567   predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9568   match(Set dst (RoundDouble (AddD src con)));
9569   ins_cost(200);
9570   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9571             "DADD   ST,$src\n\t"
9572             "FSTP_D $dst\t# D-round" %}
9573   ins_encode %{
9574     __ fld_d($constantaddress($con));
9575     __ fadd($src$$reg);
9576     __ fstp_d(Address(rsp, $dst$$disp));
9577   %}
9578   ins_pipe(fpu_mem_reg_con);
9579 %}
9580 
9581 instruct mulDPR_reg(regDPR dst, regDPR src) %{
9582   predicate(UseSSE<=1);
9583   match(Set dst (MulD dst src));
9584   format %{ "FLD    $src\n\t"
9585             "DMULp  $dst,ST" %}
9586   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9587   ins_cost(150);
9588   ins_encode( Push_Reg_DPR(src),
9589               OpcP, RegOpc(dst) );
9590   ins_pipe( fpu_reg_reg );
9591 %}
9592 
9593 // Strict FP instruction biases argument before multiply then
9594 // biases result to avoid double rounding of subnormals.
9595 //
9596 // scale arg1 by multiplying arg1 by 2^(-15360)
9597 // load arg2
9598 // multiply scaled arg1 by arg2
9599 // rescale product by 2^(15360)
9600 //
9601 instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9602   predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9603   match(Set dst (MulD dst src));
9604   ins_cost(1);   // Select this instruction for all strict FP double multiplies
9605 
9606   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9607             "DMULp  $dst,ST\n\t"
9608             "FLD    $src\n\t"
9609             "DMULp  $dst,ST\n\t"
9610             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9611             "DMULp  $dst,ST\n\t" %}
9612   opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9613   ins_encode( strictfp_bias1(dst),
9614               Push_Reg_DPR(src),
9615               OpcP, RegOpc(dst),
9616               strictfp_bias2(dst) );
9617   ins_pipe( fpu_reg_reg );
9618 %}
9619 
9620 instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9621   predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9622   match(Set dst (MulD dst con));
9623   ins_cost(200);
9624   format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9625             "DMULp  $dst,ST" %}
9626   ins_encode %{
9627     __ fld_d($constantaddress($con));
9628     __ fmulp($dst$$reg);
9629   %}
9630   ins_pipe(fpu_reg_mem);
9631 %}
9632 
9633 
9634 instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9635   predicate( UseSSE<=1 );
9636   match(Set dst (MulD dst (LoadD src)));
9637   ins_cost(200);
9638   format %{ "FLD_D  $src\n\t"
9639             "DMULp  $dst,ST" %}
9640   opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9641   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9642               OpcP, RegOpc(dst) );
9643   ins_pipe( fpu_reg_mem );
9644 %}
9645 
9646 //
9647 // Cisc-alternate to reg-reg multiply
9648 instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9649   predicate( UseSSE<=1 );
9650   match(Set dst (MulD src (LoadD mem)));
9651   ins_cost(250);
9652   format %{ "FLD_D  $mem\n\t"
9653             "DMUL   ST,$src\n\t"
9654             "FSTP_D $dst" %}
9655   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9656   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9657               OpcReg_FPR(src),
9658               Pop_Reg_DPR(dst) );
9659   ins_pipe( fpu_reg_reg_mem );
9660 %}
9661 
9662 
9663 // MACRO3 -- addDPR a mulDPR
9664 // This instruction is a '2-address' instruction in that the result goes
9665 // back to src2.  This eliminates a move from the macro; possibly the
9666 // register allocator will have to add it back (and maybe not).
9667 instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9668   predicate( UseSSE<=1 );
9669   match(Set src2 (AddD (MulD src0 src1) src2));
9670   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9671             "DMUL   ST,$src1\n\t"
9672             "DADDp  $src2,ST" %}
9673   ins_cost(250);
9674   opcode(0xDD); /* LoadD DD /0 */
9675   ins_encode( Push_Reg_FPR(src0),
9676               FMul_ST_reg(src1),
9677               FAddP_reg_ST(src2) );
9678   ins_pipe( fpu_reg_reg_reg );
9679 %}
9680 
9681 
9682 // MACRO3 -- subDPR a mulDPR
9683 instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9684   predicate( UseSSE<=1 );
9685   match(Set src2 (SubD (MulD src0 src1) src2));
9686   format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9687             "DMUL   ST,$src1\n\t"
9688             "DSUBRp $src2,ST" %}
9689   ins_cost(250);
9690   ins_encode( Push_Reg_FPR(src0),
9691               FMul_ST_reg(src1),
9692               Opcode(0xDE), Opc_plus(0xE0,src2));
9693   ins_pipe( fpu_reg_reg_reg );
9694 %}
9695 
9696 
9697 instruct divDPR_reg(regDPR dst, regDPR src) %{
9698   predicate( UseSSE<=1 );
9699   match(Set dst (DivD dst src));
9700 
9701   format %{ "FLD    $src\n\t"
9702             "FDIVp  $dst,ST" %}
9703   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9704   ins_cost(150);
9705   ins_encode( Push_Reg_DPR(src),
9706               OpcP, RegOpc(dst) );
9707   ins_pipe( fpu_reg_reg );
9708 %}
9709 
9710 // Strict FP instruction biases argument before division then
9711 // biases result, to avoid double rounding of subnormals.
9712 //
9713 // scale dividend by multiplying dividend by 2^(-15360)
9714 // load divisor
9715 // divide scaled dividend by divisor
9716 // rescale quotient by 2^(15360)
9717 //
9718 instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (DivD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double divides
9723 
9724   format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9725             "DMULp  $dst,ST\n\t"
9726             "FLD    $src\n\t"
9727             "FDIVp  $dst,ST\n\t"
9728             "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9729             "DMULp  $dst,ST\n\t" %}
9730   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9731   ins_encode( strictfp_bias1(dst),
9732               Push_Reg_DPR(src),
9733               OpcP, RegOpc(dst),
9734               strictfp_bias2(dst) );
9735   ins_pipe( fpu_reg_reg );
9736 %}
9737 
9738 instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9739   predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9740   match(Set dst (RoundDouble (DivD src1 src2)));
9741 
9742   format %{ "FLD    $src1\n\t"
9743             "FDIV   ST,$src2\n\t"
9744             "FSTP_D $dst\t# D-round" %}
9745   opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9746   ins_encode( Push_Reg_DPR(src1),
9747               OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9748   ins_pipe( fpu_mem_reg_reg );
9749 %}
9750 
9751 
9752 instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9753   predicate(UseSSE<=1);
9754   match(Set dst (ModD dst src));
9755   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9756 
9757   format %{ "DMOD   $dst,$src" %}
9758   ins_cost(250);
9759   ins_encode(Push_Reg_Mod_DPR(dst, src),
9760               emitModDPR(),
9761               Push_Result_Mod_DPR(src),
9762               Pop_Reg_DPR(dst));
9763   ins_pipe( pipe_slow );
9764 %}
9765 
9766 instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9767   predicate(UseSSE>=2);
9768   match(Set dst (ModD src0 src1));
9769   effect(KILL rax, KILL cr);
9770 
9771   format %{ "SUB    ESP,8\t # DMOD\n"
9772           "\tMOVSD  [ESP+0],$src1\n"
9773           "\tFLD_D  [ESP+0]\n"
9774           "\tMOVSD  [ESP+0],$src0\n"
9775           "\tFLD_D  [ESP+0]\n"
9776      "loop:\tFPREM\n"
9777           "\tFWAIT\n"
9778           "\tFNSTSW AX\n"
9779           "\tSAHF\n"
9780           "\tJP     loop\n"
9781           "\tFSTP_D [ESP+0]\n"
9782           "\tMOVSD  $dst,[ESP+0]\n"
9783           "\tADD    ESP,8\n"
9784           "\tFSTP   ST0\t # Restore FPU Stack"
9785     %}
9786   ins_cost(250);
9787   ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9788   ins_pipe( pipe_slow );
9789 %}
9790 
9791 instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
9792   predicate (UseSSE<=1);
9793   match(Set dst (SinD src));
9794   ins_cost(1800);
9795   format %{ "DSIN   $dst" %}
9796   opcode(0xD9, 0xFE);
9797   ins_encode( OpcP, OpcS );
9798   ins_pipe( pipe_slow );
9799 %}
9800 
9801 instruct sinD_reg(regD dst, eFlagsReg cr) %{
9802   predicate (UseSSE>=2);
9803   match(Set dst (SinD dst));
9804   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9805   ins_cost(1800);
9806   format %{ "DSIN   $dst" %}
9807   opcode(0xD9, 0xFE);
9808   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9809   ins_pipe( pipe_slow );
9810 %}
9811 
9812 instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
9813   predicate (UseSSE<=1);
9814   match(Set dst (CosD src));
9815   ins_cost(1800);
9816   format %{ "DCOS   $dst" %}
9817   opcode(0xD9, 0xFF);
9818   ins_encode( OpcP, OpcS );
9819   ins_pipe( pipe_slow );
9820 %}
9821 
9822 instruct cosD_reg(regD dst, eFlagsReg cr) %{
9823   predicate (UseSSE>=2);
9824   match(Set dst (CosD dst));
9825   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9826   ins_cost(1800);
9827   format %{ "DCOS   $dst" %}
9828   opcode(0xD9, 0xFF);
9829   ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9830   ins_pipe( pipe_slow );
9831 %}
9832 
9833 instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
9834   predicate (UseSSE<=1);
9835   match(Set dst(TanD src));
9836   format %{ "DTAN   $dst" %}
9837   ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
9838               Opcode(0xDD), Opcode(0xD8));   // fstp st
9839   ins_pipe( pipe_slow );
9840 %}
9841 
9842 instruct tanD_reg(regD dst, eFlagsReg cr) %{
9843   predicate (UseSSE>=2);
9844   match(Set dst(TanD dst));
9845   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9846   format %{ "DTAN   $dst" %}
9847   ins_encode( Push_SrcD(dst),
9848               Opcode(0xD9), Opcode(0xF2),    // fptan
9849               Opcode(0xDD), Opcode(0xD8),   // fstp st
9850               Push_ResultD(dst) );
9851   ins_pipe( pipe_slow );
9852 %}
9853 
9854 instruct atanDPR_reg(regDPR dst, regDPR src) %{
9855   predicate (UseSSE<=1);
9856   match(Set dst(AtanD dst src));
9857   format %{ "DATA   $dst,$src" %}
9858   opcode(0xD9, 0xF3);
9859   ins_encode( Push_Reg_DPR(src),
9860               OpcP, OpcS, RegOpc(dst) );
9861   ins_pipe( pipe_slow );
9862 %}
9863 
9864 instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9865   predicate (UseSSE>=2);
9866   match(Set dst(AtanD dst src));
9867   effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9868   format %{ "DATA   $dst,$src" %}
9869   opcode(0xD9, 0xF3);
9870   ins_encode( Push_SrcD(src),
9871               OpcP, OpcS, Push_ResultD(dst) );
9872   ins_pipe( pipe_slow );
9873 %}
9874 
9875 instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9876   predicate (UseSSE<=1);
9877   match(Set dst (SqrtD src));
9878   format %{ "DSQRT  $dst,$src" %}
9879   opcode(0xFA, 0xD9);
9880   ins_encode( Push_Reg_DPR(src),
9881               OpcS, OpcP, Pop_Reg_DPR(dst) );
9882   ins_pipe( pipe_slow );
9883 %}
9884 
9885 instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9886   predicate (UseSSE<=1);
9887   match(Set Y (PowD X Y));  // Raise X to the Yth power
9888   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9889   format %{ "fast_pow $X $Y -> $Y  // KILL $rax, $rcx, $rdx" %}
9890   ins_encode %{
9891     __ subptr(rsp, 8);
9892     __ fld_s($X$$reg - 1);
9893     __ fast_pow();
9894     __ addptr(rsp, 8);
9895   %}
9896   ins_pipe( pipe_slow );
9897 %}
9898 
9899 instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9900   predicate (UseSSE>=2);
9901   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
9902   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9903   format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
9904   ins_encode %{
9905     __ subptr(rsp, 8);
9906     __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
9907     __ fld_d(Address(rsp, 0));
9908     __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
9909     __ fld_d(Address(rsp, 0));
9910     __ fast_pow();
9911     __ fstp_d(Address(rsp, 0));
9912     __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9913     __ addptr(rsp, 8);
9914   %}
9915   ins_pipe( pipe_slow );
9916 %}
9917 
9918 instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
9919   predicate (UseSSE<=1);
9920   // The source Double operand on FPU stack
9921   match(Set dst (Log10D src));
9922   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9923   // fxch         ; swap ST(0) with ST(1)
9924   // fyl2x        ; compute log_10(2) * log_2(x)
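  // FYL2X computes ST(1) * log_2(ST(0)) and pops, which is why log_10(2) is
  // pushed first and FXCH then brings the argument back to the top of stack
  // (the same pattern is used by the log variants below).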
9925   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9926             "FXCH   \n\t"
9927             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9928          %}
9929   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9930               Opcode(0xD9), Opcode(0xC9),   // fxch
9931               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9932 
9933   ins_pipe( pipe_slow );
9934 %}
9935 
9936 instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
9937   predicate (UseSSE>=2);
9938   effect(KILL cr);
9939   match(Set dst (Log10D src));
9940   // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9941   // fyl2x        ; compute log_10(2) * log_2(x)
9942   format %{ "FLDLG2 \t\t\t#Log10\n\t"
9943             "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9944          %}
9945   ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9946               Push_SrcD(src),
9947               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9948               Push_ResultD(dst));
9949 
9950   ins_pipe( pipe_slow );
9951 %}
9952 
9953 instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
9954   predicate (UseSSE<=1);
9955   // The source Double operand on FPU stack
9956   match(Set dst (LogD src));
9957   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9958   // fxch         ; swap ST(0) with ST(1)
9959   // fyl2x        ; compute log_e(2) * log_2(x)
9960   format %{ "FLDLN2 \t\t\t#Log_e\n\t"
9961             "FXCH   \n\t"
9962             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
9963          %}
9964   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
9965               Opcode(0xD9), Opcode(0xC9),   // fxch
9966               Opcode(0xD9), Opcode(0xF1));  // fyl2x
9967 
9968   ins_pipe( pipe_slow );
9969 %}
9970 
9971 instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
9972   predicate (UseSSE>=2);
9973   effect(KILL cr);
9974   // The source and result Double operands in XMM registers
9975   match(Set dst (LogD src));
9976   // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
9977   // fyl2x        ; compute log_e(2) * log_2(x)
9978   format %{ "FLDLN2 \t\t\t#Log_e\n\t"
9979             "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
9980          %}
9981   ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
9982               Push_SrcD(src),
9983               Opcode(0xD9), Opcode(0xF1),   // fyl2x
9984               Push_ResultD(dst));
9985   ins_pipe( pipe_slow );
9986 %}
9987 
9988 //-------------Float Instructions-------------------------------
9989 // Float Math
9990 
9991 // Code for float compare:
9992 //     fcompp();
9993 //     fwait(); fnstsw_ax();
9994 //     sahf();
9995 //     movl(dst, unordered_result);
9996 //     jcc(Assembler::parity, exit);
9997 //     movl(dst, less_result);
9998 //     jcc(Assembler::below, exit);
9999 //     movl(dst, equal_result);
10000 //     jcc(Assembler::equal, exit);
10001 //     movl(dst, greater_result);
10002 //   exit:
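//
// The SAHF above maps the x87 condition codes into EFLAGS: C0 becomes CF,
// C2 becomes PF and C3 becomes ZF, so an unordered compare (NaN operand)
// shows up as PF=1 and jcc(Assembler::parity, exit) keeps unordered_result.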
10003 
10004 // P6 version of float compare, sets condition codes in EFLAGS
10005 instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10006   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10007   match(Set cr (CmpF src1 src2));
10008   effect(KILL rax);
10009   ins_cost(150);
10010   format %{ "FLD    $src1\n\t"
10011             "FUCOMIP ST,$src2  // P6 instruction\n\t"
10012             "JNP    exit\n\t"
10013             "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10014             "SAHF\n"
10015      "exit:\tNOP               // avoid branch to branch" %}
10016   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10017   ins_encode( Push_Reg_DPR(src1),
10018               OpcP, RegOpc(src2),
10019               cmpF_P6_fixup );
10020   ins_pipe( pipe_slow );
10021 %}
10022 
10023 instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10024   predicate(VM_Version::supports_cmov() && UseSSE == 0);
10025   match(Set cr (CmpF src1 src2));
10026   ins_cost(100);
10027   format %{ "FLD    $src1\n\t"
10028             "FUCOMIP ST,$src2  // P6 instruction" %}
10029   opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10030   ins_encode( Push_Reg_DPR(src1),
10031               OpcP, RegOpc(src2));
10032   ins_pipe( pipe_slow );
10033 %}
10034 
10035 
10036 // Compare & branch
10037 instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10038   predicate(UseSSE == 0);
10039   match(Set cr (CmpF src1 src2));
10040   effect(KILL rax);
10041   ins_cost(200);
10042   format %{ "FLD    $src1\n\t"
10043             "FCOMp  $src2\n\t"
10044             "FNSTSW AX\n\t"
10045             "TEST   AX,0x400\n\t"
10046             "JZ,s   flags\n\t"
10047             "MOV    AH,1\t# unordered treat as LT\n"
10048     "flags:\tSAHF" %}
10049   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10050   ins_encode( Push_Reg_DPR(src1),
10051               OpcP, RegOpc(src2),
10052               fpu_flags);
10053   ins_pipe( pipe_slow );
10054 %}
10055 
10056 // Compare vs zero into -1,0,1
10057 instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10058   predicate(UseSSE == 0);
10059   match(Set dst (CmpF3 src1 zero));
10060   effect(KILL cr, KILL rax);
10061   ins_cost(280);
10062   format %{ "FTSTF  $dst,$src1" %}
10063   opcode(0xE4, 0xD9);
10064   ins_encode( Push_Reg_DPR(src1),
10065               OpcS, OpcP, PopFPU,
10066               CmpF_Result(dst));
10067   ins_pipe( pipe_slow );
10068 %}
10069 
10070 // Compare into -1,0,1
10071 instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10072   predicate(UseSSE == 0);
10073   match(Set dst (CmpF3 src1 src2));
10074   effect(KILL cr, KILL rax);
10075   ins_cost(300);
10076   format %{ "FCMPF  $dst,$src1,$src2" %}
10077   opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10078   ins_encode( Push_Reg_DPR(src1),
10079               OpcP, RegOpc(src2),
10080               CmpF_Result(dst));
10081   ins_pipe( pipe_slow );
10082 %}
10083 
10084 // float compare and set condition codes in EFLAGS by XMM regs
10085 instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10086   predicate(UseSSE>=1);
10087   match(Set cr (CmpF src1 src2));
10088   ins_cost(145);
10089   format %{ "UCOMISS $src1,$src2\n\t"
10090             "JNP,s   exit\n\t"
10091             "PUSHF\t# saw NaN, set CF\n\t"
10092             "AND     [rsp], #0xffffff2b\n\t"
10093             "POPF\n"
10094     "exit:" %}
10095   ins_encode %{
10096     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10097     emit_cmpfp_fixup(_masm);
10098   %}
10099   ins_pipe( pipe_slow );
10100 %}
10101 
10102 instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10103   predicate(UseSSE>=1);
10104   match(Set cr (CmpF src1 src2));
10105   ins_cost(100);
10106   format %{ "UCOMISS $src1,$src2" %}
10107   ins_encode %{
10108     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10109   %}
10110   ins_pipe( pipe_slow );
10111 %}
10112 
10113 // float compare and set condition codes in EFLAGS by XMM regs
10114 instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10115   predicate(UseSSE>=1);
10116   match(Set cr (CmpF src1 (LoadF src2)));
10117   ins_cost(165);
10118   format %{ "UCOMISS $src1,$src2\n\t"
10119             "JNP,s   exit\n\t"
10120             "PUSHF\t# saw NaN, set CF\n\t"
10121             "AND     [rsp], #0xffffff2b\n\t"
10122             "POPF\n"
10123     "exit:" %}
10124   ins_encode %{
10125     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10126     emit_cmpfp_fixup(_masm);
10127   %}
10128   ins_pipe( pipe_slow );
10129 %}
10130 
10131 instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10132   predicate(UseSSE>=1);
10133   match(Set cr (CmpF src1 (LoadF src2)));
10134   ins_cost(100);
10135   format %{ "UCOMISS $src1,$src2" %}
10136   ins_encode %{
10137     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10138   %}
10139   ins_pipe( pipe_slow );
10140 %}
10141 
10142 // Compare into -1,0,1 in XMM
10143 instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10144   predicate(UseSSE>=1);
10145   match(Set dst (CmpF3 src1 src2));
10146   effect(KILL cr);
10147   ins_cost(255);
10148   format %{ "UCOMISS $src1, $src2\n\t"
10149             "MOV     $dst, #-1\n\t"
10150             "JP,s    done\n\t"
10151             "JB,s    done\n\t"
10152             "SETNE   $dst\n\t"
10153             "MOVZB   $dst, $dst\n"
10154     "done:" %}
10155   ins_encode %{
10156     __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10157     emit_cmpfp3(_masm, $dst$$Register);
10158   %}
10159   ins_pipe( pipe_slow );
10160 %}
10161 
10162 // Compare into -1,0,1 in XMM and memory
10163 instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10164   predicate(UseSSE>=1);
10165   match(Set dst (CmpF3 src1 (LoadF src2)));
10166   effect(KILL cr);
10167   ins_cost(275);
10168   format %{ "UCOMISS $src1, $src2\n\t"
10169             "MOV     $dst, #-1\n\t"
10170             "JP,s    done\n\t"
10171             "JB,s    done\n\t"
10172             "SETNE   $dst\n\t"
10173             "MOVZB   $dst, $dst\n"
10174     "done:" %}
10175   ins_encode %{
10176     __ ucomiss($src1$$XMMRegister, $src2$$Address);
10177     emit_cmpfp3(_masm, $dst$$Register);
10178   %}
10179   ins_pipe( pipe_slow );
10180 %}
10181 
10182 // Spill to obtain 24-bit precision
10183 instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10184   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10185   match(Set dst (SubF src1 src2));
10186 
10187   format %{ "FSUB   $dst,$src1 - $src2" %}
10188   opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10189   ins_encode( Push_Reg_FPR(src1),
10190               OpcReg_FPR(src2),
10191               Pop_Mem_FPR(dst) );
10192   ins_pipe( fpu_mem_reg_reg );
10193 %}
10194 //
10195 // This instruction does not round to 24-bits
10196 instruct subFPR_reg(regFPR dst, regFPR src) %{
10197   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10198   match(Set dst (SubF dst src));
10199 
10200   format %{ "FSUB   $dst,$src" %}
10201   opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10202   ins_encode( Push_Reg_FPR(src),
10203               OpcP, RegOpc(dst) );
10204   ins_pipe( fpu_reg_reg );
10205 %}
10206 
10207 // Spill to obtain 24-bit precision
10208 instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10209   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10210   match(Set dst (AddF src1 src2));
10211 
10212   format %{ "FADD   $dst,$src1,$src2" %}
10213   opcode(0xD8, 0x0); /* D8 C0+i */
10214   ins_encode( Push_Reg_FPR(src2),
10215               OpcReg_FPR(src1),
10216               Pop_Mem_FPR(dst) );
10217   ins_pipe( fpu_mem_reg_reg );
10218 %}
10219 //
10220 // This instruction does not round to 24-bits
10221 instruct addFPR_reg(regFPR dst, regFPR src) %{
10222   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10223   match(Set dst (AddF dst src));
10224 
10225   format %{ "FLD    $src\n\t"
10226             "FADDp  $dst,ST" %}
10227   opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10228   ins_encode( Push_Reg_FPR(src),
10229               OpcP, RegOpc(dst) );
10230   ins_pipe( fpu_reg_reg );
10231 %}
10232 
10233 instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10234   predicate(UseSSE==0);
10235   match(Set dst (AbsF src));
10236   ins_cost(100);
10237   format %{ "FABS" %}
10238   opcode(0xE1, 0xD9);
10239   ins_encode( OpcS, OpcP );
10240   ins_pipe( fpu_reg_reg );
10241 %}
10242 
10243 instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10244   predicate(UseSSE==0);
10245   match(Set dst (NegF src));
10246   ins_cost(100);
10247   format %{ "FCHS" %}
10248   opcode(0xE0, 0xD9);
10249   ins_encode( OpcS, OpcP );
10250   ins_pipe( fpu_reg_reg );
10251 %}
10252 
10253 // Cisc-alternate to addFPR_reg
10254 // Spill to obtain 24-bit precision
10255 instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10256   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10257   match(Set dst (AddF src1 (LoadF src2)));
10258 
10259   format %{ "FLD    $src2\n\t"
10260             "FADD   ST,$src1\n\t"
10261             "FSTP_S $dst" %}
10262   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10263   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10264               OpcReg_FPR(src1),
10265               Pop_Mem_FPR(dst) );
10266   ins_pipe( fpu_mem_reg_mem );
10267 %}
10268 //
10269 // Cisc-alternate to addFPR_reg
10270 // This instruction does not round to 24-bits
10271 instruct addFPR_reg_mem(regFPR dst, memory src) %{
10272   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10273   match(Set dst (AddF dst (LoadF src)));
10274 
10275   format %{ "FADD   $dst,$src" %}
10276   opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10277   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10278               OpcP, RegOpc(dst) );
10279   ins_pipe( fpu_reg_mem );
10280 %}
10281 
// Following two instructions for _222_mpegaudio
10283 // Spill to obtain 24-bit precision
10284 instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10285   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10286   match(Set dst (AddF src1 src2));
10287 
10288   format %{ "FADD   $dst,$src1,$src2" %}
10289   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10290   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10291               OpcReg_FPR(src2),
10292               Pop_Mem_FPR(dst) );
10293   ins_pipe( fpu_mem_reg_mem );
10294 %}
10295 
10296 // Cisc-spill variant
10297 // Spill to obtain 24-bit precision
10298 instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10299   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10300   match(Set dst (AddF src1 (LoadF src2)));
10301 
10302   format %{ "FADD   $dst,$src1,$src2 cisc" %}
10303   opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10304   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10305               set_instruction_start,
10306               OpcP, RMopc_Mem(secondary,src1),
10307               Pop_Mem_FPR(dst) );
10308   ins_pipe( fpu_mem_mem_mem );
10309 %}
10310 
10311 // Spill to obtain 24-bit precision
10312 instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10313   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10314   match(Set dst (AddF src1 src2));
10315 
10316   format %{ "FADD   $dst,$src1,$src2" %}
10317   opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10318   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10319               set_instruction_start,
10320               OpcP, RMopc_Mem(secondary,src1),
10321               Pop_Mem_FPR(dst) );
10322   ins_pipe( fpu_mem_mem_mem );
10323 %}
10324 
10325 
10326 // Spill to obtain 24-bit precision
10327 instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10328   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10329   match(Set dst (AddF src con));
10330   format %{ "FLD    $src\n\t"
10331             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10332             "FSTP_S $dst"  %}
10333   ins_encode %{
10334     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10335     __ fadd_s($constantaddress($con));
10336     __ fstp_s(Address(rsp, $dst$$disp));
10337   %}
10338   ins_pipe(fpu_mem_reg_con);
10339 %}
10340 //
10341 // This instruction does not round to 24-bits
10342 instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10343   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10344   match(Set dst (AddF src con));
10345   format %{ "FLD    $src\n\t"
10346             "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10347             "FSTP   $dst"  %}
10348   ins_encode %{
10349     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10350     __ fadd_s($constantaddress($con));
10351     __ fstp_d($dst$$reg);
10352   %}
10353   ins_pipe(fpu_reg_reg_con);
10354 %}
10355 
10356 // Spill to obtain 24-bit precision
10357 instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10358   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10359   match(Set dst (MulF src1 src2));
10360 
10361   format %{ "FLD    $src1\n\t"
10362             "FMUL   $src2\n\t"
10363             "FSTP_S $dst"  %}
10364   opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10365   ins_encode( Push_Reg_FPR(src1),
10366               OpcReg_FPR(src2),
10367               Pop_Mem_FPR(dst) );
10368   ins_pipe( fpu_mem_reg_reg );
10369 %}
10370 //
10371 // This instruction does not round to 24-bits
10372 instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10373   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10374   match(Set dst (MulF src1 src2));
10375 
10376   format %{ "FLD    $src1\n\t"
10377             "FMUL   $src2\n\t"
10378             "FSTP_S $dst"  %}
10379   opcode(0xD8, 0x1); /* D8 C8+i */
10380   ins_encode( Push_Reg_FPR(src2),
10381               OpcReg_FPR(src1),
10382               Pop_Reg_FPR(dst) );
10383   ins_pipe( fpu_reg_reg_reg );
10384 %}
10385 
10386 
10387 // Spill to obtain 24-bit precision
10388 // Cisc-alternate to reg-reg multiply
10389 instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10390   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10391   match(Set dst (MulF src1 (LoadF src2)));
10392 
10393   format %{ "FLD_S  $src2\n\t"
10394             "FMUL   $src1\n\t"
10395             "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10397   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10398               OpcReg_FPR(src1),
10399               Pop_Mem_FPR(dst) );
10400   ins_pipe( fpu_mem_reg_mem );
10401 %}
10402 //
10403 // This instruction does not round to 24-bits
10404 // Cisc-alternate to reg-reg multiply
10405 instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10406   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10407   match(Set dst (MulF src1 (LoadF src2)));
10408 
10409   format %{ "FMUL   $dst,$src1,$src2" %}
10410   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10411   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10412               OpcReg_FPR(src1),
10413               Pop_Reg_FPR(dst) );
10414   ins_pipe( fpu_reg_reg_mem );
10415 %}
10416 
10417 // Spill to obtain 24-bit precision
10418 instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10419   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10420   match(Set dst (MulF src1 src2));
10421 
10422   format %{ "FMUL   $dst,$src1,$src2" %}
10423   opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10424   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10425               set_instruction_start,
10426               OpcP, RMopc_Mem(secondary,src1),
10427               Pop_Mem_FPR(dst) );
10428   ins_pipe( fpu_mem_mem_mem );
10429 %}
10430 
10431 // Spill to obtain 24-bit precision
10432 instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10433   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10434   match(Set dst (MulF src con));
10435 
10436   format %{ "FLD    $src\n\t"
10437             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10438             "FSTP_S $dst"  %}
10439   ins_encode %{
10440     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10441     __ fmul_s($constantaddress($con));
10442     __ fstp_s(Address(rsp, $dst$$disp));
10443   %}
10444   ins_pipe(fpu_mem_reg_con);
10445 %}
10446 //
10447 // This instruction does not round to 24-bits
10448 instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10449   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10450   match(Set dst (MulF src con));
10451 
10452   format %{ "FLD    $src\n\t"
10453             "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10454             "FSTP   $dst"  %}
10455   ins_encode %{
10456     __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10457     __ fmul_s($constantaddress($con));
10458     __ fstp_d($dst$$reg);
10459   %}
10460   ins_pipe(fpu_reg_reg_con);
10461 %}
10462 
10463 
10464 //
10465 // MACRO1 -- subsume unshared load into mulFPR
10466 // This instruction does not round to 24-bits
10467 instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10468   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10469   match(Set dst (MulF (LoadF mem1) src));
10470 
10471   format %{ "FLD    $mem1    ===MACRO1===\n\t"
10472             "FMUL   ST,$src\n\t"
10473             "FSTP   $dst" %}
10474   opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10475   ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10476               OpcReg_FPR(src),
10477               Pop_Reg_FPR(dst) );
10478   ins_pipe( fpu_reg_reg_mem );
10479 %}
10480 //
10481 // MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10482 // This instruction does not round to 24-bits
10483 instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10484   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10485   match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10486   ins_cost(95);
10487 
10488   format %{ "FLD    $mem1     ===MACRO2===\n\t"
10489             "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10490             "FADD   ST,$src2\n\t"
10491             "FSTP   $dst" %}
10492   opcode(0xD9); /* LoadF D9 /0 */
10493   ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10494               FMul_ST_reg(src1),
10495               FAdd_ST_reg(src2),
10496               Pop_Reg_FPR(dst) );
10497   ins_pipe( fpu_reg_mem_reg_reg );
10498 %}
10499 
10500 // MACRO3 -- addFPR a mulFPR
10501 // This instruction does not round to 24-bits.  It is a '2-address'
10502 // instruction in that the result goes back to src2.  This eliminates
10503 // a move from the macro; possibly the register allocator will have
10504 // to add it back (and maybe not).
10505 instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10506   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10507   match(Set src2 (AddF (MulF src0 src1) src2));
10508 
10509   format %{ "FLD    $src0     ===MACRO3===\n\t"
10510             "FMUL   ST,$src1\n\t"
10511             "FADDP  $src2,ST" %}
10512   opcode(0xD9); /* LoadF D9 /0 */
10513   ins_encode( Push_Reg_FPR(src0),
10514               FMul_ST_reg(src1),
10515               FAddP_reg_ST(src2) );
10516   ins_pipe( fpu_reg_reg_reg );
10517 %}
10518 
10519 // MACRO4 -- divFPR subFPR
10520 // This instruction does not round to 24-bits
10521 instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10522   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10523   match(Set dst (DivF (SubF src2 src1) src3));
10524 
10525   format %{ "FLD    $src2   ===MACRO4===\n\t"
10526             "FSUB   ST,$src1\n\t"
10527             "FDIV   ST,$src3\n\t"
10528             "FSTP  $dst" %}
10529   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10530   ins_encode( Push_Reg_FPR(src2),
10531               subFPR_divFPR_encode(src1,src3),
10532               Pop_Reg_FPR(dst) );
10533   ins_pipe( fpu_reg_reg_reg_reg );
10534 %}
10535 
10536 // Spill to obtain 24-bit precision
10537 instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10538   predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10539   match(Set dst (DivF src1 src2));
10540 
10541   format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10543   ins_encode( Push_Reg_FPR(src1),
10544               OpcReg_FPR(src2),
10545               Pop_Mem_FPR(dst) );
10546   ins_pipe( fpu_mem_reg_reg );
10547 %}
10548 //
10549 // This instruction does not round to 24-bits
10550 instruct divFPR_reg(regFPR dst, regFPR src) %{
10551   predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10552   match(Set dst (DivF dst src));
10553 
10554   format %{ "FDIV   $dst,$src" %}
10555   opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10556   ins_encode( Push_Reg_FPR(src),
10557               OpcP, RegOpc(dst) );
10558   ins_pipe( fpu_reg_reg );
10559 %}
10560 
10561 
10562 // Spill to obtain 24-bit precision
10563 instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10564   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10565   match(Set dst (ModF src1 src2));
10566   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10567 
10568   format %{ "FMOD   $dst,$src1,$src2" %}
10569   ins_encode( Push_Reg_Mod_DPR(src1, src2),
10570               emitModDPR(),
10571               Push_Result_Mod_DPR(src2),
10572               Pop_Mem_FPR(dst));
10573   ins_pipe( pipe_slow );
10574 %}
10575 //
10576 // This instruction does not round to 24-bits
10577 instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10578   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10579   match(Set dst (ModF dst src));
10580   effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10581 
10582   format %{ "FMOD   $dst,$src" %}
10583   ins_encode(Push_Reg_Mod_DPR(dst, src),
10584               emitModDPR(),
10585               Push_Result_Mod_DPR(src),
10586               Pop_Reg_FPR(dst));
10587   ins_pipe( pipe_slow );
10588 %}
10589 
10590 instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10591   predicate(UseSSE>=1);
10592   match(Set dst (ModF src0 src1));
10593   effect(KILL rax, KILL cr);
10594   format %{ "SUB    ESP,4\t # FMOD\n"
10595           "\tMOVSS  [ESP+0],$src1\n"
10596           "\tFLD_S  [ESP+0]\n"
10597           "\tMOVSS  [ESP+0],$src0\n"
10598           "\tFLD_S  [ESP+0]\n"
10599      "loop:\tFPREM\n"
10600           "\tFWAIT\n"
10601           "\tFNSTSW AX\n"
10602           "\tSAHF\n"
10603           "\tJP     loop\n"
10604           "\tFSTP_S [ESP+0]\n"
10605           "\tMOVSS  $dst,[ESP+0]\n"
10606           "\tADD    ESP,4\n"
10607           "\tFSTP   ST0\t # Restore FPU Stack"
10608     %}
10609   ins_cost(250);
10610   ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10611   ins_pipe( pipe_slow );
10612 %}
10613 
10614 
10615 //----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are all alphabetically sorted.  Please keep it that way!
10617 
10618 instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10619   predicate(UseSSE==0);
10620   match(Set dst (RoundFloat src));
10621   ins_cost(125);
10622   format %{ "FST_S  $dst,$src\t# F-round" %}
10623   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10624   ins_pipe( fpu_mem_reg );
10625 %}
10626 
10627 instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10628   predicate(UseSSE<=1);
10629   match(Set dst (RoundDouble src));
10630   ins_cost(125);
10631   format %{ "FST_D  $dst,$src\t# D-round" %}
10632   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10633   ins_pipe( fpu_mem_reg );
10634 %}
10635 
// Force rounding to 24-bit precision and 8-bit exponent
10637 instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10638   predicate(UseSSE==0);
10639   match(Set dst (ConvD2F src));
10640   format %{ "FST_S  $dst,$src\t# F-round" %}
10641   expand %{
10642     roundFloat_mem_reg(dst,src);
10643   %}
10644 %}
10645 
// Force rounding to 24-bit precision and 8-bit exponent
10647 instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10648   predicate(UseSSE==1);
10649   match(Set dst (ConvD2F src));
10650   effect( KILL cr );
10651   format %{ "SUB    ESP,4\n\t"
10652             "FST_S  [ESP],$src\t# F-round\n\t"
10653             "MOVSS  $dst,[ESP]\n\t"
10654             "ADD ESP,4" %}
10655   ins_encode %{
10656     __ subptr(rsp, 4);
10657     if ($src$$reg != FPR1L_enc) {
10658       __ fld_s($src$$reg-1);
10659       __ fstp_s(Address(rsp, 0));
10660     } else {
10661       __ fst_s(Address(rsp, 0));
10662     }
10663     __ movflt($dst$$XMMRegister, Address(rsp, 0));
10664     __ addptr(rsp, 4);
10665   %}
10666   ins_pipe( pipe_slow );
10667 %}
10668 
10669 // Force rounding double precision to single precision
10670 instruct convD2F_reg(regF dst, regD src) %{
10671   predicate(UseSSE>=2);
10672   match(Set dst (ConvD2F src));
10673   format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10674   ins_encode %{
10675     __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10676   %}
10677   ins_pipe( pipe_slow );
10678 %}
10679 
10680 instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10681   predicate(UseSSE==0);
10682   match(Set dst (ConvF2D src));
10683   format %{ "FST_S  $dst,$src\t# D-round" %}
10684   ins_encode( Pop_Reg_Reg_DPR(dst, src));
10685   ins_pipe( fpu_reg_reg );
10686 %}
10687 
10688 instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10689   predicate(UseSSE==1);
10690   match(Set dst (ConvF2D src));
10691   format %{ "FST_D  $dst,$src\t# D-round" %}
10692   expand %{
10693     roundDouble_mem_reg(dst,src);
10694   %}
10695 %}
10696 
10697 instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10698   predicate(UseSSE==1);
10699   match(Set dst (ConvF2D src));
10700   effect( KILL cr );
10701   format %{ "SUB    ESP,4\n\t"
10702             "MOVSS  [ESP] $src\n\t"
10703             "FLD_S  [ESP]\n\t"
10704             "ADD    ESP,4\n\t"
10705             "FSTP   $dst\t# D-round" %}
10706   ins_encode %{
10707     __ subptr(rsp, 4);
10708     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10709     __ fld_s(Address(rsp, 0));
10710     __ addptr(rsp, 4);
10711     __ fstp_d($dst$$reg);
10712   %}
10713   ins_pipe( pipe_slow );
10714 %}
10715 
10716 instruct convF2D_reg(regD dst, regF src) %{
10717   predicate(UseSSE>=2);
10718   match(Set dst (ConvF2D src));
10719   format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10720   ins_encode %{
10721     __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10722   %}
10723   ins_pipe( pipe_slow );
10724 %}
10725 
10726 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10727 instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10728   predicate(UseSSE<=1);
10729   match(Set dst (ConvD2I src));
10730   effect( KILL tmp, KILL cr );
10731   format %{ "FLD    $src\t# Convert double to int \n\t"
10732             "FLDCW  trunc mode\n\t"
10733             "SUB    ESP,4\n\t"
10734             "FISTp  [ESP + #0]\n\t"
10735             "FLDCW  std/24-bit mode\n\t"
10736             "POP    EAX\n\t"
10737             "CMP    EAX,0x80000000\n\t"
10738             "JNE,s  fast\n\t"
10739             "FLD_D  $src\n\t"
10740             "CALL   d2i_wrapper\n"
10741       "fast:" %}
10742   ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10743   ins_pipe( pipe_slow );
10744 %}
10745 
10746 // Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10747 instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10748   predicate(UseSSE>=2);
10749   match(Set dst (ConvD2I src));
10750   effect( KILL tmp, KILL cr );
10751   format %{ "CVTTSD2SI $dst, $src\n\t"
10752             "CMP    $dst,0x80000000\n\t"
10753             "JNE,s  fast\n\t"
10754             "SUB    ESP, 8\n\t"
10755             "MOVSD  [ESP], $src\n\t"
10756             "FLD_D  [ESP]\n\t"
10757             "ADD    ESP, 8\n\t"
10758             "CALL   d2i_wrapper\n"
10759       "fast:" %}
10760   ins_encode %{
10761     Label fast;
10762     __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10763     __ cmpl($dst$$Register, 0x80000000);
10764     __ jccb(Assembler::notEqual, fast);
10765     __ subptr(rsp, 8);
10766     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10767     __ fld_d(Address(rsp, 0));
10768     __ addptr(rsp, 8);
10769     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10770     __ bind(fast);
10771   %}
10772   ins_pipe( pipe_slow );
10773 %}
10774 
10775 instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10776   predicate(UseSSE<=1);
10777   match(Set dst (ConvD2L src));
10778   effect( KILL cr );
10779   format %{ "FLD    $src\t# Convert double to long\n\t"
10780             "FLDCW  trunc mode\n\t"
10781             "SUB    ESP,8\n\t"
10782             "FISTp  [ESP + #0]\n\t"
10783             "FLDCW  std/24-bit mode\n\t"
10784             "POP    EAX\n\t"
10785             "POP    EDX\n\t"
10786             "CMP    EDX,0x80000000\n\t"
10787             "JNE,s  fast\n\t"
10788             "TEST   EAX,EAX\n\t"
10789             "JNE,s  fast\n\t"
10790             "FLD    $src\n\t"
10791             "CALL   d2l_wrapper\n"
10792       "fast:" %}
10793   ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10794   ins_pipe( pipe_slow );
10795 %}
10796 
10797 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10798 instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10799   predicate (UseSSE>=2);
10800   match(Set dst (ConvD2L src));
10801   effect( KILL cr );
10802   format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10803             "MOVSD  [ESP],$src\n\t"
10804             "FLD_D  [ESP]\n\t"
10805             "FLDCW  trunc mode\n\t"
10806             "FISTp  [ESP + #0]\n\t"
10807             "FLDCW  std/24-bit mode\n\t"
10808             "POP    EAX\n\t"
10809             "POP    EDX\n\t"
10810             "CMP    EDX,0x80000000\n\t"
10811             "JNE,s  fast\n\t"
10812             "TEST   EAX,EAX\n\t"
10813             "JNE,s  fast\n\t"
10814             "SUB    ESP,8\n\t"
10815             "MOVSD  [ESP],$src\n\t"
10816             "FLD_D  [ESP]\n\t"
10817             "ADD    ESP,8\n\t"
10818             "CALL   d2l_wrapper\n"
10819       "fast:" %}
10820   ins_encode %{
10821     Label fast;
10822     __ subptr(rsp, 8);
10823     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10824     __ fld_d(Address(rsp, 0));
10825     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10826     __ fistp_d(Address(rsp, 0));
10827     // Restore the rounding mode, mask the exception
10828     if (Compile::current()->in_24_bit_fp_mode()) {
10829       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10830     } else {
10831       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10832     }
10833     // Load the converted long, adjust CPU stack
10834     __ pop(rax);
10835     __ pop(rdx);
10836     __ cmpl(rdx, 0x80000000);
10837     __ jccb(Assembler::notEqual, fast);
10838     __ testl(rax, rax);
10839     __ jccb(Assembler::notEqual, fast);
10840     __ subptr(rsp, 8);
10841     __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10842     __ fld_d(Address(rsp, 0));
10843     __ addptr(rsp, 8);
10844     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10845     __ bind(fast);
10846   %}
10847   ins_pipe( pipe_slow );
10848 %}
10849 
10850 // Convert a double to an int.  Java semantics require we do complex
10851 // manglations in the corner cases.  So we set the rounding mode to
10852 // 'zero', store the darned double down as an int, and reset the
10853 // rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or if we converted a NaN; we check for this
// and go the slow path if needed.
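//
// When the store-integer cannot represent the value (NaN or out of range),
// the x87 writes the "integer indefinite" pattern 0x80000000 (or
// 0x8000000000000000 for 64-bit stores); the fast path compares against that
// sentinel (which also catches a genuine MIN_VALUE, harmlessly) and calls the
// d2i/d2l wrapper to apply the Java rules.  Roughly, for d2i (a sketch, not
// the actual stub):
//
//   if (x != x)             return 0;                  // NaN
//   if (x >=  2147483648.0) return Integer.MAX_VALUE;  // too large
//   if (x <  -2147483648.0) return Integer.MIN_VALUE;  // too small
//   return (int) x;                                    // truncate toward zero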
10856 instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10857   predicate(UseSSE==0);
10858   match(Set dst (ConvF2I src));
10859   effect( KILL tmp, KILL cr );
10860   format %{ "FLD    $src\t# Convert float to int \n\t"
10861             "FLDCW  trunc mode\n\t"
10862             "SUB    ESP,4\n\t"
10863             "FISTp  [ESP + #0]\n\t"
10864             "FLDCW  std/24-bit mode\n\t"
10865             "POP    EAX\n\t"
10866             "CMP    EAX,0x80000000\n\t"
10867             "JNE,s  fast\n\t"
10868             "FLD    $src\n\t"
10869             "CALL   d2i_wrapper\n"
10870       "fast:" %}
10871   // DPR2I_encoding works for FPR2I
10872   ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10873   ins_pipe( pipe_slow );
10874 %}
10875 
10876 // Convert a float in xmm to an int reg.
10877 instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10878   predicate(UseSSE>=1);
10879   match(Set dst (ConvF2I src));
10880   effect( KILL tmp, KILL cr );
10881   format %{ "CVTTSS2SI $dst, $src\n\t"
10882             "CMP    $dst,0x80000000\n\t"
10883             "JNE,s  fast\n\t"
10884             "SUB    ESP, 4\n\t"
10885             "MOVSS  [ESP], $src\n\t"
10886             "FLD    [ESP]\n\t"
10887             "ADD    ESP, 4\n\t"
10888             "CALL   d2i_wrapper\n"
10889       "fast:" %}
10890   ins_encode %{
10891     Label fast;
10892     __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10893     __ cmpl($dst$$Register, 0x80000000);
10894     __ jccb(Assembler::notEqual, fast);
10895     __ subptr(rsp, 4);
10896     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10897     __ fld_s(Address(rsp, 0));
10898     __ addptr(rsp, 4);
10899     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10900     __ bind(fast);
10901   %}
10902   ins_pipe( pipe_slow );
10903 %}
10904 
10905 instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10906   predicate(UseSSE==0);
10907   match(Set dst (ConvF2L src));
10908   effect( KILL cr );
10909   format %{ "FLD    $src\t# Convert float to long\n\t"
10910             "FLDCW  trunc mode\n\t"
10911             "SUB    ESP,8\n\t"
10912             "FISTp  [ESP + #0]\n\t"
10913             "FLDCW  std/24-bit mode\n\t"
10914             "POP    EAX\n\t"
10915             "POP    EDX\n\t"
10916             "CMP    EDX,0x80000000\n\t"
10917             "JNE,s  fast\n\t"
10918             "TEST   EAX,EAX\n\t"
10919             "JNE,s  fast\n\t"
10920             "FLD    $src\n\t"
10921             "CALL   d2l_wrapper\n"
10922       "fast:" %}
10923   // DPR2L_encoding works for FPR2L
10924   ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10925   ins_pipe( pipe_slow );
10926 %}
10927 
10928 // XMM lacks a float/double->long conversion, so use the old FPU stack.
10929 instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10930   predicate (UseSSE>=1);
10931   match(Set dst (ConvF2L src));
10932   effect( KILL cr );
10933   format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10934             "MOVSS  [ESP],$src\n\t"
10935             "FLD_S  [ESP]\n\t"
10936             "FLDCW  trunc mode\n\t"
10937             "FISTp  [ESP + #0]\n\t"
10938             "FLDCW  std/24-bit mode\n\t"
10939             "POP    EAX\n\t"
10940             "POP    EDX\n\t"
10941             "CMP    EDX,0x80000000\n\t"
10942             "JNE,s  fast\n\t"
10943             "TEST   EAX,EAX\n\t"
10944             "JNE,s  fast\n\t"
10945             "SUB    ESP,4\t# Convert float to long\n\t"
10946             "MOVSS  [ESP],$src\n\t"
10947             "FLD_S  [ESP]\n\t"
10948             "ADD    ESP,4\n\t"
10949             "CALL   d2l_wrapper\n"
10950       "fast:" %}
10951   ins_encode %{
10952     Label fast;
10953     __ subptr(rsp, 8);
10954     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10955     __ fld_s(Address(rsp, 0));
10956     __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10957     __ fistp_d(Address(rsp, 0));
10958     // Restore the rounding mode, mask the exception
10959     if (Compile::current()->in_24_bit_fp_mode()) {
10960       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10961     } else {
10962       __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10963     }
10964     // Load the converted long, adjust CPU stack
10965     __ pop(rax);
10966     __ pop(rdx);
10967     __ cmpl(rdx, 0x80000000);
10968     __ jccb(Assembler::notEqual, fast);
10969     __ testl(rax, rax);
10970     __ jccb(Assembler::notEqual, fast);
10971     __ subptr(rsp, 4);
10972     __ movflt(Address(rsp, 0), $src$$XMMRegister);
10973     __ fld_s(Address(rsp, 0));
10974     __ addptr(rsp, 4);
10975     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10976     __ bind(fast);
10977   %}
10978   ins_pipe( pipe_slow );
10979 %}
10980 
10981 instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10982   predicate( UseSSE<=1 );
10983   match(Set dst (ConvI2D src));
10984   format %{ "FILD   $src\n\t"
10985             "FSTP   $dst" %}
10986   opcode(0xDB, 0x0);  /* DB /0 */
10987   ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10988   ins_pipe( fpu_reg_mem );
10989 %}
10990 
10991 instruct convI2D_reg(regD dst, rRegI src) %{
10992   predicate( UseSSE>=2 && !UseXmmI2D );
10993   match(Set dst (ConvI2D src));
10994   format %{ "CVTSI2SD $dst,$src" %}
10995   ins_encode %{
10996     __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10997   %}
10998   ins_pipe( pipe_slow );
10999 %}
11000 
11001 instruct convI2D_mem(regD dst, memory mem) %{
11002   predicate( UseSSE>=2 );
11003   match(Set dst (ConvI2D (LoadI mem)));
11004   format %{ "CVTSI2SD $dst,$mem" %}
11005   ins_encode %{
11006     __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11007   %}
11008   ins_pipe( pipe_slow );
11009 %}
11010 
11011 instruct convXI2D_reg(regD dst, rRegI src)
11012 %{
11013   predicate( UseSSE>=2 && UseXmmI2D );
11014   match(Set dst (ConvI2D src));
11015 
11016   format %{ "MOVD  $dst,$src\n\t"
11017             "CVTDQ2PD $dst,$dst\t# i2d" %}
11018   ins_encode %{
11019     __ movdl($dst$$XMMRegister, $src$$Register);
11020     __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11021   %}
11022   ins_pipe(pipe_slow); // XXX
11023 %}
11024 
11025 instruct convI2DPR_mem(regDPR dst, memory mem) %{
11026   predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11027   match(Set dst (ConvI2D (LoadI mem)));
11028   format %{ "FILD   $mem\n\t"
11029             "FSTP   $dst" %}
11030   opcode(0xDB);      /* DB /0 */
11031   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11032               Pop_Reg_DPR(dst));
11033   ins_pipe( fpu_reg_mem );
11034 %}
11035 
11036 // Convert a byte to a float; no rounding step needed.
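// (After the AndI with 0xFF the value is in [0, 255], which is exactly
// representable in a 24-bit significand, so FILD alone already yields the
// correctly rounded float.)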
11037 instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11038   predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11039   match(Set dst (ConvI2F src));
11040   format %{ "FILD   $src\n\t"
11041             "FSTP   $dst" %}
11042 
11043   opcode(0xDB, 0x0);  /* DB /0 */
11044   ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11045   ins_pipe( fpu_reg_mem );
11046 %}
11047 
11048 // In 24-bit mode, force exponent rounding by storing back out
11049 instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11050   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11051   match(Set dst (ConvI2F src));
11052   ins_cost(200);
11053   format %{ "FILD   $src\n\t"
11054             "FSTP_S $dst" %}
11055   opcode(0xDB, 0x0);  /* DB /0 */
11056   ins_encode( Push_Mem_I(src),
11057               Pop_Mem_FPR(dst));
11058   ins_pipe( fpu_mem_mem );
11059 %}
11060 
11061 // In 24-bit mode, force exponent rounding by storing back out
11062 instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11063   predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11064   match(Set dst (ConvI2F (LoadI mem)));
11065   ins_cost(200);
11066   format %{ "FILD   $mem\n\t"
11067             "FSTP_S $dst" %}
11068   opcode(0xDB);  /* DB /0 */
11069   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11070               Pop_Mem_FPR(dst));
11071   ins_pipe( fpu_mem_mem );
11072 %}
11073 
11074 // This instruction does not round to 24-bits
11075 instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11076   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11077   match(Set dst (ConvI2F src));
11078   format %{ "FILD   $src\n\t"
11079             "FSTP   $dst" %}
11080   opcode(0xDB, 0x0);  /* DB /0 */
11081   ins_encode( Push_Mem_I(src),
11082               Pop_Reg_FPR(dst));
11083   ins_pipe( fpu_reg_mem );
11084 %}
11085 
11086 // This instruction does not round to 24-bits
11087 instruct convI2FPR_mem(regFPR dst, memory mem) %{
11088   predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11089   match(Set dst (ConvI2F (LoadI mem)));
11090   format %{ "FILD   $mem\n\t"
11091             "FSTP   $dst" %}
11092   opcode(0xDB);      /* DB /0 */
11093   ins_encode( OpcP, RMopc_Mem(0x00,mem),
11094               Pop_Reg_FPR(dst));
11095   ins_pipe( fpu_reg_mem );
11096 %}
11097 
11098 // Convert an int to a float in xmm; no rounding step needed.
11099 instruct convI2F_reg(regF dst, rRegI src) %{
11100   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11101   match(Set dst (ConvI2F src));
11102   format %{ "CVTSI2SS $dst, $src" %}
11103   ins_encode %{
11104     __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11105   %}
11106   ins_pipe( pipe_slow );
11107 %}
11108 
instruct convXI2F_reg(regF dst, rRegI src)

11110 %{
11111   predicate( UseSSE>=2 && UseXmmI2F );
11112   match(Set dst (ConvI2F src));
11113 
11114   format %{ "MOVD  $dst,$src\n\t"
11115             "CVTDQ2PS $dst,$dst\t# i2f" %}
11116   ins_encode %{
11117     __ movdl($dst$$XMMRegister, $src$$Register);
11118     __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11119   %}
11120   ins_pipe(pipe_slow); // XXX
11121 %}
11122 
11123 instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11124   match(Set dst (ConvI2L src));
11125   effect(KILL cr);
11126   ins_cost(375);
11127   format %{ "MOV    $dst.lo,$src\n\t"
11128             "MOV    $dst.hi,$src\n\t"
11129             "SAR    $dst.hi,31" %}
11130   ins_encode(convert_int_long(dst,src));
11131   ins_pipe( ialu_reg_reg_long );
11132 %}
11133 
11134 // Zero-extend convert int to long
11135 instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11136   match(Set dst (AndL (ConvI2L src) mask) );
11137   effect( KILL flags );
11138   ins_cost(250);
11139   format %{ "MOV    $dst.lo,$src\n\t"
11140             "XOR    $dst.hi,$dst.hi" %}
11141   opcode(0x33); // XOR
11142   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11143   ins_pipe( ialu_reg_reg_long );
11144 %}
11145 
11146 // Zero-extend long
11147 instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11148   match(Set dst (AndL src mask) );
11149   effect( KILL flags );
11150   ins_cost(250);
11151   format %{ "MOV    $dst.lo,$src.lo\n\t"
11152             "XOR    $dst.hi,$dst.hi\n\t" %}
11153   opcode(0x33); // XOR
11154   ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11155   ins_pipe( ialu_reg_reg_long );
11156 %}
11157 
11158 instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11159   predicate (UseSSE<=1);
11160   match(Set dst (ConvL2D src));
11161   effect( KILL cr );
11162   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11163             "PUSH   $src.lo\n\t"
11164             "FILD   ST,[ESP + #0]\n\t"
11165             "ADD    ESP,8\n\t"
11166             "FSTP_D $dst\t# D-round" %}
11167   opcode(0xDF, 0x5);  /* DF /5 */
11168   ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11169   ins_pipe( pipe_slow );
11170 %}
11171 
11172 instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11173   predicate (UseSSE>=2);
11174   match(Set dst (ConvL2D src));
11175   effect( KILL cr );
11176   format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11177             "PUSH   $src.lo\n\t"
11178             "FILD_D [ESP]\n\t"
11179             "FSTP_D [ESP]\n\t"
11180             "MOVSD  $dst,[ESP]\n\t"
11181             "ADD    ESP,8" %}
11182   opcode(0xDF, 0x5);  /* DF /5 */
11183   ins_encode(convert_long_double2(src), Push_ResultD(dst));
11184   ins_pipe( pipe_slow );
11185 %}
11186 
11187 instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11188   predicate (UseSSE>=1);
11189   match(Set dst (ConvL2F src));
11190   effect( KILL cr );
11191   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11192             "PUSH   $src.lo\n\t"
11193             "FILD_D [ESP]\n\t"
11194             "FSTP_S [ESP]\n\t"
11195             "MOVSS  $dst,[ESP]\n\t"
11196             "ADD    ESP,8" %}
11197   opcode(0xDF, 0x5);  /* DF /5 */
11198   ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11199   ins_pipe( pipe_slow );
11200 %}
11201 
11202 instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11203   match(Set dst (ConvL2F src));
11204   effect( KILL cr );
11205   format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11206             "PUSH   $src.lo\n\t"
11207             "FILD   ST,[ESP + #0]\n\t"
11208             "ADD    ESP,8\n\t"
11209             "FSTP_S $dst\t# F-round" %}
11210   opcode(0xDF, 0x5);  /* DF /5 */
11211   ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11212   ins_pipe( pipe_slow );
11213 %}
11214 
11215 instruct convL2I_reg( rRegI dst, eRegL src ) %{
11216   match(Set dst (ConvL2I src));
11217   effect( DEF dst, USE src );
11218   format %{ "MOV    $dst,$src.lo" %}
11219   ins_encode(enc_CopyL_Lo(dst,src));
11220   ins_pipe( ialu_reg_reg );
11221 %}
11222 
11223 instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11224   match(Set dst (MoveF2I src));
11225   effect( DEF dst, USE src );
11226   ins_cost(100);
11227   format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11228   ins_encode %{
11229     __ movl($dst$$Register, Address(rsp, $src$$disp));
11230   %}
11231   ins_pipe( ialu_reg_mem );
11232 %}
11233 
11234 instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11235   predicate(UseSSE==0);
11236   match(Set dst (MoveF2I src));
11237   effect( DEF dst, USE src );
11238 
11239   ins_cost(125);
11240   format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11241   ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11242   ins_pipe( fpu_mem_reg );
11243 %}
11244 
11245 instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11246   predicate(UseSSE>=1);
11247   match(Set dst (MoveF2I src));
11248   effect( DEF dst, USE src );
11249 
11250   ins_cost(95);
11251   format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11252   ins_encode %{
11253     __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11254   %}
11255   ins_pipe( pipe_slow );
11256 %}
11257 
11258 instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11259   predicate(UseSSE>=2);
11260   match(Set dst (MoveF2I src));
11261   effect( DEF dst, USE src );
11262   ins_cost(85);
11263   format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11264   ins_encode %{
11265     __ movdl($dst$$Register, $src$$XMMRegister);
11266   %}
11267   ins_pipe( pipe_slow );
11268 %}
11269 
11270 instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11271   match(Set dst (MoveI2F src));
11272   effect( DEF dst, USE src );
11273 
11274   ins_cost(100);
11275   format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11276   ins_encode %{
11277     __ movl(Address(rsp, $dst$$disp), $src$$Register);
11278   %}
11279   ins_pipe( ialu_mem_reg );
11280 %}
11281 
11282 
11283 instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11284   predicate(UseSSE==0);
11285   match(Set dst (MoveI2F src));
11286   effect(DEF dst, USE src);
11287 
11288   ins_cost(125);
11289   format %{ "FLD_S  $src\n\t"
11290             "FSTP   $dst\t# MoveI2F_stack_reg" %}
11291   opcode(0xD9);               /* D9 /0, FLD m32real */
11292   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11293               Pop_Reg_FPR(dst) );
11294   ins_pipe( fpu_reg_mem );
11295 %}
11296 
11297 instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11298   predicate(UseSSE>=1);
11299   match(Set dst (MoveI2F src));
11300   effect( DEF dst, USE src );
11301 
11302   ins_cost(95);
11303   format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11304   ins_encode %{
11305     __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11306   %}
11307   ins_pipe( pipe_slow );
11308 %}
11309 
11310 instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11311   predicate(UseSSE>=2);
11312   match(Set dst (MoveI2F src));
11313   effect( DEF dst, USE src );
11314 
11315   ins_cost(85);
11316   format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11317   ins_encode %{
11318     __ movdl($dst$$XMMRegister, $src$$Register);
11319   %}
11320   ins_pipe( pipe_slow );
11321 %}
11322 
11323 instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11324   match(Set dst (MoveD2L src));
11325   effect(DEF dst, USE src);
11326 
11327   ins_cost(250);
11328   format %{ "MOV    $dst.lo,$src\n\t"
11329             "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11330   opcode(0x8B, 0x8B);
11331   ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11332   ins_pipe( ialu_mem_long_reg );
11333 %}
11334 
11335 instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11336   predicate(UseSSE<=1);
11337   match(Set dst (MoveD2L src));
11338   effect(DEF dst, USE src);
11339 
11340   ins_cost(125);
11341   format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11342   ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11343   ins_pipe( fpu_mem_reg );
11344 %}
11345 
11346 instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11347   predicate(UseSSE>=2);
11348   match(Set dst (MoveD2L src));
11349   effect(DEF dst, USE src);
11350   ins_cost(95);
11351   format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11352   ins_encode %{
11353     __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11354   %}
11355   ins_pipe( pipe_slow );
11356 %}
11357 
11358 instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11359   predicate(UseSSE>=2);
11360   match(Set dst (MoveD2L src));
11361   effect(DEF dst, USE src, TEMP tmp);
11362   ins_cost(85);
11363   format %{ "MOVD   $dst.lo,$src\n\t"
11364             "PSHUFLW $tmp,$src,0x4E\n\t"
11365             "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11366   ins_encode %{
11367     __ movdl($dst$$Register, $src$$XMMRegister);
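    // pshuflw with 0x4E selects words {2,3,0,1}, swapping the two 32-bit halves
    // of the low quadword so the double's high 32 bits land in $tmp's low dword.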
11368     __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11369     __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11370   %}
11371   ins_pipe( pipe_slow );
11372 %}
11373 
11374 instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11375   match(Set dst (MoveL2D src));
11376   effect(DEF dst, USE src);
11377 
11378   ins_cost(200);
11379   format %{ "MOV    $dst,$src.lo\n\t"
11380             "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11381   opcode(0x89, 0x89);
11382   ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11383   ins_pipe( ialu_mem_long_reg );
11384 %}
11385 
11386 
11387 instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11388   predicate(UseSSE<=1);
11389   match(Set dst (MoveL2D src));
11390   effect(DEF dst, USE src);
11391   ins_cost(125);
11392 
11393   format %{ "FLD_D  $src\n\t"
11394             "FSTP   $dst\t# MoveL2D_stack_reg" %}
11395   opcode(0xDD);               /* DD /0, FLD m64real */
11396   ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11397               Pop_Reg_DPR(dst) );
11398   ins_pipe( fpu_reg_mem );
11399 %}
11400 
11401 
11402 instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11403   predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11404   match(Set dst (MoveL2D src));
11405   effect(DEF dst, USE src);
11406 
11407   ins_cost(95);
11408   format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11409   ins_encode %{
11410     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11411   %}
11412   ins_pipe( pipe_slow );
11413 %}
11414 
11415 instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11416   predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11417   match(Set dst (MoveL2D src));
11418   effect(DEF dst, USE src);
11419 
11420   ins_cost(95);
11421   format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11422   ins_encode %{
11423     __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11424   %}
11425   ins_pipe( pipe_slow );
11426 %}
11427 
11428 instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11429   predicate(UseSSE>=2);
11430   match(Set dst (MoveL2D src));
11431   effect(TEMP dst, USE src, TEMP tmp);
11432   ins_cost(85);
11433   format %{ "MOVD   $dst,$src.lo\n\t"
11434             "MOVD   $tmp,$src.hi\n\t"
11435             "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11436   ins_encode %{
11437     __ movdl($dst$$XMMRegister, $src$$Register);
11438     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
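    // punpckldq interleaves the low dwords: $dst[31:0] keeps $src.lo and
    // $dst[63:32] receives $src.hi, reassembling the 64-bit value.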
11439     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11440   %}
11441   ins_pipe( pipe_slow );
11442 %}
11443 
11444 
11445 // =======================================================================
11446 // fast clearing of an array
11447 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11448   predicate(!UseFastStosb);
11449   match(Set dummy (ClearArray cnt base));
11450   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11451   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11452             "SHL    ECX,1\t# Convert doublewords to words\n\t"
11453             "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11454   ins_encode %{
11455     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11456   %}
11457   ins_pipe( pipe_slow );
11458 %}
11459 
11460 instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11461   predicate(UseFastStosb);
11462   match(Set dummy (ClearArray cnt base));
11463   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11464   format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11465             "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11466             "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
11467   ins_encode %{
11468     __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11469   %}
11470   ins_pipe( pipe_slow );
11471 %}
11472 
11473 instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11474                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11475   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11476   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11477 
11478   format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11479   ins_encode %{
11480     __ string_compare($str1$$Register, $str2$$Register,
11481                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
11482                       $tmp1$$XMMRegister);
11483   %}
11484   ins_pipe( pipe_slow );
11485 %}
11486 
11487 // fast string equals
11488 instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11489                        regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11490   match(Set result (StrEquals (Binary str1 str2) cnt));
11491   effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11492 
11493   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11494   ins_encode %{
11495     __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11496                           $cnt$$Register, $result$$Register, $tmp3$$Register,
11497                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11498   %}
11499   ins_pipe( pipe_slow );
11500 %}
11501 
11502 // fast search of substring with known size.
11503 instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11504                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11505   predicate(UseSSE42Intrinsics);
11506   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11507   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11508 
11509   format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11510   ins_encode %{
11511     int icnt2 = (int)$int_cnt2$$constant;
11512     if (icnt2 >= 8) {
11513       // IndexOf for constant substrings with size >= 8 elements
      // which don't need to be loaded through the stack.
11515       __ string_indexofC8($str1$$Register, $str2$$Register,
11516                           $cnt1$$Register, $cnt2$$Register,
11517                           icnt2, $result$$Register,
11518                           $vec$$XMMRegister, $tmp$$Register);
11519     } else {
      // Small strings are loaded through the stack if they cross a page boundary.
11521       __ string_indexof($str1$$Register, $str2$$Register,
11522                         $cnt1$$Register, $cnt2$$Register,
11523                         icnt2, $result$$Register,
11524                         $vec$$XMMRegister, $tmp$$Register);
11525     }
11526   %}
11527   ins_pipe( pipe_slow );
11528 %}
11529 
11530 instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11531                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11532   predicate(UseSSE42Intrinsics);
11533   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11534   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11535 
11536   format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11537   ins_encode %{
11538     __ string_indexof($str1$$Register, $str2$$Register,
11539                       $cnt1$$Register, $cnt2$$Register,
11540                       (-1), $result$$Register,
11541                       $vec$$XMMRegister, $tmp$$Register);
11542   %}
11543   ins_pipe( pipe_slow );
11544 %}
11545 
11546 // fast array equals
11547 instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11548                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11549 %{
11550   match(Set result (AryEq ary1 ary2));
11551   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11552   //ins_cost(300);
11553 
11554   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11555   ins_encode %{
11556     __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
11557                           $tmp3$$Register, $result$$Register, $tmp4$$Register,
11558                           $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11559   %}
11560   ins_pipe( pipe_slow );
11561 %}
11562 
11563 // encode char[] to byte[] in ISO_8859_1
11564 instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11565                           regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11566                           eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11567   match(Set result (EncodeISOArray src (Binary dst len)));
11568   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11569 
11570   format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11571   ins_encode %{
11572     __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11573                         $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11574                         $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11575   %}
11576   ins_pipe( pipe_slow );
11577 %}
11578 
11579 
11580 //----------Control Flow Instructions------------------------------------------
11581 // Signed compare Instructions
11582 instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11583   match(Set cr (CmpI op1 op2));
11584   effect( DEF cr, USE op1, USE op2 );
11585   format %{ "CMP    $op1,$op2" %}
11586   opcode(0x3B);  /* Opcode 3B /r */
11587   ins_encode( OpcP, RegReg( op1, op2) );
11588   ins_pipe( ialu_cr_reg_reg );
11589 %}
11590 
11591 instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11592   match(Set cr (CmpI op1 op2));
11593   effect( DEF cr, USE op1 );
11594   format %{ "CMP    $op1,$op2" %}
11595   opcode(0x81,0x07);  /* Opcode 81 /7 */
11596   // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11597   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11598   ins_pipe( ialu_cr_reg_imm );
11599 %}
11600 
11601 // Cisc-spilled version of cmpI_eReg
11602 instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11603   match(Set cr (CmpI op1 (LoadI op2)));
11604 
11605   format %{ "CMP    $op1,$op2" %}
11606   ins_cost(500);
11607   opcode(0x3B);  /* Opcode 3B /r */
11608   ins_encode( OpcP, RegMem( op1, op2) );
11609   ins_pipe( ialu_cr_reg_mem );
11610 %}
11611 
11612 instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11613   match(Set cr (CmpI src zero));
11614   effect( DEF cr, USE src );
11615 
11616   format %{ "TEST   $src,$src" %}
11617   opcode(0x85);
11618   ins_encode( OpcP, RegReg( src, src ) );
11619   ins_pipe( ialu_cr_reg_imm );
11620 %}
11621 
11622 instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11623   match(Set cr (CmpI (AndI src con) zero));
11624 
11625   format %{ "TEST   $src,$con" %}
11626   opcode(0xF7,0x00);
11627   ins_encode( OpcP, RegOpc(src), Con32(con) );
11628   ins_pipe( ialu_cr_reg_imm );
11629 %}
11630 
11631 instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11632   match(Set cr (CmpI (AndI src mem) zero));
11633 
11634   format %{ "TEST   $src,$mem" %}
11635   opcode(0x85);
11636   ins_encode( OpcP, RegMem( src, mem ) );
11637   ins_pipe( ialu_cr_reg_mem );
11638 %}
11639 
11640 // Unsigned compare Instructions; really, same as signed except they
11641 // produce an eFlagsRegU instead of eFlagsReg.
11642 instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11643   match(Set cr (CmpU op1 op2));
11644 
11645   format %{ "CMPu   $op1,$op2" %}
11646   opcode(0x3B);  /* Opcode 3B /r */
11647   ins_encode( OpcP, RegReg( op1, op2) );
11648   ins_pipe( ialu_cr_reg_reg );
11649 %}
11650 
11651 instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11652   match(Set cr (CmpU op1 op2));
11653 
11654   format %{ "CMPu   $op1,$op2" %}
11655   opcode(0x81,0x07);  /* Opcode 81 /7 */
11656   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11657   ins_pipe( ialu_cr_reg_imm );
11658 %}
11659 
// Cisc-spilled version of cmpU_eReg
11661 instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11662   match(Set cr (CmpU op1 (LoadI op2)));
11663 
11664   format %{ "CMPu   $op1,$op2" %}
11665   ins_cost(500);
11666   opcode(0x3B);  /* Opcode 3B /r */
11667   ins_encode( OpcP, RegMem( op1, op2) );
11668   ins_pipe( ialu_cr_reg_mem );
11669 %}
11670 
11671 // // Cisc-spilled version of cmpU_eReg
11672 //instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11673 //  match(Set cr (CmpU (LoadI op1) op2));
11674 //
11675 //  format %{ "CMPu   $op1,$op2" %}
11676 //  ins_cost(500);
11677 //  opcode(0x39);  /* Opcode 39 /r */
11678 //  ins_encode( OpcP, RegMem( op1, op2) );
11679 //%}
11680 
11681 instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11682   match(Set cr (CmpU src zero));
11683 
11684   format %{ "TESTu  $src,$src" %}
11685   opcode(0x85);
11686   ins_encode( OpcP, RegReg( src, src ) );
11687   ins_pipe( ialu_cr_reg_imm );
11688 %}
11689 
11690 // Unsigned pointer compare Instructions
11691 instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11692   match(Set cr (CmpP op1 op2));
11693 
11694   format %{ "CMPu   $op1,$op2" %}
11695   opcode(0x3B);  /* Opcode 3B /r */
11696   ins_encode( OpcP, RegReg( op1, op2) );
11697   ins_pipe( ialu_cr_reg_reg );
11698 %}
11699 
11700 instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11701   match(Set cr (CmpP op1 op2));
11702 
11703   format %{ "CMPu   $op1,$op2" %}
11704   opcode(0x81,0x07);  /* Opcode 81 /7 */
11705   ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11706   ins_pipe( ialu_cr_reg_imm );
11707 %}
11708 
// Cisc-spilled version of cmpP_eReg
11710 instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11711   match(Set cr (CmpP op1 (LoadP op2)));
11712 
11713   format %{ "CMPu   $op1,$op2" %}
11714   ins_cost(500);
11715   opcode(0x3B);  /* Opcode 3B /r */
11716   ins_encode( OpcP, RegMem( op1, op2) );
11717   ins_pipe( ialu_cr_reg_mem );
11718 %}
11719 
11720 // // Cisc-spilled version of cmpP_eReg
11721 //instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11722 //  match(Set cr (CmpP (LoadP op1) op2));
11723 //
11724 //  format %{ "CMPu   $op1,$op2" %}
11725 //  ins_cost(500);
11726 //  opcode(0x39);  /* Opcode 39 /r */
11727 //  ins_encode( OpcP, RegMem( op1, op2) );
11728 //%}
11729 
11730 // Compare raw pointer (used in out-of-heap check).
11731 // Only works because non-oop pointers must be raw pointers
11732 // and raw pointers have no anti-dependencies.
11733 instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11734   predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11735   match(Set cr (CmpP op1 (LoadP op2)));
11736 
11737   format %{ "CMPu   $op1,$op2" %}
11738   opcode(0x3B);  /* Opcode 3B /r */
11739   ins_encode( OpcP, RegMem( op1, op2) );
11740   ins_pipe( ialu_cr_reg_mem );
11741 %}
11742 
11743 //
11744 // This will generate a signed flags result. This should be ok
11745 // since any compare to a zero should be eq/neq.
11746 instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11747   match(Set cr (CmpP src zero));
11748 
11749   format %{ "TEST   $src,$src" %}
11750   opcode(0x85);
11751   ins_encode( OpcP, RegReg( src, src ) );
11752   ins_pipe( ialu_cr_reg_imm );
11753 %}
11754 
11755 // Cisc-spilled version of testP_reg
11756 // This will generate a signed flags result. This should be ok
11757 // since any compare to a zero should be eq/neq.
11758 instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11759   match(Set cr (CmpP (LoadP op) zero));
11760 
11761   format %{ "TEST   $op,0xFFFFFFFF" %}
11762   ins_cost(500);
11763   opcode(0xF7);               /* Opcode F7 /0 */
11764   ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11765   ins_pipe( ialu_cr_reg_imm );
11766 %}
11767 
11768 // Yanked all unsigned pointer compare operations.
11769 // Pointer compares are done with CmpP which is already unsigned.
11770 
11771 //----------Max and Min--------------------------------------------------------
11772 // Min Instructions
11773 ////
11774 //   *** Min and Max using the conditional move are slower than the
11775 //   *** branch version on a Pentium III.
11776 // // Conditional move for min
11777 //instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11778 //  effect( USE_DEF op2, USE op1, USE cr );
11779 //  format %{ "CMOVlt $op2,$op1\t! min" %}
11780 //  opcode(0x4C,0x0F);
11781 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11782 //  ins_pipe( pipe_cmov_reg );
11783 //%}
11784 //
11785 //// Min Register with Register (P6 version)
11786 //instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11787 //  predicate(VM_Version::supports_cmov() );
11788 //  match(Set op2 (MinI op1 op2));
11789 //  ins_cost(200);
11790 //  expand %{
11791 //    eFlagsReg cr;
11792 //    compI_eReg(cr,op1,op2);
11793 //    cmovI_reg_lt(op2,op1,cr);
11794 //  %}
11795 //%}
11796 
11797 // Min Register with Register (generic version)
11798 instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11799   match(Set dst (MinI dst src));
11800   effect(KILL flags);
11801   ins_cost(300);
11802 
11803   format %{ "MIN    $dst,$src" %}
11804   opcode(0xCC);
11805   ins_encode( min_enc(dst,src) );
11806   ins_pipe( pipe_slow );
11807 %}
11808 
11809 // Max Register with Register
11810 //   *** Min and Max using the conditional move are slower than the
11811 //   *** branch version on a Pentium III.
11812 // // Conditional move for max
11813 //instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11814 //  effect( USE_DEF op2, USE op1, USE cr );
11815 //  format %{ "CMOVgt $op2,$op1\t! max" %}
11816 //  opcode(0x4F,0x0F);
11817 //  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11818 //  ins_pipe( pipe_cmov_reg );
11819 //%}
11820 //
11821 // // Max Register with Register (P6 version)
11822 //instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
11823 //  predicate(VM_Version::supports_cmov() );
11824 //  match(Set op2 (MaxI op1 op2));
11825 //  ins_cost(200);
11826 //  expand %{
11827 //    eFlagsReg cr;
11828 //    compI_eReg(cr,op1,op2);
11829 //    cmovI_reg_gt(op2,op1,cr);
11830 //  %}
11831 //%}
11832 
11833 // Max Register with Register (generic version)
11834 instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11835   match(Set dst (MaxI dst src));
11836   effect(KILL flags);
11837   ins_cost(300);
11838 
11839   format %{ "MAX    $dst,$src" %}
11840   opcode(0xCC);
11841   ins_encode( max_enc(dst,src) );
11842   ins_pipe( pipe_slow );
11843 %}
11844 
11845 // ============================================================================
11846 // Counted Loop limit node which represents exact final iterator value.
// Note: the resulting value should fit into the integer range since
// counted loops have a limit check for overflow.
11849 instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
11850   match(Set limit (LoopLimit (Binary init limit) stride));
11851   effect(TEMP limit_hi, TEMP tmp, KILL flags);
11852   ins_cost(300);
11853 
11854   format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
11855   ins_encode %{
11856     int strd = (int)$stride$$constant;
11857     assert(strd != 1 && strd != -1, "sanity");
11858     int m1 = (strd > 0) ? 1 : -1;
11859     // Convert limit to long (EAX:EDX)
11860     __ cdql();
11861     // Convert init to long (init:tmp)
11862     __ movl($tmp$$Register, $init$$Register);
11863     __ sarl($tmp$$Register, 31);
11864     // $limit - $init
11865     __ subl($limit$$Register, $init$$Register);
11866     __ sbbl($limit_hi$$Register, $tmp$$Register);
11867     // + ($stride - 1)
11868     if (strd > 0) {
11869       __ addl($limit$$Register, (strd - 1));
11870       __ adcl($limit_hi$$Register, 0);
11871       __ movl($tmp$$Register, strd);
11872     } else {
11873       __ addl($limit$$Register, (strd + 1));
11874       __ adcl($limit_hi$$Register, -1);
11875       __ lneg($limit_hi$$Register, $limit$$Register);
11876       __ movl($tmp$$Register, -strd);
11877     }
    // signed division: (EAX:EDX) / pos_stride
11879     __ idivl($tmp$$Register);
11880     if (strd < 0) {
11881       // restore sign
11882       __ negl($tmp$$Register);
11883     }
11884     // (EAX) * stride
11885     __ mull($tmp$$Register);
11886     // + init (ignore upper bits)
11887     __ addl($limit$$Register, $init$$Register);
11888   %}
11889   ins_pipe( pipe_slow );
11890 %}
11891 
11892 // ============================================================================
11893 // Branch Instructions
11894 // Jump Table
11895 instruct jumpXtnd(rRegI switch_val) %{
11896   match(Jump switch_val);
11897   ins_cost(350);
11898   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
11899   ins_encode %{
11900     // Jump to Address(table_base + switch_reg)
11901     Address index(noreg, $switch_val$$Register, Address::times_1);
11902     __ jump(ArrayAddress($constantaddress, index));
11903   %}
11904   ins_pipe(pipe_jmp);
11905 %}
11906 
11907 // Jump Direct - Label defines a relative address from JMP+1
11908 instruct jmpDir(label labl) %{
11909   match(Goto);
11910   effect(USE labl);
11911 
11912   ins_cost(300);
11913   format %{ "JMP    $labl" %}
11914   size(5);
11915   ins_encode %{
11916     Label* L = $labl$$label;
11917     __ jmp(*L, false); // Always long jump
11918   %}
11919   ins_pipe( pipe_jmp );
11920 %}
11921 
11922 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11923 instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
11924   match(If cop cr);
11925   effect(USE labl);
11926 
11927   ins_cost(300);
11928   format %{ "J$cop    $labl" %}
11929   size(6);
11930   ins_encode %{
11931     Label* L = $labl$$label;
11932     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11933   %}
11934   ins_pipe( pipe_jcc );
11935 %}
11936 
11937 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11938 instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
11939   match(CountedLoopEnd cop cr);
11940   effect(USE labl);
11941 
11942   ins_cost(300);
11943   format %{ "J$cop    $labl\t# Loop end" %}
11944   size(6);
11945   ins_encode %{
11946     Label* L = $labl$$label;
11947     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11948   %}
11949   ins_pipe( pipe_jcc );
11950 %}
11951 
11952 // Jump Direct Conditional - Label defines a relative address from Jcc+1
11953 instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
11954   match(CountedLoopEnd cop cmp);
11955   effect(USE labl);
11956 
11957   ins_cost(300);
11958   format %{ "J$cop,u  $labl\t# Loop end" %}
11959   size(6);
11960   ins_encode %{
11961     Label* L = $labl$$label;
11962     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11963   %}
11964   ins_pipe( pipe_jcc );
11965 %}
11966 
11967 instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
11968   match(CountedLoopEnd cop cmp);
11969   effect(USE labl);
11970 
11971   ins_cost(200);
11972   format %{ "J$cop,u  $labl\t# Loop end" %}
11973   size(6);
11974   ins_encode %{
11975     Label* L = $labl$$label;
11976     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11977   %}
11978   ins_pipe( pipe_jcc );
11979 %}
11980 
11981 // Jump Direct Conditional - using unsigned comparison
11982 instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
11983   match(If cop cmp);
11984   effect(USE labl);
11985 
11986   ins_cost(300);
11987   format %{ "J$cop,u  $labl" %}
11988   size(6);
11989   ins_encode %{
11990     Label* L = $labl$$label;
11991     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
11992   %}
11993   ins_pipe(pipe_jcc);
11994 %}
11995 
11996 instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
11997   match(If cop cmp);
11998   effect(USE labl);
11999 
12000   ins_cost(200);
12001   format %{ "J$cop,u  $labl" %}
12002   size(6);
12003   ins_encode %{
12004     Label* L = $labl$$label;
12005     __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12006   %}
12007   ins_pipe(pipe_jcc);
12008 %}
12009 
12010 instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12011   match(If cop cmp);
12012   effect(USE labl);
12013 
12014   ins_cost(200);
12015   format %{ $$template
12016     if ($cop$$cmpcode == Assembler::notEqual) {
12017       $$emit$$"JP,u   $labl\n\t"
12018       $$emit$$"J$cop,u   $labl"
12019     } else {
12020       $$emit$$"JP,u   done\n\t"
12021       $$emit$$"J$cop,u   $labl\n\t"
12022       $$emit$$"done:"
12023     }
12024   %}
12025   ins_encode %{
12026     Label* l = $labl$$label;
12027     if ($cop$$cmpcode == Assembler::notEqual) {
12028       __ jcc(Assembler::parity, *l, false);
12029       __ jcc(Assembler::notEqual, *l, false);
12030     } else if ($cop$$cmpcode == Assembler::equal) {
12031       Label done;
12032       __ jccb(Assembler::parity, done);
12033       __ jcc(Assembler::equal, *l, false);
12034       __ bind(done);
12035     } else {
12036        ShouldNotReachHere();
12037     }
12038   %}
12039   ins_pipe(pipe_jcc);
12040 %}
12041 
12042 // ============================================================================
12043 // The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12044 // array for an instance of the superklass.  Set a hidden internal cache on a
12045 // hit (cache is checked with exposed code in gen_subtype_check()).  Return
12046 // NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
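// A hedged C-style sketch of that scan (field spellings follow the format
// strings below; helper names are illustrative, not the exact VM code):
//
//   int partial_subtype_check_sketch(Klass* sub, Klass* super) {
//     for (int i = 0; i < length_of(sub->secondary_supers); i++) {
//       if (sub->secondary_supers[i] == super) {
//         sub->secondary_super_cache = super;   // hit: update the hidden cache
//         return 0;                             // zero     => hit
//       }
//     }
//     return 1;                                 // non-zero => miss
//   }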
12047 instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12048   match(Set result (PartialSubtypeCheck sub super));
12049   effect( KILL rcx, KILL cr );
12050 
12051   ins_cost(1100);  // slightly larger than the next version
12052   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12053             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12054             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12055             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12056             "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12057             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12058             "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12059      "miss:\t" %}
12060 
12061   opcode(0x1); // Force a XOR of EDI
12062   ins_encode( enc_PartialSubtypeCheck() );
12063   ins_pipe( pipe_slow );
12064 %}
12065 
12066 instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12067   match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12068   effect( KILL rcx, KILL result );
12069 
12070   ins_cost(1000);
12071   format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12072             "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12073             "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12074             "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12075             "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12076             "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12077      "miss:\t" %}
12078 
12079   opcode(0x0);  // No need to XOR EDI
12080   ins_encode( enc_PartialSubtypeCheck() );
12081   ins_pipe( pipe_slow );
12082 %}
12083 
12084 // ============================================================================
12085 // Branch Instructions -- short offset versions
12086 //
12087 // These instructions are used to replace jumps of a long offset (the default
12088 // match) with jumps of a shorter offset.  These instructions are all tagged
12089 // with the ins_short_branch attribute, which causes the ADLC to suppress the
12090 // match rules in general matching.  Instead, the ADLC generates a conversion
12091 // method in the MachNode which can be used to do in-place replacement of the
12092 // long variant with the shorter variant.  The compiler will determine if a
12093 // branch can be taken by the is_short_branch_offset() predicate in the machine
12094 // specific code section of the file.
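// For reference, the size() attributes used below match the x86 encodings:
// Jcc rel8 is 2 bytes and Jcc rel32 (0F 8x) is 6 bytes, while JMP rel8 and
// JMP rel32 are 2 and 5 bytes respectively.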
12095 
12096 // Jump Direct - Label defines a relative address from JMP+1
12097 instruct jmpDir_short(label labl) %{
12098   match(Goto);
12099   effect(USE labl);
12100 
12101   ins_cost(300);
12102   format %{ "JMP,s  $labl" %}
12103   size(2);
12104   ins_encode %{
12105     Label* L = $labl$$label;
12106     __ jmpb(*L);
12107   %}
12108   ins_pipe( pipe_jmp );
12109   ins_short_branch(1);
12110 %}
12111 
12112 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12113 instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12114   match(If cop cr);
12115   effect(USE labl);
12116 
12117   ins_cost(300);
12118   format %{ "J$cop,s  $labl" %}
12119   size(2);
12120   ins_encode %{
12121     Label* L = $labl$$label;
12122     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12123   %}
12124   ins_pipe( pipe_jcc );
12125   ins_short_branch(1);
12126 %}
12127 
12128 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12129 instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12130   match(CountedLoopEnd cop cr);
12131   effect(USE labl);
12132 
12133   ins_cost(300);
12134   format %{ "J$cop,s  $labl\t# Loop end" %}
12135   size(2);
12136   ins_encode %{
12137     Label* L = $labl$$label;
12138     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12139   %}
12140   ins_pipe( pipe_jcc );
12141   ins_short_branch(1);
12142 %}
12143 
12144 // Jump Direct Conditional - Label defines a relative address from Jcc+1
12145 instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12146   match(CountedLoopEnd cop cmp);
12147   effect(USE labl);
12148 
12149   ins_cost(300);
12150   format %{ "J$cop,us $labl\t# Loop end" %}
12151   size(2);
12152   ins_encode %{
12153     Label* L = $labl$$label;
12154     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12155   %}
12156   ins_pipe( pipe_jcc );
12157   ins_short_branch(1);
12158 %}
12159 
12160 instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12161   match(CountedLoopEnd cop cmp);
12162   effect(USE labl);
12163 
12164   ins_cost(300);
12165   format %{ "J$cop,us $labl\t# Loop end" %}
12166   size(2);
12167   ins_encode %{
12168     Label* L = $labl$$label;
12169     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12170   %}
12171   ins_pipe( pipe_jcc );
12172   ins_short_branch(1);
12173 %}
12174 
12175 // Jump Direct Conditional - using unsigned comparison
12176 instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12177   match(If cop cmp);
12178   effect(USE labl);
12179 
12180   ins_cost(300);
12181   format %{ "J$cop,us $labl" %}
12182   size(2);
12183   ins_encode %{
12184     Label* L = $labl$$label;
12185     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12186   %}
12187   ins_pipe( pipe_jcc );
12188   ins_short_branch(1);
12189 %}
12190 
12191 instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12192   match(If cop cmp);
12193   effect(USE labl);
12194 
12195   ins_cost(300);
12196   format %{ "J$cop,us $labl" %}
12197   size(2);
12198   ins_encode %{
12199     Label* L = $labl$$label;
12200     __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12201   %}
12202   ins_pipe( pipe_jcc );
12203   ins_short_branch(1);
12204 %}
12205 
12206 instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12207   match(If cop cmp);
12208   effect(USE labl);
12209 
12210   ins_cost(300);
12211   format %{ $$template
12212     if ($cop$$cmpcode == Assembler::notEqual) {
12213       $$emit$$"JP,u,s   $labl\n\t"
12214       $$emit$$"J$cop,u,s   $labl"
12215     } else {
12216       $$emit$$"JP,u,s   done\n\t"
12217       $$emit$$"J$cop,u,s  $labl\n\t"
12218       $$emit$$"done:"
12219     }
12220   %}
12221   size(4);
12222   ins_encode %{
12223     Label* l = $labl$$label;
12224     if ($cop$$cmpcode == Assembler::notEqual) {
12225       __ jccb(Assembler::parity, *l);
12226       __ jccb(Assembler::notEqual, *l);
12227     } else if ($cop$$cmpcode == Assembler::equal) {
12228       Label done;
12229       __ jccb(Assembler::parity, done);
12230       __ jccb(Assembler::equal, *l);
12231       __ bind(done);
12232     } else {
12233        ShouldNotReachHere();
12234     }
12235   %}
12236   ins_pipe(pipe_jcc);
12237   ins_short_branch(1);
12238 %}
12239 
12240 // ============================================================================
12241 // Long Compare
12242 //
12243 // Currently we hold longs in 2 registers.  Comparing such values efficiently
12244 // is tricky.  The flavor of compare used depends on whether we are testing
12245 // for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12246 // The GE test is the negated LT test.  The LE test can be had by commuting
12247 // the operands (yielding a GE test) and then negating; negate again for the
12248 // GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12249 // NE test is negated from that.
12250 
12251 // Due to a shortcoming in the ADLC, it mixes up expressions like:
12252 // (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12253 // difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12254 // are collapsed internally in the ADLC's dfa-gen code.  The match for
12255 // (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12256 // foo match ends up with the wrong leaf.  One fix is to not match both
12257 // reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12258 // both forms beat the trinary form of long-compare and both are very useful
12259 // on Intel which has so few registers.
12260 
12261 // Manifest a CmpL result in an integer register.  Very painful.
12262 // This is the test to avoid.
12263 instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12264   match(Set dst (CmpL3 src1 src2));
12265   effect( KILL flags );
12266   ins_cost(1000);
12267   format %{ "XOR    $dst,$dst\n\t"
12268             "CMP    $src1.hi,$src2.hi\n\t"
12269             "JLT,s  m_one\n\t"
12270             "JGT,s  p_one\n\t"
12271             "CMP    $src1.lo,$src2.lo\n\t"
12272             "JB,s   m_one\n\t"
12273             "JEQ,s  done\n"
12274     "p_one:\tINC    $dst\n\t"
12275             "JMP,s  done\n"
12276     "m_one:\tDEC    $dst\n"
12277      "done:" %}
12278   ins_encode %{
12279     Label p_one, m_one, done;
12280     __ xorptr($dst$$Register, $dst$$Register);
12281     __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12282     __ jccb(Assembler::less,    m_one);
12283     __ jccb(Assembler::greater, p_one);
12284     __ cmpl($src1$$Register, $src2$$Register);
12285     __ jccb(Assembler::below,   m_one);
12286     __ jccb(Assembler::equal,   done);
12287     __ bind(p_one);
12288     __ incrementl($dst$$Register);
12289     __ jmpb(done);
12290     __ bind(m_one);
12291     __ decrementl($dst$$Register);
12292     __ bind(done);
12293   %}
12294   ins_pipe( pipe_slow );
12295 %}
12296 
12297 //======
12298 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12299 // compares.  Can be used for LE or GT compares by reversing arguments.
12300 // NOT GOOD FOR EQ/NE tests.
12301 instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12302   match( Set flags (CmpL src zero ));
12303   ins_cost(100);
12304   format %{ "TEST   $src.hi,$src.hi" %}
12305   opcode(0x85);
12306   ins_encode( OpcP, RegReg_Hi2( src, src ) );
12307   ins_pipe( ialu_cr_reg_reg );
12308 %}
12309 
12310 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
12311 // compares.  Can be used for LE or GT compares by reversing arguments.
12312 // NOT GOOD FOR EQ/NE tests.
12313 instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12314   match( Set flags (CmpL src1 src2 ));
12315   effect( TEMP tmp );
12316   ins_cost(300);
12317   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12318             "MOV    $tmp,$src1.hi\n\t"
12319             "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12320   ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12321   ins_pipe( ialu_cr_reg_reg );
12322 %}
12323 
12324 // Long compares reg < zero/req OR reg >= zero/req.
12325 // Just a wrapper for a normal branch, plus the predicate test.
12326 instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12327   match(If cmp flags);
12328   effect(USE labl);
12329   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12330   expand %{
12331     jmpCon(cmp,flags,labl);    // JLT or JGE...
12332   %}
12333 %}
12334 
12335 // Compare 2 longs and CMOVE longs.
12336 instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12337   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12338   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12339   ins_cost(400);
12340   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12341             "CMOV$cmp $dst.hi,$src.hi" %}
12342   opcode(0x0F,0x40);
12343   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12344   ins_pipe( pipe_cmov_reg_long );
12345 %}
12346 
12347 instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12348   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12349   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12350   ins_cost(500);
12351   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12352             "CMOV$cmp $dst.hi,$src.hi" %}
12353   opcode(0x0F,0x40);
12354   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12355   ins_pipe( pipe_cmov_reg_long );
12356 %}
12357 
12358 // Compare 2 longs and CMOVE ints.
12359 instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12360   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12361   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12362   ins_cost(200);
12363   format %{ "CMOV$cmp $dst,$src" %}
12364   opcode(0x0F,0x40);
12365   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12366   ins_pipe( pipe_cmov_reg );
12367 %}
12368 
12369 instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12370   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12371   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12372   ins_cost(250);
12373   format %{ "CMOV$cmp $dst,$src" %}
12374   opcode(0x0F,0x40);
12375   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12376   ins_pipe( pipe_cmov_mem );
12377 %}
12378 
// Compare 2 longs and CMOVE pointers.
12380 instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12381   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12382   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12383   ins_cost(200);
12384   format %{ "CMOV$cmp $dst,$src" %}
12385   opcode(0x0F,0x40);
12386   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12387   ins_pipe( pipe_cmov_reg );
12388 %}
12389 
12390 // Compare 2 longs and CMOVE doubles
12391 instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12393   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12394   ins_cost(200);
12395   expand %{
12396     fcmovDPR_regS(cmp,flags,dst,src);
12397   %}
12398 %}
12399 
12400 // Compare 2 longs and CMOVE doubles
12401 instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12403   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12404   ins_cost(200);
12405   expand %{
12406     fcmovD_regS(cmp,flags,dst,src);
12407   %}
12408 %}
12409 
12410 instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12412   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12413   ins_cost(200);
12414   expand %{
12415     fcmovFPR_regS(cmp,flags,dst,src);
12416   %}
12417 %}
12418 
12419 instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge) );
12421   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12422   ins_cost(200);
12423   expand %{
12424     fcmovF_regS(cmp,flags,dst,src);
12425   %}
12426 %}
12427 
12428 //======
12429 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12430 instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12431   match( Set flags (CmpL src zero ));
12432   effect(TEMP tmp);
12433   ins_cost(200);
12434   format %{ "MOV    $tmp,$src.lo\n\t"
12435             "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12436   ins_encode( long_cmp_flags0( src, tmp ) );
12437   ins_pipe( ialu_reg_reg_long );
12438 %}
12439 
12440 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12441 instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12442   match( Set flags (CmpL src1 src2 ));
12443   ins_cost(200+300);
12444   format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12445             "JNE,s  skip\n\t"
12446             "CMP    $src1.hi,$src2.hi\n\t"
12447      "skip:\t" %}
12448   ins_encode( long_cmp_flags1( src1, src2 ) );
12449   ins_pipe( ialu_cr_reg_reg );
12450 %}
12451 
12452 // Long compare reg == zero/reg OR reg != zero/reg
12453 // Just a wrapper for a normal branch, plus the predicate test.
12454 instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12455   match(If cmp flags);
12456   effect(USE labl);
12457   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12458   expand %{
12459     jmpCon(cmp,flags,labl);    // JEQ or JNE...
12460   %}
12461 %}
12462 
12463 // Compare 2 longs and CMOVE longs.
12464 instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12465   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12466   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12467   ins_cost(400);
12468   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12469             "CMOV$cmp $dst.hi,$src.hi" %}
12470   opcode(0x0F,0x40);
12471   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12472   ins_pipe( pipe_cmov_reg_long );
12473 %}
12474 
12475 instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12476   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12477   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12478   ins_cost(500);
12479   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12480             "CMOV$cmp $dst.hi,$src.hi" %}
12481   opcode(0x0F,0x40);
12482   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12483   ins_pipe( pipe_cmov_reg_long );
12484 %}
12485 
12486 // Compare 2 longs and CMOVE ints.
12487 instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12488   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12489   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12490   ins_cost(200);
12491   format %{ "CMOV$cmp $dst,$src" %}
12492   opcode(0x0F,0x40);
12493   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12494   ins_pipe( pipe_cmov_reg );
12495 %}
12496 
12497 instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12498   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12499   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12500   ins_cost(250);
12501   format %{ "CMOV$cmp $dst,$src" %}
12502   opcode(0x0F,0x40);
12503   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12504   ins_pipe( pipe_cmov_mem );
12505 %}
12506 
12507 // Compare 2 longs and CMOVE ptrs.
12508 instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12509   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12510   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12511   ins_cost(200);
12512   format %{ "CMOV$cmp $dst,$src" %}
12513   opcode(0x0F,0x40);
12514   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12515   ins_pipe( pipe_cmov_reg );
12516 %}
12517 
12518 // Compare 2 longs and CMOVE doubles
12519 instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
12520   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12521   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12522   ins_cost(200);
12523   expand %{
12524     fcmovDPR_regS(cmp,flags,dst,src);
12525   %}
12526 %}
12527 
12528 // Compare 2 longs and CMOVE doubles
12529 instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
12530   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12531   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12532   ins_cost(200);
12533   expand %{
12534     fcmovD_regS(cmp,flags,dst,src);
12535   %}
12536 %}
12537 
12538 instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
12539   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12540   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12541   ins_cost(200);
12542   expand %{
12543     fcmovFPR_regS(cmp,flags,dst,src);
12544   %}
12545 %}
12546 
12547 instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
12548   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
12549   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12550   ins_cost(200);
12551   expand %{
12552     fcmovF_regS(cmp,flags,dst,src);
12553   %}
12554 %}
12555 
12556 //======
12557 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12558 // Same as cmpL_reg_flags_LEGT except must negate src
12559 instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12560   match( Set flags (CmpL src zero ));
12561   effect( TEMP tmp );
12562   ins_cost(300);
12563   format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12564             "CMP    $tmp,$src.lo\n\t"
12565             "SBB    $tmp,$src.hi" %}
12566   ins_encode( long_cmp_flags3(src, tmp) );
12567   ins_pipe( ialu_reg_reg_long );
12568 %}
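// How the negate trick works (illustrative trace): XOR/CMP/SBB computes the high word
// of (0 - src) including the borrow from the low word, so the signed flags describe
// "0 compared to src" and the commuted branch condition (GT becomes LT, LE becomes GE)
// reads them directly.  For example, with src = 5 (hi = 0, lo = 5): CMP 0,5 borrows,
// SBB leaves tmp = -1 with SF set, the commuted "less" test fires, and indeed src > 0.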
12569 
12570 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12571 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12572 // requires a commuted test to get the same result.
12573 instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12574   match( Set flags (CmpL src1 src2 ));
12575   effect( TEMP tmp );
12576   ins_cost(300);
12577   format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12578             "MOV    $tmp,$src2.hi\n\t"
12579             "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12580   ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12581   ins_pipe( ialu_cr_reg_reg );
12582 %}
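// In the same spirit (illustrative): an ordered compare such as "a > b" is evaluated
// here as "b - a < 0" -- the low words go through CMP with operands swapped and the
// high words through SBB into the temp -- so the commuted condition codes (L for GT,
// GE for LE) give the right answer without materializing the full 64-bit difference.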
12583 
12584 // Long compares reg <= zero/reg OR reg > zero/reg.
12585 // Just a wrapper for a normal branch, plus the predicate test
12586 instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12587   match(If cmp flags);
12588   effect(USE labl);
12589   predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12590   ins_cost(300);
12591   expand %{
12592     jmpCon(cmp,flags,labl);    // JGT or JLE...
12593   %}
12594 %}
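// For illustration (hypothetical source): branches such as
//
//   if (a > b)  { ... }   // BoolTest::gt
//   if (a <= b) { ... }   // BoolTest::le
//
// select this wrapper; EQ/NE and LT/GE tests are handled by the corresponding
// _EQNE and _LTGE wrappers above.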
12595 
12596 // Compare 2 longs and CMOVE longs.
12597 instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12598   match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12599   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12600   ins_cost(400);
12601   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12602             "CMOV$cmp $dst.hi,$src.hi" %}
12603   opcode(0x0F,0x40);
12604   ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12605   ins_pipe( pipe_cmov_reg_long );
12606 %}
12607 
12608 instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12609   match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12610   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12611   ins_cost(500);
12612   format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12613             "CMOV$cmp $dst.hi,$src.hi+4" %}
12614   opcode(0x0F,0x40);
12615   ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12616   ins_pipe( pipe_cmov_reg_long );
12617 %}
12618 
12619 // Compare 2 longs and CMOVE ints.
12620 instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12621   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12622   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12623   ins_cost(200);
12624   format %{ "CMOV$cmp $dst,$src" %}
12625   opcode(0x0F,0x40);
12626   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12627   ins_pipe( pipe_cmov_reg );
12628 %}
12629 
12630 instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12631   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12632   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12633   ins_cost(250);
12634   format %{ "CMOV$cmp $dst,$src" %}
12635   opcode(0x0F,0x40);
12636   ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12637   ins_pipe( pipe_cmov_mem );
12638 %}
12639 
12640 // Compare 2 longs and CMOVE ptrs.
12641 instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12642   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12643   match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12644   ins_cost(200);
12645   format %{ "CMOV$cmp $dst,$src" %}
12646   opcode(0x0F,0x40);
12647   ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12648   ins_pipe( pipe_cmov_reg );
12649 %}
12650 
12651 // Compare 2 longs and CMOVE doubles
12652 instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
12653   predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12654   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12655   ins_cost(200);
12656   expand %{
12657     fcmovDPR_regS(cmp,flags,dst,src);
12658   %}
12659 %}
12660 
12661 // Compare 2 longs and CMOVE doubles
12662 instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
12663   predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12664   match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12665   ins_cost(200);
12666   expand %{
12667     fcmovD_regS(cmp,flags,dst,src);
12668   %}
12669 %}
12670 
12671 instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
12672   predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12673   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12674   ins_cost(200);
12675   expand %{
12676     fcmovFPR_regS(cmp,flags,dst,src);
12677   %}
12678 %}
12679 
12680 
12681 instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
12682   predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
12683   match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12684   ins_cost(200);
12685   expand %{
12686     fcmovF_regS(cmp,flags,dst,src);
12687   %}
12688 %}
12689 
12690 
12691 // ============================================================================
12692 // Procedure Call/Return Instructions
12693 // Call Java Static Instruction
12694 // Note: If this code changes, the corresponding ret_addr_offset() and
12695 //       compute_padding() functions will have to be adjusted.
12696 instruct CallStaticJavaDirect(method meth) %{
12697   match(CallStaticJava);
12698   effect(USE meth);
12699 
12700   ins_cost(300);
12701   format %{ "CALL,static " %}
12702   opcode(0xE8); /* E8 cd */
12703   ins_encode( pre_call_resets,
12704               Java_Static_Call( meth ),
12705               call_epilog,
12706               post_call_FPU );
12707   ins_pipe( pipe_slow );
12708   ins_alignment(4);
12709 %}
12710 
12711 // Call Java Dynamic Instruction
12712 // Note: If this code changes, the corresponding ret_addr_offset() and
12713 //       compute_padding() functions will have to be adjusted.
12714 instruct CallDynamicJavaDirect(method meth) %{
12715   match(CallDynamicJava);
12716   effect(USE meth);
12717 
12718   ins_cost(300);
12719   format %{ "MOV    EAX,(oop)-1\n\t"
12720             "CALL,dynamic" %}
12721   opcode(0xE8); /* E8 cd */
12722   ins_encode( pre_call_resets,
12723               Java_Dynamic_Call( meth ),
12724               call_epilog,
12725               post_call_FPU );
12726   ins_pipe( pipe_slow );
12727   ins_alignment(4);
12728 %}
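// Note on the format above: the MOV seeds EAX -- the inline-cache register on 32-bit
// x86 -- with the unresolved placeholder shown as (oop)-1; the inline-cache machinery
// later patches the call site so that EAX carries the expected cached value once the
// IC is resolved.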
12729 
12730 // Call Runtime Instruction
12731 instruct CallRuntimeDirect(method meth) %{
12732   match(CallRuntime);
12733   effect(USE meth);
12734 
12735   ins_cost(300);
12736   format %{ "CALL,runtime " %}
12737   opcode(0xE8); /* E8 cd */
12738   // Use FFREEs to clear entries in float stack
12739   ins_encode( pre_call_resets,
12740               FFree_Float_Stack_All,
12741               Java_To_Runtime( meth ),
12742               post_call_FPU );
12743   ins_pipe( pipe_slow );
12744 %}
12745 
12746 // Call runtime without safepoint
12747 instruct CallLeafDirect(method meth) %{
12748   match(CallLeaf);
12749   effect(USE meth);
12750 
12751   ins_cost(300);
12752   format %{ "CALL_LEAF,runtime " %}
12753   opcode(0xE8); /* E8 cd */
12754   ins_encode( pre_call_resets,
12755               FFree_Float_Stack_All,
12756               Java_To_Runtime( meth ),
12757               Verify_FPU_For_Leaf, post_call_FPU );
12758   ins_pipe( pipe_slow );
12759 %}
12760 
12761 instruct CallLeafNoFPDirect(method meth) %{
12762   match(CallLeafNoFP);
12763   effect(USE meth);
12764 
12765   ins_cost(300);
12766   format %{ "CALL_LEAF_NOFP,runtime " %}
12767   opcode(0xE8); /* E8 cd */
12768   ins_encode(Java_To_Runtime(meth));
12769   ins_pipe( pipe_slow );
12770 %}
12771 
12772 
12773 // Return Instruction
12774 // Remove the return address & jump to it.
12775 instruct Ret() %{
12776   match(Return);
12777   format %{ "RET" %}
12778   opcode(0xC3);
12779   ins_encode(OpcP);
12780   ins_pipe( pipe_jmp );
12781 %}
12782 
12783 // Tail Call; Jump from runtime stub to Java code.
12784 // Also known as an 'interprocedural jump'.
12785 // Target of jump will eventually return to caller.
12786 // TailJump below removes the return address.
12787 instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12788   match(TailCall jump_target method_oop );
12789   ins_cost(300);
12790   format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12791   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12792   ins_encode( OpcP, RegOpc(jump_target) );
12793   ins_pipe( pipe_jmp );
12794 %}
12795 
12796 
12797 // Tail Jump; remove the return address; jump to target.
12798 // TailCall above leaves the return address around.
12799 instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12800   match( TailJump jump_target ex_oop );
12801   ins_cost(300);
12802   format %{ "POP    EDX\t# pop return address into dummy\n\t"
12803             "JMP    $jump_target " %}
12804   opcode(0xFF, 0x4);  /* Opcode FF /4 */
12805   ins_encode( enc_pop_rdx,
12806               OpcP, RegOpc(jump_target) );
12807   ins_pipe( pipe_jmp );
12808 %}
12809 
12810 // Create exception oop: created by stack-crawling runtime code.
12811 // Created exception is now available to this handler, and is setup
12812 // just prior to jumping to this handler.  No code emitted.
12813 instruct CreateException( eAXRegP ex_oop )
12814 %{
12815   match(Set ex_oop (CreateEx));
12816 
12817   size(0);
12818   // use the following format syntax
12819   format %{ "# exception oop is in EAX; no code emitted" %}
12820   ins_encode();
12821   ins_pipe( empty );
12822 %}
12823 
12824 
12825 // Rethrow exception:
12826 // The exception oop will come in the first argument position.
12827 // Then JUMP (not call) to the rethrow stub code.
12828 instruct RethrowException()
12829 %{
12830   match(Rethrow);
12831 
12832   // use the following format syntax
12833   format %{ "JMP    rethrow_stub" %}
12834   ins_encode(enc_rethrow);
12835   ins_pipe( pipe_jmp );
12836 %}
12837 
12838 // inlined locking and unlocking
12839 
12840 instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
12841   predicate(Compile::current()->use_rtm());
12842   match(Set cr (FastLock object box));
12843   effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
12844   ins_cost(300);
12845   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
12846   ins_encode %{
12847     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12848                  $scr$$Register, $cx1$$Register, $cx2$$Register,
12849                  _counters, _rtm_counters, _stack_rtm_counters,
12850                  ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
12851                  true, ra_->C->profile_rtm());
12852   %}
12853   ins_pipe(pipe_slow);
12854 %}
12855 
12856 instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
12857   predicate(!Compile::current()->use_rtm());
12858   match(Set cr (FastLock object box));
12859   effect(TEMP tmp, TEMP scr, USE_KILL box);
12860   ins_cost(300);
12861   format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
12862   ins_encode %{
12863     __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
12864                  $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
12865   %}
12866   ins_pipe(pipe_slow);
12867 %}
12868 
12869 instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
12870   match(Set cr (FastUnlock object box));
12871   effect(TEMP tmp, USE_KILL box);
12872   ins_cost(300);
12873   format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
12874   ins_encode %{
12875     __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
12876   %}
12877   ins_pipe(pipe_slow);
12878 %}
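// Both FastLock and FastUnlock communicate success through the condition codes: the
// MacroAssembler fast paths leave EQ set when the inline lock/unlock attempt succeeds,
// and the surrounding compiled code branches to the runtime monitorenter/monitorexit
// slow path otherwise.  (Descriptive note only; see macroAssembler_x86 for the details.)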
12879 
12880 
12881 
12882 // ============================================================================
12883 // Safepoint Instruction
12884 instruct safePoint_poll(eFlagsReg cr) %{
12885   match(SafePoint);
12886   effect(KILL cr);
12887 
12888   // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
12889   // On SPARC that might be acceptable as we can generate the address with
12890   // just a sethi, saving an or.  By polling at offset 0, however, we end up
12891   // putting additional pressure on cache index 0 in the D$.  Because of
12892   // alignment, the lower cache indices already tend to see more traffic,
12893   // so it would be better to change the polling address to offset 0 of the
12894   // last cache line in the polling page.
12895 
12896   format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
12897   ins_cost(125);
12898   size(6);
12899   ins_encode( Safepoint_Poll() );
12900   ins_pipe( ialu_reg_mem );
12901 %}
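// For background: the poll is a harmless TEST of a word on the dedicated polling page.
// To bring compiled code to a safepoint the VM protects that page, so the next poll
// faults and the signal handler parks the thread in the safepoint code; no explicit
// branch is needed on the fast path.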
12902 
12903 
12904 // ============================================================================
12905 // This name is KNOWN by the ADLC and cannot be changed.
12906 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
12907 // for this guy.
12908 instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
12909   match(Set dst (ThreadLocal));
12910   effect(DEF dst, KILL cr);
12911 
12912   format %{ "MOV    $dst, Thread::current()" %}
12913   ins_encode %{
12914     Register dstReg = as_Register($dst$$reg);
12915     __ get_thread(dstReg);
12916   %}
12917   ins_pipe( ialu_reg_fat );
12918 %}
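// Note: get_thread() expands to an OS-specific instruction sequence that materializes
// the current JavaThread*; the KILL of the flags above reflects that this sequence is
// free to clobber the condition codes.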
12919 
12920 
12921 
12922 //----------PEEPHOLE RULES-----------------------------------------------------
12923 // These must follow all instruction definitions as they use the names
12924 // defined in the instructions definitions.
12925 //
12926 // peepmatch ( root_instr_name [preceding_instruction]* );
12927 //
12928 // peepconstraint %{
12929 // (instruction_number.operand_name relational_op instruction_number.operand_name
12930 //  [, ...] );
12931 // // instruction numbers are zero-based using left to right order in peepmatch
12932 //
12933 // peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
12934 // // provide an instruction_number.operand_name for each operand that appears
12935 // // in the replacement instruction's match rule
12936 //
12937 // ---------VM FLAGS---------------------------------------------------------
12938 //
12939 // All peephole optimizations can be turned off using -XX:-OptoPeephole
12940 //
12941 // Each peephole rule is given an identifying number starting with zero and
12942 // increasing by one in the order seen by the parser.  An individual peephole
12943 // can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
12944 // on the command-line.
12945 //
12946 // ---------CURRENT LIMITATIONS----------------------------------------------
12947 //
12948 // Only match adjacent instructions in same basic block
12949 // Only equality constraints
12950 // Only constraints between operands, not (0.dest_reg == EAX_enc)
12951 // Only one replacement instruction
12952 //
12953 // ---------EXAMPLE----------------------------------------------------------
12954 //
12955 // // pertinent parts of existing instructions in architecture description
12956 // instruct movI(rRegI dst, rRegI src) %{
12957 //   match(Set dst (CopyI src));
12958 // %}
12959 //
12960 // instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
12961 //   match(Set dst (AddI dst src));
12962 //   effect(KILL cr);
12963 // %}
12964 //
12965 // // Change (inc mov) to lea
12966 // peephole %{
12967 //   // increment preceded by register-register move
12968 //   peepmatch ( incI_eReg movI );
12969 //   // require that the destination register of the increment
12970 //   // match the destination register of the move
12971 //   peepconstraint ( 0.dst == 1.dst );
12972 //   // construct a replacement instruction that sets
12973 //   // the destination to ( move's source register + one )
12974 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12975 // %}
12976 //
12977 // Implementation no longer uses movX instructions since
12978 // machine-independent system no longer uses CopyX nodes.
12979 //
12980 // peephole %{
12981 //   peepmatch ( incI_eReg movI );
12982 //   peepconstraint ( 0.dst == 1.dst );
12983 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12984 // %}
12985 //
12986 // peephole %{
12987 //   peepmatch ( decI_eReg movI );
12988 //   peepconstraint ( 0.dst == 1.dst );
12989 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12990 // %}
12991 //
12992 // peephole %{
12993 //   peepmatch ( addI_eReg_imm movI );
12994 //   peepconstraint ( 0.dst == 1.dst );
12995 //   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
12996 // %}
12997 //
12998 // peephole %{
12999 //   peepmatch ( addP_eReg_imm movP );
13000 //   peepconstraint ( 0.dst == 1.dst );
13001 //   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13002 // %}
13003 
13004 // // Change load of spilled value to only a spill
13005 // instruct storeI(memory mem, rRegI src) %{
13006 //   match(Set mem (StoreI mem src));
13007 // %}
13008 //
13009 // instruct loadI(rRegI dst, memory mem) %{
13010 //   match(Set dst (LoadI mem));
13011 // %}
13012 //
13013 peephole %{
13014   peepmatch ( loadI storeI );
13015   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13016   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13017 %}
13018 
13019 //----------SMARTSPILL RULES---------------------------------------------------
13020 // These must follow all instruction definitions as they use the names
13021 // defined in the instructions definitions.